Multi-permutation data comparison in R

72 views Asked by At

I have been comparing the common elements in multiple datasets and have been having great success with a small number of sets. However, I want to compare a larger number of datasets and I am trying to automate the process of writing the code since the number of permutations possible is in the thousands. Here is what I have so far that has been working:

Intersect <- function (x) {  
  if (length(x) == 1) {
    unlist(x)
  } else if (length(x) == 2) {
    intersect(x[[1]], x[[2]])
  } else if (length(x) > 2){
    intersect(x[[1]], Intersect(x[-1]))
  }
}

Union <- function (x) {  
  if (length(x) == 1) {
    unlist(x)
  } else if (length(x) == 2) {
    union(x[[1]], x[[2]])
  } else if (length(x) > 2) {
    union(x[[1]], Union(x[-1]))
  }
}

Setdiff <- function (x, y) {
  xx <- Intersect(x)
  yy <- Union(y)
  setdiff(xx, yy)
}

set.seed(1)
xx.1 <- list(A = set2, 
             B = set3, 
             C = set4, 
             D = set5)
abcd = Intersect(xx.1)

ab = Setdiff(xx.1[c("A", "B")], xx.1[c("C", "D")])

ac = Setdiff(xx.1[c("A", "C")], xx.1[c("B", "D")])

ad = Setdiff(xx.1[c("A", "D")], xx.1[c("B", "C")])

bc = Setdiff(xx.1[c("B", "C")], xx.1[c("A", "D")])

bd = Setdiff(xx.1[c("B", "D")], xx.1[c("A", "C")])

cd = Setdiff(xx.1[c("C", "D")], xx.1[c("A", "B")])

abc = Setdiff(xx.1[c("A", "B", "C")], xx.1[c("D")])

abd = Setdiff(xx.1[c("A", "B", "D")], xx.1[c("C")])

bcd = Setdiff(xx.1[c("C", "B", "D")], xx.1[c("A")])

acd = Setdiff(xx.1[c("C", "B", "A")], xx.1[c("D")])

a = Setdiff(xx.1[c("A")], xx.1[c("E","B", "C", "D")])

b = Setdiff(xx.1[c("B")], xx.1[c("E","A", "C", "D")])

c = Setdiff(xx.1[c("C")], xx.1[c("E","B", "A", "D")])

d = Setdiff(xx.1[c("D")], xx.1[c("E","B", "C", "A")])

e = Setdiff(xx.1[c("E")], xx.1[c("A","B", "C", "D")])


write.table(qpcR:::cbind.na(abcd,ab,ac,ad,bc,bd,cd,abc,abd,bcd,acd,a,b,c,d), 
            file="output.csv", sep=",", quote=FALSE, row.names=FALSE, col.names=TRUE)

What I want to do is automate the process of writing the "setdiff" statements so I can compare more than 4 datasets but I am having some trouble. Here is what I have so far:

Intersect <- function (x) {  
  # Multiple set version of intersect
  # x is a list
  if (length(x) == 1) {
    unlist(x)
  } else if (length(x) == 2) {
    intersect(x[[1]], x[[2]])
  } else if (length(x) > 2){
    intersect(x[[1]], Intersect(x[-1]))
  }
}

Union <- function (x) {  
  # Multiple set version of union
  # x is a list
  if (length(x) == 1) {
    unlist(x)
  } else if (length(x) == 2) {
    union(x[[1]], x[[2]])
  } else if (length(x) > 2) {
    union(x[[1]], Union(x[-1]))
  }
}

Setdiff <- function (x, y) {
  # Remove the union of the y's from the common x's. 
  # x and y are lists of characters.
  xx <- Intersect(x)
  yy <- Union(y)
  setdiff(xx, yy)
}


allCombs <- function(x) c(x, lapply(seq_along(x)[-1L], 
                                    function(y) combn(x, y, paste0, collapse = "")),
                          recursive = TRUE)

list1 = list("b","d","e","g","h","j","k","l","n","o","q","s","t","u","v","z")

permutations = allCombs(list1) 

stringleft =" = Setdiff(xx.1[c("
stringleft2 = ")], xx.1[c("
stringright = ")])"


for (x in permutations) {

  
  left = strsplit(x,split = "" )
  
  right = Setdiff (c(left),c(list1))

  pr = paste(x, stringleft, left, stringleft2, right, stringright)
  
  print(pr)
  
}

the permutations are just fine, but I seem to be having trouble with the Setdiff not returning any values for the list "right" It should look something like this:

bdeghj = Setdiff(xx.1[c("b","d","e", "g", "h","j")], xx.1[c("k","l","n","o","q","s","t","u","v","z")])

but I keep on getting this:

[1] "bdhjklnuz  = Setdiff(xx.1[c( c(\"b\", \"d\", \"h\", \"j\", \"k\", \"l\", \"n\", \"u\", \"z\") )], xx.1[c(  )])"
[1] "bdhjklnvz  = Setdiff(xx.1[c( c(\"b\", \"d\", \"h\", \"j\", \"k\", \"l\", \"n\", \"v\", \"z\") )], xx.1[c(  )])"
[1] "bdhjkloqs  = Setdiff(xx.1[c( c(\"b\", \"d\", \"h\", \"j\", \"k\", \"l\", \"o\", \"q\", \"s\") )], xx.1[c(  )])"
[1] "bdhjkloqt  = Setdiff(xx.1[c( c(\"b\", \"d\", \"h\", \"j\", \"k\", \"l\", \"o\", \"q\", \"t\") )], xx.1[c(  )])"
[1] "bdhjkloqu  = Setdiff(xx.1[c( c(\"b\", \"d\", \"h\", \"j\", \"k\", \"l\", \"o\", \"q\", \"u\") )], xx.1[c(  )])"
0

There are 0 answers