Table of contingency tables

258 views Asked by At

I have a table with the following data:

ex = structure(list(A = c(482, 208, 227, 239, 783, 141), B = c(155, 
69, 63, 65, 255, 25), C = c(64, 24, 28, 29, 134, 34), D = c(408, 
180, 196, 207, 638, 104)), .Names = c("A", "B", "C", "D"), class = "data.frame", row.names = c("E", 
"F", "G", "H", "I", "J"))


>ex
    A   B   C   D
E 482 155  64 408
F 208  69  24 180
G 227  63  28 196
H 239  65  29 207
I 783 255 134 638
J 141  25  34 104

I want to comput the chisq.test() for all pairs of rows for A vs B and for C vs D. This sounds pretty ambiguous to me so here is a concrete example:

   A   B       C   D
E 482 155   E 64  408
F 208  69   F 24  180

   A   B       C   D
E 482 155   E 64  408
G 227  63   G 28  196

... repeat for all pairs of E, F, G, H, I, and J

compute P values using chisq.tests() for each of these tables.

I have already done this, but it's output is in an annoying format. Basically I used combn(rownames(ex),2) to get the pairs, then wrote an lapply that went through the result of combn, constructed the matrices from the table, then and gave me the chisq of the matrix.

tests = combn(rownames(ex), 2)
tests = apply(tests,2,list)

testResults = lapply(tests, function(cat){
  test = unlist(cat)
  AvsBm = matrix(c(ex[test[1],'A'],ex[test[2],'A'],ex[test[1],'B'],ex[test[2],'B']),nrow=2, ncol=2)
  AvsBp = chisq.test(AvsBm)$p.value
  CvsDm = matrix(c(ex[test[1],'C'],ex[test[2],'C'],ex[test[1],'D'],ex[test[2],'D']),nrow=2, ncol=2)
  CvsDp = chisq.test(CvsDm)$p.value
  a = c(test[1], test[2], AvsBp, CvsDp)
})

testResults = data.frame(do.call(rbind, testResults))
names(testResults) = c('Var1', 'Var2', 'AvsB', 'CvsD')

The results looked like this:

> testResults
   Var1 Var2                AvsB                CvsD
1     E    F   0.918199692198075   0.608649272659481
2     E    G   0.432572099792864   0.790459437339736
3     E    H   0.358651246275942   0.723319426118104
4     E    I   0.960564133271506  0.0896848347203047
5     E    J  0.0144029906033956  0.0028292317888333
6     F    G   0.424982446036333   0.932706790402674
7     F    H    0.36295549985099   0.982958067120215
8     F    I   0.968631154321032  0.0684734899837275
9     F    J  0.0195800439529193 0.00302299304015596
10    G    H   0.998659183486833   0.999999999999997
11    G    I   0.354996420259763   0.102779771508206
12    G    J   0.107030315095613 0.00460404677366423
13    H    I   0.284826573788384  0.0801050087692166
14    H    J   0.123057932646613 0.00332480813135708
15    I    J 0.00951511015485216  0.0559833381301495

This works ok, but it feels like it should be a lot easier. I have to do a lot of reformatting of the table afterwards to get it into a pretty looking table. The ideal format would be two triangular tables one for A-vs-B and the other for C-vs-D.

Are there any built in functions that do this kind of thing?

Hope my question isn't too vague, Cheers.

2

There are 2 answers

1
Ferdinand.kraft On BEST ANSWER

You can use this instead:

within(as.data.frame(t(combn(rownames(ex), 2)), stringsAsFactors=FALSE), {
    CvsDp <- mapply(function(i,j)chisq.test(ex[c(i,j),c("C","D")])$p.value,V1,V2)
    AvsBp <- mapply(function(i,j)chisq.test(ex[c(i,j),c("A","B")])$p.value,V1,V2)
})

Result

   V1 V2      AvsBp       CvsDp
1   E  F 0.91819969 0.608649273
2   E  G 0.43257210 0.790459437
3   E  H 0.35865125 0.723319426
4   E  I 0.96056413 0.089684835
5   E  J 0.01440299 0.002829232
6   F  G 0.42498245 0.932706790
7   F  H 0.36295550 0.982958067
8   F  I 0.96863115 0.068473490
9   F  J 0.01958004 0.003022993
10  G  H 0.99865918 1.000000000
11  G  I 0.35499642 0.102779772
12  G  J 0.10703032 0.004604047
13  H  I 0.28482657 0.080105009
14  H  J 0.12305793 0.003324808
15  I  J 0.00951511 0.055983338

EDIT: as a triangular table, given x = the result above:

m <- matrix(nrow=nrow(ex), ncol=nrow(ex))
rownames(m) <- colnames(m) <- rownames(ex)
m[cbind(x$V1,x$V2)] <- x$AvsBp

Result

   E         F         G         H         I          J
E NA 0.9181997 0.4325721 0.3586512 0.9605641 0.01440299
F NA        NA 0.4249824 0.3629555 0.9686312 0.01958004
G NA        NA        NA 0.9986592 0.3549964 0.10703032
H NA        NA        NA        NA 0.2848266 0.12305793
I NA        NA        NA        NA        NA 0.00951511
J NA        NA        NA        NA        NA         NA

For CvsDp just replace it in the last line.

0
A5C1D2H2I1M1N2O1R2T1 On

Here's another alternative:

Step 1: Create an empty matrix and a list of the column combinations you are looking at

A <- list(c("A", "B"), c("C", "D"))
A2 <- sapply(A, paste, collapse = "vs")
myMat <- matrix(NA, nrow = nrow(ex), ncol = nrow(ex),
                dimnames = list(rownames(ex), rownames(ex)))

Step 2: Use the FUN argument for combn to get a vector of "p.values" from chisq.test. This will result in a 2 row by 15 column matrix.

csTest <- combn(rownames(ex), 2, FUN=function(y) {
  sapply(A, function(z) {
    chisq.test(ex[y, z])$p.value
  })
})

Step 3: Use lapply to create a list of the two rows as a matrix. Make use of lower.tri to automatically fill in the matrix.

setNames(lapply(sequence(nrow(csTest)), function(y) {
  myMat[lower.tri(myMat)] <- csTest[y, ]
  myMat
}), A2)
# $AvsB
#            E          F         G         H          I  J
# E         NA         NA        NA        NA         NA NA
# F 0.91819969         NA        NA        NA         NA NA
# G 0.43257210 0.42498245        NA        NA         NA NA
# H 0.35865125 0.36295550 0.9986592        NA         NA NA
# I 0.96056413 0.96863115 0.3549964 0.2848266         NA NA
# J 0.01440299 0.01958004 0.1070303 0.1230579 0.00951511 NA
# 
# $CvsD
#             E           F           G           H          I  J
# E          NA          NA          NA          NA         NA NA
# F 0.608649273          NA          NA          NA         NA NA
# G 0.790459437 0.932706790          NA          NA         NA NA
# H 0.723319426 0.982958067 1.000000000          NA         NA NA
# I 0.089684835 0.068473490 0.102779772 0.080105009         NA NA
# J 0.002829232 0.003022993 0.004604047 0.003324808 0.05598334 NA