dataframe vs contingency table & chisq.test()

80 views Asked by At

Firstly, I'm sorry if this seems like a basic question. I'm stuck on a simple chisq.test. I think the problem is linked to the class of my data (data.frame), even though I believe its shape is fine:

# Example data for the question: a data.frame with 40 rows
# (row.names = c(NA, -40L)) and 12 numeric columns named "1" to "12".
# Values are non-negative and some are fractional (e.g. 0.5, 2.33333333333333),
# so the cells are not all integer counts.
# The structure(list(...), class = "data.frame") form looks like dput() output,
# so it can be pasted into an R session to recreate the object.
cat3=structure(list(`1` = c(1, 3, 0, 0, 0, 2, 0, 3, 0, 0, 1.5, 4, 
0, 0, 0, 0, 0, 0, 9, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 
5, 0, 0, 0, 0, 0, 0.5), `2` = c(1, 0, 0, 0, 0, 2, 0, 2, 0.5, 
0, 2.5, 6, 0, 0, 0, 0, 0, 0, 11.5, 1, 2, 1.5, 0, 0, 1, 0, 0, 
0, 0, 0, 0, 0, 0, 7, 1, 0, 0, 0, 0, 1.5), `3` = c(0, 0, 0, 0, 
2.5, 2, 0, 0, 0, 0, 0, 5.5, 2, 0, 1, 0, 0, 0, 3, 1, 0, 1, 0, 
1, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 0, 0, 0, 0, 0, 3.5), `4` = c(0, 
1, 0.5, 0, 0, 1, 0, 1, 0, 0, 0, 3, 1, 0, 3, 0, 0, 0, 1, 7, 0, 
2, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0.5, 0, 0, 3.5, 4), 
    `5` = c(0, 2, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1.5, 0, 2, 0, 
    0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 2.5, 4), `6` = c(0, 0, 0, 0, 0, 2.5, 0, 1, 0, 1, 
    0, 1.5, 0, 0, 3.5, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 
    1, 0, 0, 0, 0.5, 0, 2.33333333333333, 0, 0, 0, 2, 1), `7` = c(1, 
    1, 1, 0, 0, 3.5, 0, 0, 0, 0, 0, 1.5, 0, 0, 1, 0, 0, 0, 0, 
    4, 0, 0, 0, 0, 0, 1, 0.5, 0, 0.5, 0, 1, 0, 0, 0, 3, 1, 2, 
    0, 1.5, 0), `8` = c(1, 0, 0, 0, 0, 1.5, 0, 0, 0, 2, 0, 1, 
    0, 0, 0.5, 2, 1, 0, 0, 0.5, 0, 0, 0, 0.333333333333333, 0.5, 
    0, 0, 0, 1.5, 0, 0, 0, 0, 0, 1.5, 1.5, 0, 0, 1, 0), `9` = c(2, 
    0, 0, 0, 0.5, 0.5, 0, 3.5, 0, 0, 0, 0.5, 0, 0, 0, 0, 1, 0, 
    0, 3, 0, 0, 0, 0, 0, 0.5, 1, 1, 2, 0, 0, 0, 0, 3, 3, 3, 0, 
    0, 0, 0.5), `10` = c(2, 0, 0, 0, 0, 0, 0, 2.5, 0, 0, 0, 0, 
    0, 0, 0, 1.5, 1.5, 1, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0.333333333333333, 
    7, 0, 0, 0, 0, 0, 2, 3.5, 0, 0, 0, 0), `11` = c(1, 0, 0, 
    0, 0, 0, 0.5, 0, 0, 0, 0, 0, 0, 0.5, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 1.5, 0, 0, 0, 0, 0, 0, 0, 0, 0.333333333333333, 
    4.33333333333333, 0, 0, 0, 0), `12` = c(1, 0, 0, 0.5, 0, 
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 
    0, 7, 0, 0, 1, 0, 0, 0, 0, 0, 0, 4.5, 0, 0, 0, 0)), .Names = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"), class = "data.frame", row.names = c(NA, 
-40L))

I'm trying to run chisq.test(cat3), but I get neither a p-value nor an X² statistic:

        Pearson's Chi-squared test

data:  cat3
X-squared = NaN, df = 429, p-value = NA

I found no way to transform a data frame into a contingency table in order to use the chisq.test function. Is this due to the class of my table? If not, what is the problem: why do I get no value for p and X²?

Thanks a lot.

1

There is 1 answer

2
Istrel On BEST ANSWER

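The problem is that your data contains rows that are all 0 (empty). An all-zero row has a row total of 0, so every expected count in that row is 0, and its contribution (observed − expected)² / expected to the statistic is 0/0 = NaN: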

> any(rowSums(cat3) == 0)
[1] TRUE

You can delete empty rows:

> cat3_sub <- cat3[rowSums(cat3) != 0, ]
> chisq.test(cat3_sub)

Pearson's Chi-squared test

data:  cat3_sub
X-squared = 731.21, df = 407, p-value < 2.2e-16

Warning message:
In chisq.test(cat3_sub) : Chi-squared approximation may be incorrect