Concordance index for logistics regression in R

247 views Asked by At

I have the following dataframe (my_df) in R:

race trauma y1 y2 y3
0      0  1  0  0
0      0  1  0  0
0      0  1  0  0
0      0  1  0  0
0      0  1  0  0
0      0  1  0  0
0      0  1  0  0
0      0  0  1  0
0      0  0  1  0
0      0  0  1  0
0      0  0  1  0
0      0  0  1  0
0      0  0  1  0
0      0  0  1  0
0      0  0  1  0
0      0  0  1  0
0      0  0  1  0
0      0  0  1  0
0      0  0  1  0
0      0  0  1  0
0      0  0  1  0
0      0  0  1  0
0      0  0  0  1
0      1  1  0  0
0      1  1  0  0
0      1  1  0  0
0      1  1  0  0
0      1  1  0  0
0      1  1  0  0
0      1  1  0  0
0      1  1  0  0
0      1  0  1  0
0      1  0  1  0
0      1  0  1  0
0      1  0  1  0
0      1  0  1  0
0      1  0  1  0
0      1  0  1  0
0      1  0  1  0
0      1  0  1  0
0      1  0  1  0
0      1  0  1  0
0      1  0  1  0
0      1  0  0  1
0      2  1  0  0
0      2  1  0  0
0      2  1  0  0
0      2  1  0  0
0      2  1  0  0
0      2  0  1  0
0      2  0  1  0
0      2  0  1  0
0      2  0  1  0
0      2  0  1  0
0      2  0  1  0
0      2  0  1  0
0      2  0  1  0
0      2  0  1  0
0      2  0  1  0
0      2  0  1  0
0      2  0  1  0
0      2  0  1  0
0      2  0  1  0
0      2  0  1  0
0      2  0  1  0
0      2  0  0  1
0      3  1  0  0
0      3  0  1  0
0      3  0  1  0
0      3  0  1  0
0      3  0  1  0
0      3  0  1  0
0      3  0  1  0
0      3  0  1  0
0      3  0  1  0
0      3  0  1  0
0      3  0  0  1
0      4  1  0  0
0      4  0  1  0
0      4  0  1  0
0      4  0  1  0
0      4  0  1  0
0      5  0  0  1
0      5  0  0  1
1      0  0  1  0
1      0  0  0  1
1      1  0  1  0
1      1  0  1  0
1      1  0  1  0
1      1  0  0  1
1      2  0  1  0
1      2  0  1  0
1      2  0  1  0
1      2  0  0  1
1      3  0  1  0
1      3  0  1  0
1      3  0  0  1

The output dput(my_df) gives

structure(list(race = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L), trauma = c(0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 0L, 0L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L), happy = c(1L, 1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 1L, 2L, 2L, 2L, 2L, 
3L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 3L)), class = "data.frame", row.names = c(NA, 
-97L))

I use the vgam package to fit a cumulative logistics model and calculate the concordance index through the pROC package:

> fit = vglm(cbind(y1,y2,y3) ~ race + trauma, family=cumulative(parallel=TRUE), data=my_df)
> multiclass.roc(as.numeric(cbind(my_df$y1, my_df$y2, my_df$y3)), as.numeric(predict(fit, type="response")), plot=FALSE, legacy.axes=TRUE)

The area under the ROC curve, i.e. the concordance index, is 0.8271. Based on the source which I'm trying to reproduce (Categorical Data Analysis, Agresti, 3rd edition, pg. 314), the result should be 0.688. The results of the fit agree with those presented in the book. I guess there must be something wrong with my concordance index. I can't figure out what. Thanks in advance for your help.

0

There are 0 answers