Make a frequency data frame from a list while maintaining rows in R

96 views Asked by At

I have a list that looks like this:

>AP
$CMP1
[1] 411050384 411050456 411050456 411058568

$CMP2
[1] 411050384 411050456

$CMP3
[1] 411050384 411050456 411058568 428909002 428909002

I want to transform the list into a data frame that uses every unique entry as a column name, with one row per member of the list (CMP1, CMP2, ...), where each cell holds the frequency count of that value within that member. This is what I want the data frame to look like:

     411050384 411050456 411058568 428909002
CMP1         1         2         1         0
CMP2         1         1         0         0
CMP3         1         1         1         2

I have looked through the documentation for the 'plyr' and 'reshape2' packages, and I have not had any luck. Any help is appreciated. Thanks.

3

There are 3 answers

0
DatamineR On BEST ANSWER

What about this?

# Distinct values across the whole list, in order of first appearance.
# Computed once here instead of once per list element.
vals <- unique(unlist(AP))
# One row of counts per list element. rbind() always returns a matrix, whereas
# the nested sapply() collapsed to a plain vector when there was only a single
# distinct value, which made the colnames<- assignment below fail.
res <- do.call(rbind, lapply(AP, function(y) {
  vapply(vals, function(x) sum(x == y), integer(1))
}))
colnames(res) <- vals
res
     411050384 411050456 411058568 428909002
CMP1         1         2         1         0
CMP2         1         1         0         0
CMP3         1         1         1         2
1
jalapic On

I don't profess this to be the most elegant, but it works.

Your data:

# Example input: three numeric vectors of IDs, some values repeated.
CMP1 <- c(411050384, 411050456, 411050456, 411058568)
CMP2 <- c(411050384, 411050456)
CMP3 <- c(411050384, 411050456, 411058568, 428909002, 428909002)
# Bundle them into a list whose element names match the vector names.
AP <- list(CMP1 = CMP1, CMP2 = CMP2, CMP3 = CMP3)

First use table on each element of the list to get the frequency. Then I used Map to add the name of each element in the list as a variable and rbind to put it together.

# Count each distinct value per list element; cbind() turns every table into
# a one-column matrix whose row names are the values.
x <- lapply(AP, function(v) cbind(table(v)))
# Append the element name as an "id" column to each of those matrices.
x <- mapply(cbind, x, id = names(AP), SIMPLIFY = FALSE)
# Stack the per-element matrices into one matrix.
x1 <- do.call(rbind, x)

I built a data frame without factors, which makes it easier to construct the final matrix:

# Long format: x = list element id, y = value (taken from the row names),
# z = count. FALSE is spelled out because F is an ordinary variable that can
# be reassigned.
df <- data.frame(
  x = x1[, 2], y = rownames(x1), z = x1[, 1],
  stringsAsFactors = FALSE
)

Using reshape2 to get your matrix.

mat <- reshape2::acast(df, x ~ y, value.var = "z", fill = 0)
# The counts became character strings when they were cbind-ed with the id
# labels, so convert the cells back to integers (dimnames are kept).
storage.mode(mat) <- "integer"

mat


     411050384 411050456 411058568 428909002
CMP1         1         2         1         0
CMP2         1         1         0         0
CMP3         1         1         1         2
0
akrun On

You can try mtabulate from qdapTools

# mtabulate() from qdapTools is called directly on the list; the result is
# shown below.
library(qdapTools)
mtabulate(AP)
 #     411050384 411050456 411058568 428909002
 #CMP1         1         2         1         0
 #CMP2         1         1         0         0
 #CMP3         1         1         1         2

Or melt/acast from reshape2

 library(reshape2)
 # melt() puts the list into long form; acast() then counts the entries for
 # each L1/value pair, with L1 as rows and value as columns.
 acast(melt(AP), L1 ~ value, fun.aggregate = length)
 #     411050384 411050456 411058568 428909002
 #CMP1         1         2         1         0
 #CMP2         1         1         0         0
 #CMP3         1         1         1         2

Or using base R

 # stack() puts the list into two columns (values, ind); selecting them by
 # name as (ind, values) makes the list names the rows of the table.
 table(stack(AP)[c("ind", "values")])
 #      values
 #ind    411050384 411050456 411058568 428909002
 # CMP1         1         2         1         0
 # CMP2         1         1         0         0
 # CMP3         1         1         1         2