pivot_wider for multiple choice questionnaire

77 views Asked by At

have a dataset from a survey. 2 questions have multiple choice options and it doesn't allow me to use pivot_wider

here is a sample data.

nam=c("per_id","quest","answ")
per_id=c("0012","0023","0045","0003","0003","0045","0003","0012","0023","0004","0004","0001","0005","0546","0001","0546","0005","0004","0004")
quest=c("A","A","A","A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B")
answ=c("Self","Self","Self","Self","Father","Sister","Daughter","Mother","Father","Self","Father","X1","Y1","Z1","Y1","Z2","X1","X1","X1")

df=cbind.data.frame(per_id,quest,answ)
names(df)=nam

what I want to have is

here

1

There are 1 answers

2
Friede On BEST ANSWER

You might try the following approach using split + Reduce + merge based on unique rows.

Approach

l = lapply(split(df, df$quest), \(x) { x = unique(x); x$quest = NULL; x })
# names(l$A)[2] = "A"
# names(l$B)[2] = "B"
# in case you have more than two quests: 
l = lapply(names(l), \(x) setNames(l[[x]], c(names(l[[x]])[1], x)))
Reduce(\(...) merge(..., all = TRUE), l) 

Result

   per_id        A    B
1    0001     <NA>   X1
2    0001     <NA>   Y1
3    0003     Self <NA>
4    0003   Father <NA>
5    0003 Daughter <NA>
6    0004     Self   X1
7    0004   Father   X1
8    0005     <NA>   Y1
9    0005     <NA>   X1
10   0012     Self <NA>
11   0012   Mother <NA>
12   0023     Self <NA>
13   0023   Father <NA>
14   0045     Self <NA>
15   0045   Sister <NA>
16   0546     <NA>   Z1
17   0546     <NA>   Z2

Data

df = structure(list(per_id = c("0012", "0023", "0045", "0003", "0003", 
"0045", "0003", "0012", "0023", "0004", "0004", "0001", "0005", 
"0546", "0001", "0546", "0005", "0004", "0004"), quest = c("A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "B", "B", "B", 
"B", "B", "B", "B", "B"), answ = c("Self", "Self", "Self", "Self", 
"Father", "Sister", "Daughter", "Mother", "Father", "Self", "Father", 
"X1", "Y1", "Z1", "Y1", "Z2", "X1", "X1", "X1")), class = "data.frame", row.names = c(NA, 
-19L))