Data manipulation: spread columns with different number of rows in dplyr

321 views Asked by At

I am trying to spread the time columns of my dataframe. left_join would be my choice, but the age groups age and geo differ, thus I end up with most years containing NA values and one of the age categories disappears.

library(dplyr)

dt %>% 
  filter(time!=2001) %>%
  group_by(time, geo, age, sex) %>% 
  filter(time==2011) %>%
left_join(.,dt %>% 
        group_by(time, sex, age, geo) %>% 
        mutate(time2 = 2011) %>% 
        filter(time != 2011) %>% 
        spread(time, value),
      by = c('time' = 'time2', 'age', 'geo'))

What I obtain is this:

time   geo sex.x   age value sex.y `2000` `2001` `2002` `2003`
2011 51900     1     0 27933     1     NA  26193     NA     NA
2011 51900     1     0 27933     2     NA  22760     NA     NA
2011 51900     1     5 20627     1     NA  26213     NA     NA
2011 51900     1     5 20627     2     NA  25647     NA     NA
...
2011 51900     1    75  6400     1     NA   5313     NA     NA
2011 51900     1    75  6400     2     NA  11500     NA     NA
2011 51900     1    80  4520    NA     NA     NA     NA     NA

but there's a problem with the ```value`` column as it repeats the same values twice (and it shouldn't) and years 2000, 2002, ..., 2020

What I would like is this:

geo    sex  age   2001     2011    2000     2002     2003  ... 2020
51900  1    0     39290    41900   69844    55281    55545     58045
51900  2    0     34140    38270   61192    65301    65429     65391
51902  1    0     4307     4193    69844    55281    55545     58045
51902  2    0     3753     3453    61192    65301    65429     65391
...
51900  1    80    NA       41900   104766   97952    98143     87068
51900  2    80    NA       38270   91788    89921    83317     98086


dt = structure(list(time = c(2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 
2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2000L, 2000L, 2000L, 2000L, 2000L, 2002L, 
2002L, 2002L, 2002L, 2002L, 2003L, 2003L, 2003L, 2003L, 2003L, 2004L, 2004L, 2004L, 2004L, 2004L, 2005L, 2005L, 2005L, 2005L, 
2005L, 2006L, 2006L, 2006L, 2006L, 2006L, 2007L, 2007L, 2007L, 2007L, 2007L, 2008L, 2008L, 2008L, 2008L, 2008L, 2009L, 2009L, 
2009L, 2009L, 2009L, 2010L, 2010L, 2010L, 2010L, 2010L, 2012L, 2012L, 2012L, 2012L, 2012L, 2013L, 2013L, 2013L, 2013L, 2013L, 
2014L, 2014L, 2014L, 2014L, 2014L, 2015L, 2015L, 2015L, 2015L, 2015L, 2016L, 2016L, 2016L, 2016L, 2016L, 2017L, 2017L, 2017L, 
2017L, 2017L, 2018L, 2018L, 2018L, 2018L, 2018L, 2019L, 2019L, 2019L, 2019L, 2019L, 2020L, 2020L, 2020L, 2020L, 2020L, 2000L, 
2000L, 2000L, 2000L, 2000L, 2002L, 2002L, 2002L, 2002L, 2002L, 2003L, 2003L, 2003L, 2003L, 2003L, 2004L, 2004L, 2004L, 2004L, 
2004L, 2005L, 2005L, 2005L, 2005L, 2005L, 2006L, 2006L, 2006L, 2006L, 2006L, 2007L, 2007L, 2007L, 2007L, 2007L, 2008L, 2008L, 
2008L, 2008L, 2008L, 2009L, 2009L, 2009L, 2009L, 2009L, 2010L, 2010L, 2010L, 2010L, 2010L, 2012L, 2012L, 2012L, 2012L, 2012L, 
2013L, 2013L, 2013L, 2013L, 2013L, 2014L, 2014L, 2014L, 2014L, 2014L, 2015L, 2015L, 2015L, 2015L, 2015L, 2016L, 2016L, 2016L, 
2016L, 2016L, 2017L, 2017L, 2017L, 2017L, 2017L, 2018L, 2018L, 2018L, 2018L, 2018L, 2019L, 2019L, 2019L, 2019L, 2019L, 2020L, 
2020L, 2020L, 2020L, 2020L), geo = c(51900L, 51900L, 51900L, 51900L, 51900L, 51900L, 51900L, 51900L, 51902L, 51902L, 51902L, 
51902L, 51902L, 51902L, 51902L, 51902L, 51900L, 51900L, 51900L, 51900L, 51900L, 51900L, 51900L, 51900L, 51900L, 51900L, 51902L, 
51902L, 51902L, 51902L, 51902L, 51902L, 51902L, 51902L, 51902L, 51902L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 
51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 
51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 
51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L,     51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L,     51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L,     51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 
51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 
51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L), sex = c(1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L), age = c(0L, 5L, 10L, 75L, 0L, 5L, 10L, 75L, 0L, 5L, 10L, 75L, 0L, 5L,     10L, 75L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 
80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L,     0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 
0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L,             10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 10L, 75L,     80L, 0L, 5L, 10L, 75L, 80L, 0L, 5L, 
10L, 75L, 80L, 0L, 5L, 10L, 75L, 80L), value = c(26193L, 26213L, 31653L,     5313L, 22760L, 25647L, 31393L, 11500L, 4307L, 4793L, 
5947L, 667L, 3753L, 4500L, 5207L, 1440L, 27933L, 20627L, 20593L, 6400L,     4520L, 25513L, 17480L, 17800L, 9520L, 8560L, 4193L, 3027L, 
3453L, 800L, 580L, 3453L, 2473L, 2980L, 1013L, 1167L, 61192L, 88249L,     105509L, 20595L, 18198L, 55281L, 76667L, 99967L, 25571L, 
19187L, 55545L, 70490L, 95697L, 28376L, 19340L, 56564L, 64639L, 90809L, 30322L, 19579L, 57471L, 59755L, 85464L, 30949L, 20081L, 
60145L, 55926L, 79537L, 30083L, 22373L, 61425L, 53664L, 73329L, 27916L, 24891L, 61683L, 52992L, 67148L, 25620L, 27118L, 61776L, 
53403L, 61637L, 24601L, 28551L, 62477L, 53990L, 57438L, 25439L, 29074L, 64401L, 56247L, 52992L, 31317L, 30495L, 64691L, 58095L, 
52582L, 35069L, 30691L, 64689L, 60083L, 52853L, 37023L, 31297L, 64391L, 61877L, 53538L, 36327L, 32537L, 63158L, 63367L, 54657L, 
33260L, 35359L, 61961L, 64311L, 56249L, 28203L, 38591L, 60751L, 64639L, 58159L, 22742L, 41433L, 59469L, 64485L, 60081L, 18813L, 
42936L, 58045L, 64127L, 61703L, 17280L, 42758L, 69844L, 93632L, 109773L, 11025L, 7397L, 65301L, 82373L, 103304L, 16130L, 7705L, 
65429L, 77025L, 98764L, 18861L, 7835L, 66195L, 72123L, 93892L, 20763L, 8231L, 66949L, 68002L, 88909L, 21513L, 8973L, 69257L, 
64759L, 83202L, 21269L, 10813L, 70402L, 62813L, 77601L, 20044L, 12820L, 70681L, 62125L, 72404L, 18627L, 14631L, 70818L, 62321L, 
68099L, 17947L, 15893L, 71579L, 62729L, 65085L, 18379L, 16509L, 73653L, 64712L, 61851L, 21697L, 17861L, 73764L, 66737L, 61483L, 
23663L, 18103L, 73537L, 68968L, 61599L, 24347L, 18455L, 73041L, 70867L,     62190L,     23305L, 18986L, 71645L, 72368L, 63235L, 21077L, 
20717L, 70201L, 73275L, 64867L, 17653L, 22534L, 68704L, 73517L, 
66893L, 14089L, 23935L, 67117L, 73238L, 68928L, 11606L, 24343L, 65391L, 72725L, 70609L, 10697L, 23592L)), .Names = c("time", 
"geo", "sex", "age", "value"), class = "data.frame", row.names = c(NA, -226L))
1

There are 1 answers

0
Alex On BEST ANSWER

You can use the spread function from tidyr

dt_final <- dt %>% spread (time, # the variable I want to use to create multiple columns
               value)# the variable to use to fill the rows in the new columns

head(as.tibble(dt_final))

# geo   sex   age `2000` `2001` `2002` `2003` `2004` `2005` `2006` `2007` `2008` `2009` `2010` `2011` `2012` `2013` `2014` `2015` `2016` `2017` `2018` `2019` `2020`
# <int> <int> <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>
#   1    51     1     0  69844     NA  65301  65429  66195  66949  69257  70402  70681  70818  71579     NA  73653  73764  73537  73041  71645  70201  68704  67117  65391
# 2    51     1     5  93632     NA  82373  77025  72123  68002  64759  62813  62125  62321  62729     NA  64712  66737  68968  70867  72368  73275  73517  73238  72725
# 3    51     1    10 109773     NA 103304  98764  93892  88909  83202  77601  72404  68099  65085     NA  61851  61483  61599  62190  63235  64867  66893  68928  70609
# 4    51     1    75  11025     NA  16130  18861  20763  21513  21269  20044  18627  17947  18379     NA  21697  23663  24347  23305  21077  17653  14089  11606  10697
# 5    51     1    80   7397     NA   7705   7835   8231   8973  10813  12820  14631  15893  16509     NA  17861  18103  18455  18986  20717  22534  23935  24343  23592
# 6    51     2     0  61192     NA  55281  55545  56564  57471  60145  61425  61683  61776  62477     NA  64401  64691  64689  64391  63158  61961  60751  59469  58045