cumsum by day with hourly observations with dplyr

366 views Asked by At

I was able to sum up all hourly observations (column: gdd10) in a day, and group that by the three factors (Treatment, Rep, and Depth). I used the following code:

##Summarise by Day
gdd10<- tempsub%>% 
group_by(Date = as.Date.character(Date), Treatment,Rep,Depth)%>%
summarise(sum = sum(gdd10)) %>% 
arrange(sum)

However, I would like to accumulate the newly created gdd10 sums per day over the entire period of time. So that I have a cumulative sum at the end grouped by treatment, Rep, Depth. I tried following code (based on cumsum in grouped data with dplyr):

##Cumulate sum over entire period
gdd10csum<- gdd10%>% 
group_by(Treatment,Rep,Depth)%>%
mutate(cumsum = cumsum(sum)) %>% 
arrange(cumsum)

When looking at a ggplot, it reveals that this code does not sum up over the entire period:

I would like to have ONE accumulated (cumsum) value by Date over the entire period of time with grouping by treatment, rep and depth. Any ideas what is wrong with this code are highly appreciated?

> dput(gdd10csum)
structure(list(Date = structure(c(16941, 16941, 16941, 16941, 
16941, 16941, 16941, 16941, 16941, 16941, 16941, 16942, 16941, 
16941, 16941, 16941, 16942, 16942, 16941, 16942, 16942, 16942, 
16942, 16941, 16941, 16942, 16942, 16941, 16942, 16941, 16941, 
16941, 16941, 16942, 16941, 16942, 16942, 16942, 16941, 16941, 
16941, 16942, 16941, 16941, 16941, 16941, 16941, 16942, 16942, 
16942, 16942, 16942, 16942, 16942, 16942, 16942, 16942, 16942, 
16942, 16942, 16942, 16942, 16942, 16942), class = "Date"), Treatment = structure(c(2L, 
1L, 5L, 3L, 1L, 5L, 2L, 3L, 5L, 5L, 4L, 2L, 4L, 3L, 4L, 4L, 1L, 
5L, 2L, 5L, 3L, 1L, 2L, 2L, 2L, 5L, 3L, 1L, 5L, 5L, 1L, 5L, 1L, 
4L, 3L, 4L, 3L, 4L, 3L, 1L, 4L, 4L, 5L, 3L, 3L, 4L, 4L, 2L, 2L, 
2L, 1L, 5L, 1L, 5L, 1L, 3L, 3L, 4L, 1L, 5L, 3L, 3L, 4L, 4L), .Label = c("Disc", 
"Strip_NT", "Strip_ST", "Vt_high", "Vt_low"), class = "factor"), 
    Rep = structure(c(2L, 1L, 1L, 2L, 2L, 4L, 1L, 3L, 2L, 3L, 
    4L, 2L, 1L, 1L, 2L, 3L, 1L, 1L, 2L, 4L, 2L, 2L, 1L, 1L, 3L, 
    2L, 3L, 1L, 3L, 1L, 4L, 4L, 3L, 4L, 2L, 1L, 1L, 2L, 4L, 2L, 
    1L, 3L, 2L, 3L, 1L, 3L, 2L, 2L, 3L, 1L, 1L, 1L, 4L, 4L, 3L, 
    2L, 4L, 1L, 2L, 2L, 3L, 1L, 3L, 2L), .Label = c("1", "2", 
    "3", "4"), class = "factor"), Depth = structure(c(2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 
    1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L), .Label = c("5", "30"), class = "factor"), sum = c(19.264, 
    27.405, 37.078, 40.186, 40.838, 42.793, 44.761, 47.656, 47.842, 
    53.317, 59.739, 40.824, 60.351, 61.321, 62.353, 64.521, 47.445, 
    55.643, 94.985, 59.336, 62.25, 63.425, 64.323, 110.563, 110.912, 
    68.41, 68.735, 116.45, 73.875, 127.29, 129.475, 130.305, 
    133.851, 78.288, 139.362, 79.38, 80.889, 83.889, 147.819, 
    150.538, 150.655, 89.098, 156.007, 159.148, 168.585, 173.395, 
    175.571, 116.011, 128.453, 129.605, 133.488, 140.31, 139.496, 
    143.325, 150.379, 153.905, 157.835, 163.186, 166.053, 174.543, 
    175.186, 176.603, 190.917, 192.106), cumsum = c(19.264, 27.405, 
    37.078, 40.186, 40.838, 42.793, 44.761, 47.656, 47.842, 53.317, 
    59.739, 60.088, 60.351, 61.321, 62.353, 64.521, 74.85, 92.721, 
    94.985, 102.129, 102.436, 104.263, 109.084, 110.563, 110.912, 
    116.252, 116.391, 116.45, 127.192, 127.29, 129.475, 130.305, 
    133.851, 138.027, 139.362, 139.731, 142.21, 146.242, 147.819, 
    150.538, 150.655, 153.619, 156.007, 159.148, 168.585, 173.395, 
    175.571, 210.996, 239.365, 240.168, 249.938, 267.6, 268.971, 
    273.63, 284.23, 293.267, 305.654, 313.841, 316.591, 330.55, 
    334.334, 345.188, 364.312, 367.677)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -64L), vars = list(
    Treatment, Rep, Depth), indices = list(c(27L, 50L), c(1L, 
16L), c(39L, 58L), c(4L, 21L), c(32L, 54L), c(30L, 52L), c(23L, 
49L), c(6L, 22L), c(18L, 47L), c(0L, 11L), c(24L, 48L), c(44L, 
61L), c(13L, 36L), c(34L, 55L), c(3L, 20L), c(43L, 60L), c(7L, 
26L), c(38L, 56L), c(40L, 57L), c(12L, 35L), c(46L, 63L), c(14L, 
37L), c(45L, 62L), c(15L, 41L), c(10L, 33L), c(29L, 51L), c(2L, 
17L), c(42L, 59L), c(8L, 25L), c(9L, 28L), c(31L, 53L), c(5L, 
19L)), drop = TRUE, group_sizes = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), biggest_group_size = 2L, .Names = c("Date", 
"Treatment", "Rep", "Depth", "sum", "cumsum"), labels = structure(list(
    Treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
    2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("Disc", "Strip_NT", 
    "Strip_ST", "Vt_high", "Vt_low"), class = "factor"), Rep = structure(c(1L, 
    1L, 2L, 2L, 3L, 4L, 1L, 1L, 2L, 2L, 3L, 1L, 1L, 2L, 2L, 3L, 
    3L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 1L, 1L, 2L, 2L, 3L, 4L, 
    4L), .Label = c("1", "2", "3", "4"), class = "factor"), Depth = structure(c(1L, 
    2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 
    2L), .Label = c("5", "30"), class = "factor")), class = "data.frame", row.names = c(NA, 
-32L), vars = list(Treatment, Rep, Depth), indices = list(c(34L, 
40L), c(1L, 8L), c(48L, 55L), c(5L, 19L), c(41L, 47L), c(37L, 
43L), c(31L, 38L), c(7L, 20L), c(30L, 33L), c(0L, 4L), c(32L, 
36L), c(56L, 61L), c(16L, 27L), c(42L, 50L), c(3L, 17L), c(53L, 
59L), c(9L, 23L), c(46L, 52L), c(49L, 54L), c(15L, 26L), c(60L, 
63L), c(18L, 28L), c(57L, 62L), c(21L, 29L), c(14L, 25L), c(35L, 
44L), c(2L, 12L), c(51L, 58L), c(10L, 22L), c(11L, 24L), c(39L, 
45L), c(6L, 13L)), drop = TRUE, group_sizes = c(2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), biggest_group_size = 2L, .Names = c("Treatment", 
"Rep", "Depth")))
1

There are 1 answers

0
Pat14 On BEST ANSWER

The answer is to exclude the arrange function in the first step:

##Summarise by Day
gdd10<- tempsub%>% 
group_by(Date = as.Date.character(Date), Treatment,Rep,Depth)%>%
summarise(sum = sum(gdd10))