Convert the time intervals to equal hours and fill in the value column

52 views Asked by At

I have data with this structure (YR weather forecasts)

df1 <- read.table(text = "time temperature
00 0
01 0
02 1
03 1
04 2
05 2
06 2
07-13 3
13-19 4
19-01 1", header = TRUE)

I want to get a structure: each row is one hour, and therefore transform the hourly intervals into the appropriate number of rows

> df1.full
   time temperature
1     0           0
2     1           0
3     2           1
4     3           1
5     4           2
6     5           2
7     6           2
8     7           3
9     8           3
10    9           3
11   10           3
12   11           3
13   12           3
14   13           4
15   14           4
16   15           4
17   16           4
18   17           4
19   18           4
20   19           1
21   20           1
22   21           1
23   22           1
24   23           1
2

There are 2 answers

6
r2evans On BEST ANSWER

Since you're discarding things after 11pm, we can do this:

library(dplyr)
library(tidyr) # unnest
doseq <- function(z) {
  z <- as.integer(z)
  if (length(z) > 1) {
    if (z[2] < z[1]) z[2] <- 24
    z <- z[1]:max(z[1], z[2]-1)
  }
  z
}

df1 |>
  mutate(time = lapply(strsplit(time, "-"), doseq)) |>
  unnest(time) |>
  print(n=99)
# # A tibble: 24 × 2
#     time temperature
#    <int>       <int>
#  1     0           0
#  2     1           0
#  3     2           1
#  4     3           1
#  5     4           2
#  6     5           2
#  7     6           2
#  8     7           3
#  9     8           3
# 10     9           3
# 11    10           3
# 12    11           3
# 13    12           3
# 14    13           4
# 15    14           4
# 16    15           4
# 17    16           4
# 18    17           4
# 19    18           4
# 20    19           1
# 21    20           1
# 22    21           1
# 23    22           1
# 24    23           1
0
jay.sf On

Not sure how your real data looks like, but in the case shown, you could sub 19-01 to 19-23, then - to : and evaluate the sequences to cbind, i.e. sth like

> with(df1, 
+      Map(cbind, 
+          lapply(lapply(sub('-', ':', sub('-01', '-23', time)), str2lang), eval), 
+          temperature)) |> do.call(what='rbind')
      [,1] [,2]
 [1,]    0    0
 [2,]    1    0
 [3,]    2    1
 [4,]    3    1
 [5,]    4    2
 [6,]    5    2
 [7,]    6    2
 [8,]    7    3
 [9,]    8    3
[10,]    9    3
[11,]   10    3
[12,]   11    3
[13,]   12    3
[14,]   13    3
[15,]   13    4
[16,]   14    4
[17,]   15    4
[18,]   16    4
[19,]   17    4
[20,]   18    4
[21,]   19    4
[22,]   19    1
[23,]   20    1
[24,]   21    1
[25,]   22    1
[26,]   23    1

Data:

> dput(df1)
structure(list(time = c("00", "01", "02", "03", "04", "05", "06", 
"07-13", "13-19", "19-01"), temperature = c(0L, 0L, 1L, 1L, 2L, 
2L, 2L, 3L, 4L, 1L)), class = "data.frame", row.names = c(NA, 
-10L))