R Expand Data Frame List

106 views Asked by At
# Input: one row per student; G1 and G2 are the first and last row
# positions (inclusive) that should be flagged with 1 for that student.
DATA <- data.frame(
  STUDENT = 1:3,
  G1 = c(2, 2, 1),
  G2 = c(3, 5, 4)
)

# Desired output: one 0/1 indicator column per student, 5 rows each;
# a row holds 1 when its position lies within that student's G1..G2 range.
DATA1 <- data.frame(
  STUDENT1 = c(0, 1, 1, 0, 0),
  STUDENT2 = c(0, 1, 1, 1, 1),
  STUDENT3 = c(1, 1, 1, 1, 0)
)

For each STUDENT in DATA, I wish to create a new column that has 5 rows. Imagine filling all the rows with 0, then setting a row to 1 if its row number falls within the range from G1 to G2 (inclusive) for that student in DATA, as shown in DATA1.

5

There are 5 answers

0
r2evans On BEST ANSWER

dplyr+tidyr

library(dplyr)
library(tidyr) # pivot_wider
# Long-then-wide approach:
#   1. rowwise() + reframe(): for each student emit 5 rows (row = 1..5) whose
#      `val` is 0 everywhere except positions G1:G2, which are set to 1.
#   2. pivot_wider(): spread the students into columns STUDENT1, STUDENT2, ...
#   3. select(-row): drop the helper row-position column.
DATA |>
  rowwise() |>
  reframe(STUDENT, row = 1:5, val = replace(rep(0, 5), G1:G2, 1)) |>
  pivot_wider(id_cols = row, names_from = STUDENT, values_from = val, names_prefix = "STUDENT") |>
  select(-row)

data.table

library(data.table)
# Step 1: expand every input row into 5 long-format rows; `val` is 1 for
# positions G1:G2 and 0 elsewhere. Grouping by row number evaluates the
# expression once per student.
long <- as.data.table(DATA)[
  , .(STUDENT, row = 1:5, val = replace(rep(0, 5), G1:G2, 1)),
  by = 1:nrow(DATA)
]
# Step 2: reshape to wide, one column per student.
out <- dcast(long, row ~ STUDENT, value.var = "val")
# Step 3: drop the helper position column by reference.
out[, row := NULL]
# Step 4: prefix the remaining (student id) column names.
setnames(out, paste0("STUDENT", names(out)))
out
#    STUDENT1 STUDENT2 STUDENT3
#       <num>    <num>    <num>
# 1:        0        0        1
# 2:        1        1        1
# 3:        1        1        1
# 4:        0        1        1
# 5:        0        1        0
0
Allan Cameron On

You could use apply here:

# Walk over the rows of the (G1, G2) columns; for each student build a 0/1
# vector over positions 1..max(G2) marking those inside [G1, G2]. apply()
# returns these vectors as matrix columns, which are then converted to a
# data frame and named "<id column name><student id>".
DATA[-1] |>
  apply(1, function(bounds) {
    positions <- seq(max(DATA$G2))
    inside <- positions >= bounds[1] & positions <= bounds[2]
    as.numeric(inside)
  }) |>
  as.data.frame() |>
  setNames(paste0(names(DATA)[1], DATA[[1]]))
#>   STUDENT1 STUDENT2 STUDENT3
#> 1        0        0        1
#> 2        1        1        1
#> 3        1        1        1
#> 4        0        1        1
#> 5        0        1        0
0
jpsmith On

One base R approach is to vectorize `seq` to create the row indices and then use lapply:

# Create the result frame: 5 rows of zeros, one column per student, named
# "<id column name><student id>". Uses base R setNames() (data.table's
# setnames() is not available in a base-only session).
newDF <- setNames(data.frame(matrix(0, nrow = 5, ncol = nrow(DATA))),
                  paste0(names(DATA)[1], DATA[, 1]))
# Vectorize seq over both endpoints. SIMPLIFY = FALSE guarantees a list with
# one index vector per student even when every range has the same length
# (the default would collapse equal-length results into a matrix).
mySeq <- Vectorize(seq.default, vectorize.args = c("to", "from"),
                   SIMPLIFY = FALSE)

inx <- mySeq(DATA$G1, DATA$G2) # row indices to set to 1, per student

# For each column, set the rows in that student's range to 1 and return the
# updated column; the assignment inside the function only touches a local copy.
newDF[] <- lapply(seq_along(newDF), \(x) {
  newDF[inx[[x]], x] <- 1
  newDF[, x]
})

#   STUDENT1 STUDENT2 STUDENT3
# 1        0        0        1
# 2        1        1        1
# 3        1        1        1
# 4        0        1        1
# 5        0        1        0
0
Onyambu On
# Build one 0/1 indicator vector per (x, y) pair.
#   x: vector of range starts; y: vector of range ends (same length as x).
# Returns a list whose k-th element spans positions 1..max(y) and is 1 where
# x[k] <= position <= y[k], 0 otherwise.
fn <- function(x, y) {
  grid <- seq(max(y))
  flag_range <- function(lo, hi) {
    (lo <= grid) * (grid <= hi)
  }
  mapply(flag_range, x, y, SIMPLIFY = FALSE)
}

# Evaluate fn() on the G1/G2 columns, name each resulting indicator vector
# after its student, and bind the named list into a data frame.
DATA |>
  with(setNames(fn(G1, G2), paste0("STUDENT", STUDENT))) |>
  data.frame()
  STUDENT1 STUDENT2 STUDENT3
1        0        0        1
2        1        1        1
3        1        1        1
4        0        1        1
5        0        1        0
0
Ben On

Was curious to try something different. Here we create a 2-column reference matrix based on the ranges provided that will contain the row and column to fill with 1 (in a final new matrix).

# Two-column integer index matrix: column 1 is the row position to fill,
# column 2 is the student's column position. Integer indices are used rather
# than a character matrix matched against dimnames, because as.matrix() on a
# mixed data frame pads numbers via format() (e.g. " 2" next to "10"), which
# would no longer match the row names.
mat_val <- do.call(
  rbind,
  Map(\(s, lo, hi) cbind(lo:hi, s), seq_len(nrow(DATA)), DATA$G1, DATA$G2)
)
# Zero matrix with one row per position 1..max(G2) and one column per student
# (nrow(DATA), not ncol(DATA): the number of students is the number of rows).
mat_new <- matrix(0,
                  nrow = max(DATA$G2),
                  ncol = nrow(DATA),
                  dimnames = list(seq_len(max(DATA$G2)),
                                  paste0("STUDENT", DATA$STUDENT)))
# Matrix indexing by a two-column matrix sets each (row, column) cell at once.
mat_new[mat_val] <- 1
mat_new

Output

  STUDENT1 STUDENT2 STUDENT3
1        0        0        1
2        1        1        1
3        1        1        1
4        0        1        1
5        0        1        0