Create a named list of numerics from a data.frame in R

58 views Asked by At

I want to create a named list of numeric vectors from my DATA, such that every variable ending in "baseline" gets subtracted from each of its corresponding follow-up variables (those ending in "post1", "post2", and so on).

For example, the first element's name will be "beginner_post1 - beginner_baseline".

For example, the first element's value will be c(5,-2): beginner_post1 is on Row 5 and beginner_baseline is on Row 2, and the minus sign marks the row being subtracted.

Is it possible to automate this process to achieve my Desired_output (below)?

# Example input: one row per variable, each named "<level>_<timepoint>",
# with `Row` giving the variable's position in the table.
DATA <- read.table(
  text = "
Row           Variables
1     advanced_baseline
2     beginner_baseline
3 intermediate_baseline
4        advanced_post1
5        beginner_post1
6    intermediate_post1
7        advanced_post2
8        beginner_post2
9    intermediate_post2",
  header = TRUE
)


# Target result: one entry per follow-up/baseline pair. Each value holds the
# follow-up's row number followed by the negated baseline row number.
Desired_output <- setNames(
  list(c(5, -2), c(6, -3), c(4, -1), c(8, -2), c(9, -3), c(7, -1)),
  c(
    "(    beginner_post1 - beginner_baseline    )",
    "(intermediate_post1 - intermediate_baseline)",
    "(    advanced_post1 - advanced_baseline    )",
    "(    beginner_post2 - beginner_baseline    )",
    "(intermediate_post2 - intermediate_baseline)",
    "(    advanced_post2 - advanced_baseline    )"
  )
)
1

There is 1 answer

3
LMc On BEST ANSWER

Edit

Here is an option for any number of "post" measures:

library(dplyr)

# Build one named contrast per follow-up ("post") row. Each element is named
# "(<post variable> - <baseline variable>)" and holds
# c(post row number, -baseline row number).
DATA |>
  # Every non-baseline row is a follow-up measure that needs a contrast.
  filter(!endsWith(Variables, "baseline")) |>
  mutate(
    # Derive the matching baseline name, e.g. advanced_post2 -> advanced_baseline.
    # The pattern is anchored so only the trailing wave suffix is rewritten.
    Variables2 = sub("post\\d+$", "baseline", Variables),
    Variables = paste0("(", Variables, " - ", Variables2, ")")
  ) |>
  # inner_join (rather than right_join) drops baselines that have no follow-up,
  # which would otherwise surface as an NA-named element holding c(NA, -row).
  inner_join(
    filter(DATA, endsWith(Variables, "baseline")),
    by = c("Variables2" = "Variables"),
    suffix = c("_post", "_pre")
  ) |>
  # `.by` already carries the contrast label through, so only `Row` is built.
  summarize(Row = list(c(Row_post, -Row_pre)), .by = Variables) |>
  # Two-column table (name, value) -> named list.
  tibble::deframe()
# $`(advanced_post1 - advanced_baseline)`
# [1]  4 -1
# 
# $`(beginner_post1 - beginner_baseline)`
# [1]  5 -2
# 
# $`(intermediate_post1 - intermediate_baseline)`
# [1]  6 -3
# 
# $`(advanced_post2 - advanced_baseline)`
# [1]  7 -1
# 
# $`(beginner_post2 - beginner_baseline)`
# [1]  8 -2
# 
# $`(intermediate_post2 - intermediate_baseline)`
# [1]  9 -3

library(tidyr)
library(dplyr)

# Two-time-point version: contrasts each `*_post1` row with its `*_baseline`
# row only. Later waves (e.g. `post2`) are filtered out first, because the
# c(1, -1) sign vector below assumes exactly two rows (post1, baseline) and
# would otherwise be recycled across three or more rows, giving wrong values.
DATA |>
  separate_wider_delim(Variables, "_", names = c("x", "y")) |>
  filter(y %in% c("baseline", "post1")) |>
  pivot_wider(
    names_from = x,
    values_from = Row,
    names_glue = "({x}_post1 - {x}_baseline)"
  ) |>
  # Descending order puts "post1" before "baseline", matching the sign vector.
  arrange(desc(y)) |>
  # Negate the baseline row number; `.keep = "none"` drops the `y` label column.
  mutate(across(-y, \(x) x * c(1, -1)), .keep = "none") |>
  as.list()
# $`(advanced_post1 - advanced_baseline)`
# [1]  4 -1
# 
# $`(beginner_post1 - beginner_baseline)`
# [1]  5 -2
# 
# $`(intermediate_post1 - intermediate_baseline)`
# [1]  6 -3