% count(cyl, gear) #> cyl gear n #> 1 4 3 1 #> 2 4 4 8 #> 3 4 5 2 #> 4 6 3 2 #> 5 6 4 4 #> 6 6 5 1 #> 7 8 3 12 " /> % count(cyl, gear) #> cyl gear n #> 1 4 3 1 #> 2 4 4 8 #> 3 4 5 2 #> 4 6 3 2 #> 5 6 4 4 #> 6 6 5 1 #> 7 8 3 12 " /> % count(cyl, gear) #> cyl gear n #> 1 4 3 1 #> 2 4 4 8 #> 3 4 5 2 #> 4 6 3 2 #> 5 6 4 4 #> 6 6 5 1 #> 7 8 3 12 "/>

multiple columns into seperate name-value pairs

56 views Asked by At

What I have:

library("dplyr")

mtcars %>% count(cyl, gear)
#>   cyl gear  n
#> 1   4    3  1
#> 2   4    4  8
#> 3   4    5  2
#> 4   6    3  2
#> 5   6    4  4
#> 6   6    5  1
#> 7   8    3 12
#> 8   8    5  2

What I need:

#>   variable1 category1 variable2 category2  n
#> 1       cyl         4      gear         3  1
#> 2       cyl         4      gear         4  8
#> 3       cyl         4      gear         5  2
#> 4       cyl         6      gear         3  2
#> 5       cyl         6      gear         4  4
#> 6       cyl         6      gear         5  1
#> 7       cyl         8      gear         3 12
#> 8       cyl         8      gear         5  2

The catch is that the variables (cyl and gear in this case) passed to count() are not fixed, and moreover the number of variables can vary from 1 upwards. They will be passed as arguments to a broader function. Hence I'm looking for a solution that would work nicely with curly-curly or similar. The names of the variables follow no pattern.

I've considered using multiple calls to tidyr::pivot_longer() but I can't work out how this would work with a varying number of variables.

I thought a better approach may be to use dplyr::across() along with dplyr::cur_column(). Something like the pseudo-code below:

var_count <- function(cnt_var) {
  mtcars %>%
    count(across({{ cnt_var }})) %>% # this works as intended
    mutate(across({{ cnt_var }}, \(col) cur_column(), .names = "category")) %>% # an attempt to create the 'variable' names. doesn't work when length(cnt_var) > 1
    rename_with(.cols = {{ cnt_var }}, .fn = "category") # a thought about how to create the 'category' columns
}

var_count(cnt_var = c(cyl)) # this ideally should produce one name-value pair: variable1 and category1

var_count(cnt_var = c(cyl, gear)) # this should produce two pairs: variable1, category1, variable2, category2

var_count(cnt_var = c(cyl, gear, vs)) # this should produce three pairs, etc

I'd ideally like a tidyverse solution, but all suggestions are most welcome. Thanks folks!

2

There are 2 answers

3
stefan On BEST ANSWER

As you already guessed I would go for across, cur_column and some renaming. As a first step I create the category and variable columns, then get rid of the original columns. Afterwards I use rename_with and string::str_replace to replace the column names suffixes with numeric suffixes:

library(dplyr, warn = FALSE)
library(stringr)

mtcars %>%
  count(cyl, gear) |>
  mutate(
    across(-n, list(
      variable = ~ cur_column(),
      category = ~.x
    ),
    .names = "{.fn}_{.col}"
    )
  ) |>
  select(matches("^(category|variable)"), n) |>
  rename_with(
    ~ stringr::str_replace(.x, "_.*$", \(x) as.numeric(factor(x))),
    .cols = -n
  )
#>   variable1 category1 variable2 category2  n
#> 1       cyl         4      gear         3  1
#> 2       cyl         4      gear         4  8
#> 3       cyl         4      gear         5  2
#> 4       cyl         6      gear         3  2
#> 5       cyl         6      gear         4  4
#> 6       cyl         6      gear         5  1
#> 7       cyl         8      gear         3 12
#> 8       cyl         8      gear         5  2

var_count <- function(.data, ...) {
  .data |> 
    count(...) |>
    mutate(
      across(-n, list(
        variable = ~ cur_column(),
        category = ~.x
      ),
      .names = "{.fn}_{.col}"
      )
    ) |>
    select(matches("^(category|variable)"), n) |>
    rename_with(
      ~ stringr::str_replace(.x, "_.*$", \(x) as.numeric(factor(x))),
      .cols = -n
    )
}

var_count(mtcars, cyl)
#>   variable1 category1  n
#> 1       cyl         4 11
#> 2       cyl         6  7
#> 3       cyl         8 14

var_count(mtcars, cyl, gear)
#>   variable1 category1 variable2 category2  n
#> 1       cyl         4      gear         3  1
#> 2       cyl         4      gear         4  8
#> 3       cyl         4      gear         5  2
#> 4       cyl         6      gear         3  2
#> 5       cyl         6      gear         4  4
#> 6       cyl         6      gear         5  1
#> 7       cyl         8      gear         3 12
#> 8       cyl         8      gear         5  2

var_count(mtcars, cyl, gear, vs)
#>    variable1 category1 variable2 category2 variable3 category3  n
#> 1        cyl         4      gear         3        vs         1  1
#> 2        cyl         4      gear         4        vs         1  8
#> 3        cyl         4      gear         5        vs         0  1
#> 4        cyl         4      gear         5        vs         1  1
#> 5        cyl         6      gear         3        vs         1  2
#> 6        cyl         6      gear         4        vs         0  2
#> 7        cyl         6      gear         4        vs         1  2
#> 8        cyl         6      gear         5        vs         0  1
#> 9        cyl         8      gear         3        vs         0 12
#> 10       cyl         8      gear         5        vs         0  2

UPDATE And thanks to the comment by @lotus we can make the function more concise using .keep="unused" and .before=1 to get rid of the select and by doing the renaming in across() to get rid of the rename_with:

var_count <- function(.data, ...) {
  .data |>
    count(...) |>
    mutate(
      across(-n, list(
        variable = ~ cur_column(),
        category = ~.x
      ),
      .names = "{.fn}{(seq_along(.col) + 1) %/% 2}"
      ),
      .keep = "unused",
      .before = 1
    )
}

var_count(mtcars, cyl)
#>   variable1 category1  n
#> 1       cyl         4 11
#> 2       cyl         6  7
#> 3       cyl         8 14

var_count(mtcars, cyl, gear)
#>   variable1 category1 variable2 category2  n
#> 1       cyl         4      gear         3  1
#> 2       cyl         4      gear         4  8
#> 3       cyl         4      gear         5  2
#> 4       cyl         6      gear         3  2
#> 5       cyl         6      gear         4  4
#> 6       cyl         6      gear         5  1
#> 7       cyl         8      gear         3 12
#> 8       cyl         8      gear         5  2

var_count(mtcars, cyl, gear, vs)
#>    variable1 category1 variable2 category2 variable3 category3  n
#> 1        cyl         4      gear         3        vs         1  1
#> 2        cyl         4      gear         4        vs         1  8
#> 3        cyl         4      gear         5        vs         0  1
#> 4        cyl         4      gear         5        vs         1  1
#> 5        cyl         6      gear         3        vs         1  2
#> 6        cyl         6      gear         4        vs         0  2
#> 7        cyl         6      gear         4        vs         1  2
#> 8        cyl         6      gear         5        vs         0  1
#> 9        cyl         8      gear         3        vs         0 12
#> 10       cyl         8      gear         5        vs         0  2
0
ThomasIsCoding On

You can try the code below with pivot_*

mtcars %>%
   count(cyl, gear) %>%
   pivot_longer(!n, names_to = "variable", values_to = "category") %>%
   mutate(grp = cur_group_id(), rid = row_number(), .by = variable) %>%
   pivot_wider(
      values_from = c(variable, category),
      names_from = grp, names_sep = ""
   ) %>%
   select(-rid) %>%
   select(order(as.numeric(sub("\\D+", "", names(.))))) %>%
   relocate(n,.after = last_col())

which gives

# A tibble: 8 × 5
  variable1 category1 variable2 category2     n
  <chr>         <dbl> <chr>         <dbl> <int>
1 cyl               4 gear              3     1
2 cyl               4 gear              4     8
3 cyl               4 gear              5     2
4 cyl               6 gear              3     2
5 cyl               6 gear              4     4
6 cyl               6 gear              5     1
7 cyl               8 gear              3    12
8 cyl               8 gear              5     2