Mutate new column in tibble with values of other columns by matched pattern

101 views Asked by At

My tibble:

df <- tibble(a = c(1,2,3,4,5,6,7,8,9,10),
                 b = c("", "", "1", "", "1", "3", "2", "", "5", ""),
                 c = c("", "", "", "", "x1", "x3", "x2", "", "x5", ""),
                 d = c("", "", "1", "", "1", "3", "2", "", "5", ""),
                 e = c("x1", "x4", "", "x5", "", "", "", "x2", "", "x2"))

# A tibble: 10 × 5
       a b     c     d     e    
   <dbl> <chr> <chr> <chr> <chr>
 1     1 ""    ""    ""    "x1" 
 2     2 ""    ""    ""    "x4" 
 3     3 "1"   ""    "1"   ""   
 4     4 ""    ""    ""    "x5" 
 5     5 "1"   "x1"  "1"   ""   
 6     6 "3"   "x3"  "3"   ""   
 7     7 "2"   "x2"  "2"   ""   
 8     8 ""    ""    ""    "x2" 
 9     9 "5"   "x5"  "5"   ""   
10    10 ""    ""    ""    "x2" 

The desired outcome:

# A tibble: 10 × 6
       a b     c     d     e     f    
   <dbl> <chr> <chr> <chr> <chr> <chr>
 1     1 ""    ""    ""    "x1"  "x1" 
 2     2 ""    ""    ""    "x4"  "x4" 
 3     3 "1"   ""    "1"   ""    ""   
 4     4 ""    ""    ""    "x5"  "x5" 
 5     5 "1"   "x1"  "1"   ""    "x1" 
 6     6 "3"   "x3"  "3"   ""    "x3" 
 7     7 "2"   "x2"  "2"   ""    "x2" 
 8     8 ""    ""    ""    "x2"  "x2" 
 9     9 "5"   "x5"  "5"   ""    "x5" 
10    10 ""    ""    ""    "x2"  "x2" 

I want to add a new column with that value of a row that has an "x" in it. I don't want to specify the columns. I just want to look at any number of column there is, which may vary.

Somehow like this, maybe:

df %>%
 find_value_with_x_per_row %>%
 put_that_value_in_new_column_per_row %>%
 if_you_dont_find_value_with_x_put_""_instead
4

There are 4 answers

1
TarJae On BEST ANSWER

Here we check each column if there is an x. If so combine them and put it into new column f. This is important if you have two columns with an x in one row:

library(dplyr)
library(tidyr)

df %>% 
  mutate(across(a:e, ~case_when(grepl("x", .) ~.), .names = 'new_{col}')) %>%
  unite(f, starts_with('new'), na.rm = TRUE, sep = ' ')
      a b     c     d     e     f    
   <dbl> <chr> <chr> <chr> <chr> <chr>
 1     1 ""    ""    ""    "x1"  "x1" 
 2     2 ""    ""    ""    "x4"  "x4" 
 3     3 "1"   ""    "1"   ""    ""   
 4     4 ""    ""    ""    "x5"  "x5" 
 5     5 "1"   "x1"  "1"   ""    "x1" 
 6     6 "3"   "x3"  "3"   ""    "x3" 
 7     7 "2"   "x2"  "2"   ""    "x2" 
 8     8 ""    ""    ""    "x2"  "x2" 
 9     9 "5"   "x5"  "5"   ""    "x5" 
10    10 ""    ""    ""    "x2"  "x2" 

Consider this example:

df <- tibble(a = c(1,2,3,4,5,6,7,8,9,10),
             b = c("", "", "x1", "", "1", "3", "2", "", "5", ""),
             c = c("", "", "", "", "x1", "x3", "x2", "", "x5", ""),
             d = c("", "", "x1", "", "1", "3", "2", "", "5", ""),
             e = c("x1", "x4", "", "x5", "", "", "", "x2", "", "x2"))

# A tibble: 10 × 6
       a b     c     d     e     f    
   <dbl> <chr> <chr> <chr> <chr> <chr>
 1     1 ""    ""    ""    "x1"  x1   
 2     2 ""    ""    ""    "x4"  x4   
 3     3 "x1"  ""    "x1"  ""    x1   
 4     4 ""    ""    ""    "x5"  x5   
 5     5 "1"   "x1"  "1"   ""    x1   
 6     6 "3"   "x3"  "3"   ""    x3   
 7     7 "2"   "x2"  "2"   ""    x2   
 8     8 ""    ""    ""    "x2"  x2   
 9     9 "5"   "x5"  "5"   ""    x5   
10    10 ""    ""    ""    "x2"  x2  

library(dplyr)
library(tidyr)

df %>% 
  mutate(across(a:e, ~case_when(grepl("x", .) ~.), .names = 'new_{col}')) %>%
  unite(f, starts_with('new'), na.rm = TRUE, sep = ' ')


       a b     c     d     e     f    
   <dbl> <chr> <chr> <chr> <chr> <chr>
 1     1 ""    ""    ""    "x1"  x1   
 2     2 ""    ""    ""    "x4"  x4   
 3     3 "x1"  ""    "x1"  ""    x1 x1
 4     4 ""    ""    ""    "x5"  x5   
 5     5 "1"   "x1"  "1"   ""    x1   
 6     6 "3"   "x3"  "3"   ""    x3   
 7     7 "2"   "x2"  "2"   ""    x2   
 8     8 ""    ""    ""    "x2"  x2   
 9     9 "5"   "x5"  "5"   ""    x5   
10    10 ""    ""    ""    "x2"  x2   
0
mapardo On

A short answer in case you have only one "X*" coincidence by row. If there is not "X*" string in a row, the output is "". You can run it with any number of columns, independently of the column names

cbind(df,f=apply(df,1,function(x){ifelse(any(grepl("x",x)),x[grepl("x",x)],"")}))
0
Onyambu On

If your data does not contain anything that starts with y or z you could use pmax:

cbind(df, f=sub("^[^x].*", "", do.call(pmax,df)))

    a b  c d  e  f
1   1        x1 x1
2   2        x4 x4
3   3 1    1      
4   4        x5 x5
5   5 1 x1 1    x1
6   6 3 x3 3    x3
7   7 2 x2 2    x2
8   8        x2 x2
9   9 5 x5 5    x5
10 10        x2 x2
2
Mark On
dplyr::mutate(df, f = c_across(everything())[grepl("x", c_across(everything()))][1], .by = a)