I have two data frames that contain accession numbers, and I want to replace the values in a third, single-column data frame of accession numbers with the corresponding values from the `type` column.

First dataframe:

ï..study.identifier Year  source type     strain    sample identifier
21                  15 2009   human  DT8        DT8 ERS007592      C0110
22                  75 2004    duck DT30  S06281-04 ERS015643  S06281-04
23                  76 2009   duck  DT30  S04178-09 ERS015644  S04178-09
24                  81 2005 chicken DT36  S03433-05 ERS015648  S03433-05
25                  95 1996   duck   DT9 12342-1996 ERS015661 12342-1996
26                  96 2001    duck  DT9  4300-2001 ERS015662  4300-2001
       code  acession level1 level2 level3
21 5103_5_3 ERR024405      1      1      2
22 5391_2_7 ERR028639      1      1      2
23 5391_2_8 ERR028640      1      1      2
24 5505_1_1 ERR029213      1      1      2
25 5505_2_3 ERR029229      1      1      2
26 5505_2_4 ERR029230      1      1      2

Second dataframe:

  ï..study.identifier Year      source   type     strain    sample
67                  29   NA        <NA>  DT102  3193-1995 ERS015603
68                  35 2001         pig  DT108   547-2001 ERS015613
69                  39 1999         pig DT104A  7302-1999 ERS015607
70                  44 1996      cattle  DT108 10984-1996 ERS015612
71                  51 1998         pig  DT120  4284-1995 ERS015619
72                  61 1997 environment  DT167  8721-1997 ERS015629
   identifier      code  acession level1 level2 level3
67  3193-1995 5386_5_11 ERR028272      2      4     11
68   547-2001 5386_6_10 ERR028283      2      4     11
69   7302-199  5386_6_4 ERR028287      2      4     11
70 10984-1996  5386_6_9 ERR028292      2      4     11
71  4284-1995  5386_7_5 ERR028300      2      4     11
72  8721-1997  5386_8_4 ERR028311      2      4     11

Dataframe whose values should be substituted:

 tree.tip.label
1      ERR028314
2      ERR028313
3      ERR028300
4      ERR028635
5      ERR028292
6      ERR028312

How do I achieve this in R?

I'm newish to R and I can't seem to find a source that's specific to my problem

1

There is 1 answer

0
TarJae On

Bind `df1` and `df2` into one data frame, adding a column that holds the name of the data frame each row came from. Then check your condition with an `ifelse()` call:

library(dplyr)

# Stack df1 and df2, tagging every row with the name of the data frame it
# came from, then overwrite `acession` with `type` wherever the accession
# occurs among the tree tip labels in df3.
list(df1 = df1, df2 = df2) %>%
  bind_rows(.id = "source_df") %>%
  mutate(
    acession = ifelse(acession %in% df3$tree.tip.label, type, acession)
  )

output:

   source_df study.identifier Year      source   type     strain    sample identifier      code  acession level1 level2 level3
21       df1               15 2009       human    DT8        DT8 ERS007592      C0110  5103_5_3 ERR024405      1      1      2
22       df1               75 2004        duck   DT30  S06281-04 ERS015643  S06281-04  5391_2_7 ERR028639      1      1      2
23       df1               76 2009        duck   DT30  S04178-09 ERS015644  S04178-09  5391_2_8 ERR028640      1      1      2
24       df1               81 2005     chicken   DT36  S03433-05 ERS015648  S03433-05  5505_1_1 ERR029213      1      1      2
25       df1               95 1996        duck    DT9 12342-1996 ERS015661 12342-1996  5505_2_3 ERR029229      1      1      2
26       df1               96 2001        duck    DT9  4300-2001 ERS015662  4300-2001  5505_2_4 ERR029230      1      1      2
67       df2               29   NA        <NA>  DT102  3193-1995 ERS015603  3193-1995 5386_5_11 ERR028272      2      4     11
68       df2               35 2001         pig  DT108   547-2001 ERS015613   547-2001 5386_6_10 ERR028283      2      4     11
69       df2               39 1999         pig DT104A  7302-1999 ERS015607   7302-199  5386_6_4 ERR028287      2      4     11
70       df2               44 1996      cattle  DT108 10984-1996 ERS015612 10984-1996  5386_6_9     DT108      2      4     11
71       df2               51 1998         pig  DT120  4284-1995 ERS015619  4284-1995  5386_7_5     DT120      2      4     11
72       df2               61 1997 environment  DT167  8721-1997 ERS015629  8721-1997  5386_8_4 ERR028311      2      4     11

data:

# Example data: the first metadata table (rows 21-26), one column per entry.
df1 <- structure(
  list(
    study.identifier = c(15L, 75L, 76L, 81L, 95L, 96L),
    Year = c(2009L, 2004L, 2009L, 2005L, 1996L, 2001L),
    source = c("human", "duck", "duck", "chicken", "duck", "duck"),
    type = c("DT8", "DT30", "DT30", "DT36", "DT9", "DT9"),
    strain = c(
      "DT8", "S06281-04", "S04178-09", "S03433-05", "12342-1996", "4300-2001"
    ),
    sample = c(
      "ERS007592", "ERS015643", "ERS015644", "ERS015648", "ERS015661",
      "ERS015662"
    ),
    identifier = c(
      "C0110", "S06281-04", "S04178-09", "S03433-05", "12342-1996", "4300-2001"
    ),
    code = c(
      "5103_5_3", "5391_2_7", "5391_2_8", "5505_1_1", "5505_2_3", "5505_2_4"
    ),
    acession = c(
      "ERR024405", "ERR028639", "ERR028640", "ERR029213", "ERR029229",
      "ERR029230"
    ),
    level1 = rep(1L, 6L),
    level2 = rep(1L, 6L),
    level3 = rep(2L, 6L)
  ),
  class = "data.frame",
  # Character row names, matching the row labels of the printed data frame.
  row.names = as.character(21:26)
)

# Example data: the second metadata table (rows 67-72), one column per entry.
df2 <- structure(
  list(
    study.identifier = c(29L, 35L, 39L, 44L, 51L, 61L),
    Year = c(NA, 2001L, 1999L, 1996L, 1998L, 1997L),
    # The first source is missing. Use a real NA rather than the literal
    # string "<NA>" (which is only how R *prints* a missing character value),
    # so that is.na() and friends treat it as missing.
    source = c(NA_character_, "pig", "pig", "cattle", "pig", "environment"),
    type = c("DT102", "DT108", "DT104A", "DT108", "DT120", "DT167"),
    strain = c(
      "3193-1995", "547-2001", "7302-1999", "10984-1996", "4284-1995",
      "8721-1997"
    ),
    sample = c(
      "ERS015603", "ERS015613", "ERS015607", "ERS015612", "ERS015619",
      "ERS015629"
    ),
    identifier = c(
      "3193-1995", "547-2001", "7302-199", "10984-1996", "4284-1995",
      "8721-1997"
    ),
    code = c(
      "5386_5_11", "5386_6_10", "5386_6_4", "5386_6_9", "5386_7_5", "5386_8_4"
    ),
    acession = c(
      "ERR028272", "ERR028283", "ERR028287", "ERR028292", "ERR028300",
      "ERR028311"
    ),
    level1 = rep(2L, 6L),
    level2 = rep(4L, 6L),
    level3 = rep(11L, 6L)
  ),
  class = "data.frame",
  # Character row names, matching the row labels of the printed data frame.
  row.names = as.character(67:72)
)

# Example data: the single-column data frame of tree tip labels.
df3 <- structure(
  list(
    tree.tip.label = c(
      "ERR028314", "ERR028313", "ERR028300", "ERR028635", "ERR028292",
      "ERR028312"
    )
  ),
  class = "data.frame",
  row.names = as.character(1:6)
)