issue in exporting rstatix::tukey_test() loop over multiple columns and factor variables of data frame

20 views Asked by At

I have a dataset df with 11 numeric environmental variables (here for simplicity only 4 are reported: c,ph,t,do) and three factors (stream, season, event). I would like to run a loop on this four columns with the pipe friendly anova_test and tukey_test from rstatix package. This is because I would like to know the differences in each environmental variable among the three different streams accounting the season (fall, winter, spring, summer) but also the event (flood, drought) (e.g. what's the variation in "ph" among the three streams during the fall drought, fall flood and so on)

Here's a piece of the dataset:

df<-structure(list(stream = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L), levels = c("blendziava", "smeltaite", "sventoji"), class = "factor"), 
    season = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 4L, 4L, 4L, 4L), levels = c("fall", "winter", "spring", 
    "summer"), class = c("ordered", "factor")), event = structure(c(1L, 
    1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), levels = c("drought", 
    "flood"), class = "factor"), c = c(533, 596, 622, 590, 282, 
    313, 310, 293, 285, 292, 280, 262, 357, 360, 348, 337, 424, 
    464, 441, 427, 506, 703, 606, 579, 506, 605, 587, 526, 307, 
    366, 301, 330, 385, 346, 396, 210, 225, 218, 217, 228, 226, 
    270, 130, 136, 135, 143, 150, 153, 151, 222, 232, 234, 235, 
    238, 238, 240, 299, 296, 296, 297, 308, 305, 293, 360, 384, 
    382, 368, 367, 360, 362, 387, 417, 404, 389, 386, 382, 378, 
    386, 390, 397, 360, 190, 195, 191, 203, 175, 176, 173, 177, 
    235, 226, 220, 217, 316, 303, 291, 292, 366, 355, 343, 350, 
    378, 368, 341, 384), ph = c(8.31, 8.04, 8.28, 8.2, 8.35, 
    8.06, 8.02, 8.19, 8.14, 7.98, 7.91, 8.04, 7.91, 7.71, 7.82, 
    7.98, 8.3, 7.97, 8.17, 8.21, 8.05, 7.5, 7.86, 8.06, 7.86, 
    7.68, 7.88, 7.92, 9.34, 9.18, 9.34, 9.27, 9.16, 9.18, 8.97, 
    8.08, 8.05, 8, 8.03, 7, 8.07, 8.38, 7.97, 7.95, 7.97, 8.03, 
    8.05, 8.23, 8.65, 8.07, 8.17, 8.22, 8.21, 8.23, 8.27, 8.34, 
    8.3, 8.54, 8.53, 8.46, 8.49, 8.51, 8.59, 8.2, 8.18, 8.27, 
    8.3, 8.44, 8.46, 8.47, 8.04, 7.94, 8.15, 8.17, 8.29, 8.38, 
    8.39, 8.34, 8.33, 8.38, 8.26, 8.12, 8.09, 8.07, 8.05, 8.1, 
    8.1, 6.03, 8.01, 7.94, 7.94, 8, 8.16, 8.23, 8.08, 8.17, 8.16, 
    7.93, 7.87, 7.87, 7.7, 7.65, 7.71, 7.72, 7.81), t = c(12.9, 
    12.2, 12.1, 12.1, 8.2, 8.3, 8.2, 7.9, 2.5, 2.3, 2.3, 1.9, 
    4.7, 3.6, 3.3, 4, 11.5, 11.9, 10.5, 10.5, 18, 19, 17.6, 16, 
    17.3, 19.1, 17.9, 17, 10.7, 11.3, 11.3, 10.9, 11.3, 11.3, 
    11.2, 4.3, 5, 5.1, 5.2, 5.4, 5.4, 5.4, 1.6, 1.8, 1.9, 2.1, 
    2.1, 2.2, 2.4, 4.9, 4.7, 4.9, 4.2, 4.2, 4.3, 4.6, 10.5, 11.8, 
    11.5, 10.7, 12, 12.2, 11.3, 15.2, 17.5, 17.3, 16.1, 17.2, 
    16.8, 17.6, 17, 19.5, 19, 17.9, 18.6, 18.4, 19.4, 11.3, 11.1, 
    11.5, 11.5, 5.3, 5.3, 5.4, 5.6, 2.5, 2.3, 2.4, 2.4, 4.2, 
    3.6, 3.3, 3.5, 11, 10.6, 9.7, 9.8, 18, 18, 17, 16.6, 19.9, 
    20, 19, 19.6), do = c(9.2, 5.8, 7.8, 9.2, 7.9, 7.5, 8, 9.1, 
    11.7, 11.3, 11.7, 13.5, 13.3, 11.7, 13.2, 13.7, 14.9, 11.4, 
    13.8, 11.6, 7.9, 5, 5, 7.9, 8.2, 4.3, 7, 8.3, 9.9, 9.8, 10.5, 
    10.1, 10.8, 11, 10.6, 11.2, 11.5, 11.6, 11.4, 11.3, 11.9, 
    12.6, 12.4, 12.9, 12.5, 12.2, 12.3, 12.6, 14.7, 13.7, 14.9, 
    14.3, 14.3, 14.5, 14.5, 14.2, 14.2, 15.7, 15, 13.5, 13.4, 
    13.2, 13.9, 10.5, 8.6, 9.9, 10.6, 11.6, 10.6, 9.7, 10, 7.3, 
    9.7, 9.9, 10.6, 10.9, 10.9, 8.7, 9.8, 10.1, 9.8, 9.4, 9, 
    9.5, 9.8, 13, 12.5, 12.1, 12.7, 13.3, 14.4, 14.8, 15.5, 11.8, 
    12.1, 13, 14.4, 8, 8.8, 8.9, 7.3, 5.4, 8.4, 7.8, 6.1)), row.names = c(NA, 
-105L), class = c("tbl_df", "tbl", "data.frame"))

So far I toke inspiration from this thread: https://stackoverflow.com/questions/72920778/rstatixanova-test-over-multiple-columns-of-data-frame Using the code:

library(rstatix)
results <- NULL
for(i in 4:ncol(df)){
   results<-df %>%
      group_by(season,event) %>%
      tukey_hsd(formula(paste0(names(df)[i],"~","stream")))%>%
   add_significance()
}
results

The problem is that it gives me only the tibble of the last environmental variable of the dataset, do in this case, when I would like to have a complete output for all the environmental variables, exportable to excel. Can somebody help me?

0

There are 0 answers