How to add p-values and comparison bars to a grouped violin ggplot?

23 views Asked by At

I use following data

structure(list(Conditioning = c("PBS", "PBS", "PBS", "PBS", "PBS", 
"PBS", "PBS", "PBS", "RSV", "RSV", "RSV", "RSV", "RSV", "RSV", 
"RSV", "RSV", "PBS", "PBS", "PBS", "PBS", "PBS", "PBS", "PBS", 
"RSV", "RSV", "RSV", "RSV", "RSV", "RSV", "RSV", "RSV", "PBS", 
"PBS", "PBS", "PBS", "PBS", "PBS", "PBS", "PBS", "PBS", "PBS", 
"PBS", "PBS", "RSV", "RSV", "RSV", "RSV", "RSV", "RSV", "RSV", 
"RSV", "RSV", "RSV", "RSV", "RSV", "PBS", "PBS", "PBS", "PBS", 
"PBS", "PBS", "PBS", "PBS", "PBS", "PBS", "PBS", "PBS", "RSV", 
"RSV", "RSV", "RSV", "RSV", "RSV", "RSV", "RSV", "RSV", "RSV", 
"RSV", "RSV", "PBS", "PBS", "PBS", "PBS", "PBS", "PBS", "PBS", 
"PBS", "RSV", "RSV", "RSV", "RSV", "RSV", "RSV", "RSV", "RSV", 
"PBS", "PBS", "PBS", "PBS", "PBS", "PBS", "PBS", "PBS", "PBS", 
"PBS", "RSV", "RSV", "RSV", "RSV", "RSV", "RSV", "RSV", "RSV", 
"RSV", "RSV"), Stimulation = c("PBS", "PBS", "PBS", "PBS", "Poly(I:C)", 
"Poly(I:C)", "Poly(I:C)", "Poly(I:C)", "PBS", "PBS", "PBS", "PBS", 
"Poly(I:C)", "Poly(I:C)", "Poly(I:C)", "Poly(I:C)", "PBS", "PBS", 
"PBS", "Poly(I:C)", "Poly(I:C)", "Poly(I:C)", "Poly(I:C)", "PBS", 
"PBS", "PBS", "PBS", "Poly(I:C)", "Poly(I:C)", "Poly(I:C)", "Poly(I:C)", 
"PBS", "PBS", "PBS", "PBS", "Poly(I:C)", "Poly(I:C)", "Poly(I:C)", 
"Poly(I:C)", "LPS", "LPS", "LPS", "LPS", "PBS", "PBS", "PBS", 
"PBS", "Poly(I:C)", "Poly(I:C)", "Poly(I:C)", "Poly(I:C)", "LPS", 
"LPS", "LPS", "LPS", "PBS", "PBS", "PBS", "PBS", "Poly(I:C)", 
"Poly(I:C)", "Poly(I:C)", "Poly(I:C)", "LPS", "LPS", "LPS", "LPS", 
"PBS", "PBS", "PBS", "PBS", "Poly(I:C)", "Poly(I:C)", "Poly(I:C)", 
"Poly(I:C)", "LPS", "LPS", "LPS", "LPS", "PBS", "PBS", "PBS", 
"PBS", "LPS", "LPS", "LPS", "LPS", "PBS", "PBS", "PBS", "PBS", 
"LPS", "LPS", "LPS", "LPS", "PBS", "PBS", "PBS", "PBS", "LPS", 
"LPS", "LPS", "LPS", "LPS", "LPS", "PBS", "PBS", "PBS", "PBS", 
"LPS", "LPS", "LPS", "LPS", "LPS", "LPS"), ddCt = c(0.712890726470947, 
-1.51053422470093, 0.62173572402954, 0.175907774200441, 1.1765065486145, 
1.42335632339478, 1.93200543045044, 0.908589529571534, 1.17023241165161, 
0.988701464385988, 0.132903547515872, 1.57435436294556, 1.75699954299927, 
-1.87696014511109, 2.88366565994263, 1.23123718154907, 0.116335588582356, 
0.352466984456381, -0.468802573038737, 1.3970194148763, 0.481066022542318, 
0.600303055623375, 1.34794571187337, 0.825963625081382, 0.694048035481771, 
0.975223453572593, 0.856327275594076, 2.11683821248372, 1.33858894551595, 
1.65258184041341, 1.56159302083334, 0.0904685324096688, 0.0660666299438466, 
-0.348436968688966, 0.191901806335451, 0.304727882385254, 0.323959453430176, 
1.9253715725708, 0.694553711242675, 0.520885572814939, 0.646400688171386, 
1.91832291473389, 0.609325671691894, 0.879443809509278, 0.803959282531738, 
1.90072258331299, 1.34354614898682, 2.00206379241943, 2.01044038360596, 
1.6196945993042, 1.65221146392822, 2.66974447418213, 2.63014309295654, 
2.52014644500732, 2.25022933990479, 0.721713081970215, -0.471333789367677, 
-0.113340635070799, -0.13703865753174, 1.93499449188232, 1.5370870980835, 
1.74555804168701, 2.10604276702881, 1.23211120697022, 1.5463953024292, 
1.22186406951904, 1.16494460662842, 1.14316965057373, 1.43851313568116, 
1.11419914031983, 1.51056688873291, 2.85070015167236, 2.58818091705323, 
2.8083994821167, 2.84965019561768, 2.57636979827881, 1.27906429168701, 
1.93625658050537, 2.66391016204834, 1.04198787261963, 0.575379369812012, 
1.47649689910889, -0.290016893005369, -0.476672705383301, 1.99234144744873, 
1.3199579953003, 1.86869477996826, 2.14944308013916, 1.63165351776123, 
1.68175697357178, 1.68018687408447, 3.58688294830322, 3.21138949554443, 
3.19321478973389, 3.72463947906494, -0.0350475311279297, 0.165266036987305, 
0.000474929809570312, -0.130693435668945, 1.18828201293945, 1.21581077575684, 
0.838380813598633, 1.08798027038574, 0.548923492431641, 0.587751388549805, 
1.72222900390625, 1.3463020324707, 1.67831993103027, 1.47892761230469, 
2.93190002441406, 2.90556716918945, 2.60245323181152, 1.76128578186035, 
3.37945556640625, 3.02278518676758)), row.names = c(NA, -115L
), class = "data.frame")

To create following split violin plot

p <- ggplot(data, aes(fct_relevel(Stimulation, 'PBS', 'LPS'), y=ddCt, fill=Conditioning))
p + 
  scale_fill_brewer(palette = "Dark2")+                                                                                                                                     
  geom_split_violin(draw_quantiles=c(0.25, 0.5, 0.75), linetype="solid", linewidth=1, colour="grey30", alpha=0.5) +
  geom_quasirandom(method="pseudorandom", dodge.width = 0.3, varwidth = TRUE, width=0.05, alpha=1/2, size =2) +
  labs(title="Gene", x = "Stimulation", y = "log2 fold change relative to PBS (ddCt)", tag="Fig", caption="") +             
  theme(        plot.title = element_text(color="black", size=24, face="bold.italic", hjust = 0.5),          
                axis.title.x = element_text(color="black", size=18, face="bold", hjust=0.5, vjust=-1),       
                axis.title.y = element_text(color ="black", size=18, face="bold", angle = 90, vjust=6),      
                axis.text.x = element_text(color = "black", size=16, angle=0, vjust=0.5, hjust=0.5),        
                axis.text.y = element_text(color = "black", size=16),                                       
                plot.tag = element_text(color="black", size=14, face="bold"),                                 
                panel.background = element_rect(fill="transparent", colour="grey50"),                         
                plot.background = element_rect(fill="transparent" ),                                      
                panel.grid.major.y=element_line(colour="grey90"),                                           
                legend.position = "right")

Split Violin plot

I also perform a 2-Way ANOVA with TukeyHSD multiple comparison posthoc

anova_result <- aov(ddCt ~ Conditioning*Stimulation, data = data)
Tukey <-TukeyHSD(anova_result)

And get following output:

print(Tukey)

 Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = ddCt ~ Conditioning * Stimulation, data = data)

$Conditioning
            diff       lwr      upr p adj
RSV-PBS 1.141487 0.8891432 1.393831     0

$Stimulation
                    diff        lwr         upr     p adj
PBS-LPS       -1.1843314 -1.5435941 -0.82506874 0.0000000
Poly(I:C)-LPS -0.3578207 -0.7519162  0.03627481 0.0832824
Poly(I:C)-PBS  0.8265108  0.4547506  1.19827094 0.0000020

$`Conditioning:Stimulation`
                                   diff         lwr        upr     p adj
RSV:LPS-PBS:LPS              1.65631875  0.99618366  2.3164538 0.0000000
PBS:PBS-PBS:LPS             -0.93541028 -1.55863667 -0.3121839 0.0004264
RSV:PBS-PBS:LPS              0.22271223 -0.39478761  0.8402121 0.9010389
PBS:Poly(I:C)-PBS:LPS        0.18262626 -0.49782542  0.8630779 0.9705935
RSV:Poly(I:C)-PBS:LPS        0.75805114  0.07759946  1.4385028 0.0197215
PBS:PBS-RSV:LPS             -2.59172903 -3.21495542 -1.9685026 0.0000000
RSV:PBS-RSV:LPS             -1.43360653 -2.05110636 -0.8161067 0.0000000
PBS:Poly(I:C)-RSV:LPS       -1.47369250 -2.15414418 -0.7932408 0.0000001
RSV:Poly(I:C)-RSV:LPS       -0.89826761 -1.57871929 -0.2178159 0.0028786
RSV:PBS-PBS:PBS              1.15812251  0.58024809  1.7359969 0.0000009
PBS:Poly(I:C)-PBS:PBS        1.11803654  0.47332941  1.7627437 0.0000282
RSV:Poly(I:C)-PBS:PBS        1.69346143  1.04875430  2.3381686 0.0000000
PBS:Poly(I:C)-RSV:PBS       -0.04008597 -0.67925903  0.5990871 0.9999717
RSV:Poly(I:C)-RSV:PBS        0.53533892 -0.10383414  1.1745120 0.1552462
RSV:Poly(I:C)-PBS:Poly(I:C)  0.57542489 -0.12475412  1.2756039 0.1709891

I am having trouble puting these multiple comparisons (as bars and p_adj values) onto a plot. Most of my attempts using e.g. stat_pvalue_manual results in creating comparisons between groups on X axis, but ignoring the 'fill' groups. So e.g. I'd like to create a comparison between RSV:PBS and RSV:LPS, but can't! Please help!

0

There are 0 answers