I didn´t found a sufficient answer in this forum yet, so I decided to raise my own question.
I want to get the linear regression equation of a linear fit from a boxplot. I have this data:
library(ggplot2)
data <- structure(list(x = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L
), .Label = c("1", "2", "3", "4", "5", "6"), class = "factor"),
y = c(169, 79.5, 78.5, 75, 99.5, 68, 14, 30.5, 107.5, 51,
43, 33, 21.5, 35, 11, 1, 38, 54.5, 26.5, 143, 158, 171, 31.5,
67.5, 1, 57.5, 12, 36.5, 1, 23.5, 22.5, 71, 141, 218, 7.5,
1, 129, 144.5, 76, 46.5, 75.5, 45, 12, 24, 67, 65.5, 44.5,
37.5, 25.5, 19, 15, 1, 17.5, 50, 22.5, 90, 226, 220, 32,
69.5, 1, 79.5, 7, 44, 1, 15.5, 22, 75.5, 178, 153, 4.5, 1,
159, 89, 57, 71, 98.5, 47.5, 18.5, 30, 119, 57.5, 41, 33.5,
30, 31, 10, 1, 12, 43.5, 20.5, 98, 146.5, 145, 34, 64.5,
1, 40.5, 17, 41, 1, 14.5, 16.5, 71, 181, 168, 2, 1, 159,
103, 69, 65.5, 97.5, 37.5, 21, 15.5, 120.5, 46, 27, 29.5,
16.5, 20, 7.5, 1, 15.5, 42.5, 21.5, 111, 102.5, 124, 20.5,
51.5, 1, 22.5, 15, 42, 1, 13, 13.5, 64.5, 138, 155, 4.9,
1, 190, 89.5, 74.5, 79, 78, 59.5, 19.5, 21, 88.5, 44, 18,
19, 10, 13, 4, 1, 9.5, 44, 17, 140.5, 98, 112.5, 29.5, 62.56,
1, 31, 11.5, 49.5, 1, 10, 8.5, 40.5, 121, 141, 2.5, 1, 170,
87.5, 92, 77, 65, 34, 8, 26, 98, 51.5, 26, 19, 9, 8.5, 7.5,
1, 4.5, 0, 15.5, 80, 69, 59, 28, 44.5, 1, 38.5, 10, 51.5,
1, 3, 5, 65, 107, 152, 5, 1)), row.names = c(NA, -216L), class = "data.frame")
p <- ggplot(data = data) +
aes(x = x,
y = y) +
geom_boxplot(outlier.shape = NA) + geom_jitter(shape = 1, position = position_jitter(0.1)) +
ylim(0, NA) +
theme_light() +
geom_smooth(method = "lm",se = TRUE, formula = y ~ x, aes(group = 1))
print(p)
fit <- lm(y ~ x, data = data)
fit
How can I extract the regression equation for this dataset? The function
fit <- lm(y ~ x, data = data)
just gives me one intercept and 5 coefficients, which is not my desired output. I want a simple regression equation in the form ofy = a + bx
.How can I put this equation into the diagramm? I´ve already looked into
ggpmisc::stat_poly_eq()
, but this doesn´t seem to work with boxplot linear regression.
Can you guys help me out?