Adding totals above a proportions stacked bar chart with multiple response variables

43 views Asked by At

I am trying to add totals to a stacked bar chart where I depict proportions and where both the x and y variables are multiple response ones. I managed to do it with total counts despite getting a warning message, but it totally gets messed up when I try to add totals to the proportions bar chart.

This is an excerpt of my df hnvf3:

# dput() output for an 11-row excerpt of the data frame `hnvf3`:
# four integer 0/1 HNV_* indicator columns plus the character column
# Animal_vegetal (which contains empty strings). The expression is not
# assigned here; assign it to `hnvf3` to run the code that follows.
structure(list(HNV_grasslands = c(1L, 0L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L), HNV_arable = c(1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 
1L, 1L, 1L), HNV_permanent_crop = c(1L, 1L, 1L, 0L, 0L, 1L, 1L, 
1L, 1L, 1L, 1L), HNV_mosaic = c(1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 
1L, 0L, 1L), Animal_vegetal = c("", "Both", "Both", "Both", "", 
"Vegetal species", "Vegetal species", "", "Both", "Animal species", 
"Both")), row.names = 10:20, class = "data.frame")

This is the code I used to add the totals in the counts version. It works, although I get the following warning:

The following aesthetics were dropped during statistical transformation: fill This can happen when ggplot fails to infer the correct grouping structure in the data. Did you forget to specify a group aesthetic or to convert a numerical variable into a factor?

I get the total counts on top.

# Reshape hnvf3 to long format: one row per (study, HNV type) pair, keeping
# only the pairs flagged with 1 and the rows with a non-empty Animal_vegetal.
hnvf4 <- hnvf3 %>%
  pivot_longer(
    cols = starts_with("HNV"),
    names_to = c("HNV"),
    names_prefix = "HNV_",
    values_to = "value1") |>
    filter(value1 == 1 & Animal_vegetal !="") |>
  select(-value1) # added so that hnvf2 is not all "1" in every row, but rather the value of provisioning, etc.
glimpse(hnvf4)

# Stacked bar chart of study counts per HNV type, filled by species group,
# with a count label drawn for each bar.
hnvf4 |>
  ggplot(aes(x = HNV,fill = factor(Animal_vegetal, levels=c("Animal species", "Vegetal species", "Both")))) +
  geom_bar()+
  # group = HNV overrides the grouping implied by fill, so stat = "count"
  # yields one count per x value.
  # NOTE(review): this override is presumably what triggers the
  # "aesthetics were dropped ... fill" warning -- confirm.
  geom_text(stat ="count",aes(label=after_stat(count), group= HNV, vjust= -1))+
    scale_fill_manual(values = c("#FDCDAC", "#B3E2CD","#CBD5E8"))+
  theme(axis.text.x = element_text(angle=90, vjust = 0.5, hjust=1), 
        axis.ticks.x = element_blank())+
  theme(axis.line.y = element_line(linewidth = 0.5, colour = "black"),
        axis.ticks.length.y.left = unit(.25, "cm"))+
  theme(panel.background = element_rect(fill=NA))+
  # Labels are matched to the discrete x values by position, not by name.
  scale_x_discrete(labels=c('arable land', 'grasslands', 'mosaic landscape', 
                            'permanent croplands')) +
  labs(x= "HNV farmland type", y="Number of studies", fill= "")+
  #geom_text(data=stacked, aes(label=nsum, variable),size=2, vjust= 1)+
  theme(axis.title.y = element_text(margin=margin(r= 15)))+
  # Fixed 0-100 axis with no expansion, so the bars start exactly at 0.
  scale_y_continuous(limits= c(0,100), breaks=c(0,25,50,75, 100),expand = c(0,0)) +
  theme(legend.position = c(.99, .95),
        legend.justification = c("right", "top"))

And this is the code that is not working for the proportions:

# Proportions version of the stacked bar chart: position = "fill" rescales
# every bar to a total height of 1. The totals layer is commented out here.
hnvf4%>%
  ggplot(aes(x = HNV,fill = factor(Animal_vegetal,levels=c("Animal species", "Vegetal species", "Both")))) +
  geom_bar(position="fill")+
  #geom_text(stat ="count",aes(label=after_stat(count), group= HNV, vjust= -1))+
  scale_fill_manual(values = c("#FDCDAC", "#B3E2CD","#CBD5E8"))+
  theme(axis.text.x = element_text(angle=90, vjust = 0.5, hjust=1), 
        axis.ticks.x = element_blank())+
  theme(axis.line.y = element_line(linewidth = 0.5, colour = "black"),
        axis.ticks.length.y.left = unit(.25, "cm"))+
  theme(panel.background = element_rect(fill=NA))+
  # Labels are matched to the discrete x values by position, not by name.
  scale_x_discrete(labels=c('arable land', 'grasslands', 'mosaic landscape', 
                            'permanent croplands')) +
  labs(x= "HNV farmland type", y="Proportion of studies", fill= "")+
  #geom_text(data=stacked, aes(label=nsum, variable),size=2, vjust= 1)+
  theme(axis.title.y = element_text(margin=margin(r= 15)))+
  # so that there is no extra space and the y axis starts at 0
  scale_y_continuous(expand = c(0,0))

I have also checked the following entries on Stack Overflow: question1, question2, question3, text, but I find it difficult to create a new data frame for geom_text() since I do not have a variable that holds the counts of my variables. I have tried to modify my data and create a new df with counts for each variable, but it still does not work. Any help would be much appreciated.

1

There are 1 answers

0
L Tyrone On BEST ANSWER

The approach you tried, creating a numeric (pre-counted) version of your data, is the easiest when it comes to plotting. I've provided a tidyverse method to sum your groups.

The thing you needed to do for the text labels on your proportions plot was to set position = "fill". I have swapped geom_text() for stat_summary() and added position = "fill" to the proportions plot so the labels and bars display correctly.

Note that I could not find a way to plot text outside the y-axis range using geom_text() or stat_summary() when scale_y_continuous(expand = c(0,0)) is used. So I resorted to coord_cartesian(clip = 'off') to place your labels above the bar. If you want to use scale_y_continuous(expand = c(0,0)), place the labels inside the plot by changing vjust. There may be a way to plot above the bar using annotate() or similar; comment below if someone knows how.

Data and Packages

library(dplyr)
library(tidyr)
library(ggplot2)

# Sum species groups by HNV values and add factor levels for Animal_vegetal
# Count studies per HNV type and species group, then fix the legend order.
#
# Input:  `hnvf3`, with 0/1 indicator columns prefixed "HNV_" and the
#         character column `Animal_vegetal` ("" where no group was recorded).
# Output: `hnvf4`, an ungrouped tibble with one row per observed
#         (HNV, Animal_vegetal) combination and its frequency in `count`.
hnvf4 <- hnvf3 |>
  pivot_longer(
    cols = starts_with("HNV"),
    names_to = "HNV",
    names_prefix = "HNV_",
    values_to = "value1"
  ) |>
  # Keep only the HNV types a study covers and drop empty species groups.
  filter(value1 == 1, Animal_vegetal != "") |>
  # count() tallies rows per combination and returns ungrouped data, so no
  # separate group_by()/summarise()/ungroup() steps are needed.
  count(HNV, Animal_vegetal, name = "count") |>
  # Factor levels set the stacking and legend order used by the plots.
  mutate(
    Animal_vegetal = factor(
      Animal_vegetal,
      levels = c("Animal species", "Vegetal species", "Both")
    )
  )

Counts Plot

# Stacked bar chart of study counts per HNV type with the bar total on top.
# Expects `hnvf4` with columns HNV, Animal_vegetal (factor) and count.
ggplot(data = hnvf4) +
  # geom_col() draws the pre-computed counts as-is (stat = "identity").
  geom_col(aes(x = HNV, y = count, fill = Animal_vegetal)) +
  # This layer has no fill aesthetic, so the counts are summed per x value
  # and one label is drawn per bar. `fun` replaces the `fun.y` argument,
  # which has been deprecated since ggplot2 3.3.0.
  stat_summary(aes(x = HNV, y = count, label = after_stat(y)),
               fun = "sum",
               geom = "text",
               colour = "black",
               vjust = -1) +
  # Labels are matched to the discrete x values by position.
  scale_x_discrete(labels = c("arable land", "grasslands", "mosaic landscape",
                              "permanent croplands")) +
  # Fixed 0-100 axis without expansion so the bars start exactly at 0.
  scale_y_continuous(limits = c(0, 100),
                     breaks = c(0, 25, 50, 75, 100),
                     expand = c(0, 0)) +
  scale_fill_manual(values = c("#FDCDAC", "#B3E2CD", "#CBD5E8")) +
  labs(x = "HNV farmland type", y = "Number of studies", fill = "") +
  # NOTE: ggplot2 >= 3.5.0 deprecates a numeric `legend.position` in favour
  # of `legend.position.inside`; kept as-is for older versions.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
        axis.ticks.x = element_blank(),
        axis.line.y = element_line(linewidth = 0.5, colour = "black"),
        axis.ticks.length.y.left = unit(.25, "cm"),
        axis.title.y = element_text(margin = margin(r = 15)),
        panel.background = element_rect(fill = NA),
        legend.position = c(.99, .95),
        legend.justification = c("right", "top"))

result1

Proportions Plot

# Stacked proportions chart per HNV type with the absolute total above each
# bar. Expects `hnvf4` with columns HNV, Animal_vegetal (factor) and count.
ggplot(data = hnvf4) +
  # position = "fill" rescales every bar to a total height of 1.
  geom_col(aes(x = HNV, y = count, fill = Animal_vegetal),
           position = "fill") +
  # The per-bar sum is computed first (label = after_stat(y)), then
  # position = "fill" moves the label to the top of the bar (y = 1).
  # `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0.
  stat_summary(aes(x = HNV, y = count, label = after_stat(y)),
               fun = "sum",
               geom = "text",
               position = "fill",
               colour = "black",
               vjust = -1) +
  # Labels are matched to the discrete x values by position.
  scale_x_discrete(labels = c("arable land", "grasslands", "mosaic landscape",
                              "permanent croplands")) +
  # scale_y_continuous(expand = c(0,0)) +
  # Clipping is turned off so labels nudged above y = 1 are not cut off at
  # the panel edge.
  coord_cartesian(clip = "off") +
  scale_fill_manual(values = c("#FDCDAC", "#B3E2CD", "#CBD5E8")) +
  labs(x = "HNV farmland type", y = "Proportion of studies", fill = "") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
        axis.ticks.x = element_blank(),
        axis.line.y = element_line(linewidth = 0.5, colour = "black"),
        axis.ticks.length.y.left = unit(.25, "cm"),
        panel.background = element_rect(fill = NA),
        axis.title.y = element_text(margin = margin(r = 15)))

result2