geom text for facets is not positioned at visually pleasing location

348 views Asked by At

Data

> dput(my.precious)
structure(list(Vehicle.ID2 = c("2351.2360", "503.496", "2508.2498", 
"2256.2243", "952.946", "2327.2315", "683.682", "880.866", "347.342", 
"115.116", "2239.2229", "1680.1675", "1044.1029", "323.321", 
"2354.2337", "1628.1621", "1603.1598", "417.404", "1291.1285", 
"84.78", "2861.2855", "2804.2802", "1084.1080", "1885.1876", 
"1778.1775", "1509.1505", "379.372", "2620.2616", "1146.1133", 
"2476.2472", "750.737", "2119.2112", "411.397", "1515.1512", 
"2204.2194", "879.872", "986.981", "1129.1124", "2954.2948", 
"2928.2924", "462.438", "2629.2620", "2962.2950", "615.610", 
"1405.1400", "806.800", "1767.1765", "199.192", "1888.1878", 
"2525.2517", "142.141", "687.682", "1446.1445", "39.27", "2556.2550", 
"292.281", "2034.2017", "2464.2447", "2046.2037", "2567.2552", 
"705.697", "180.175", "1701.1699", "2086.2071", "2427.2402", 
"965.961", "1561.1558", "2185.2180", "2148.2138", "2589.2582", 
"1770.1761", "1027.1032", "2995.2982", "973.967", "405.399", 
"2115.2106", "2754.2742", "2586.2576", "1733.1729", "943.928", 
"1245.1239", "31.18", "146.141", "1865.1861", "588.579", "2216.2212", 
"513.501", "1470.1467", "518.515", "2348.2339", "2212.2208", 
"1504.1489", "2814.2812", "2618.2615", "2597.2593", "3018.3009", 
"1641.1638", "929.917", "2052.2045", "1702.1694"), Vehicle.class = structure(c(1L, 
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("Car following", "Heavy-vehicle following"
), class = "factor"), PrecVehClass = structure(c(2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L), .Label = c("Motorcycle", "Car", "Heavy-vehicle"), class = "factor"), 
    CC2 = c(32.5766501673563, 33.1462524122711, 114.985655309494, 
    0, 19.6198370044607, 6.33947396494466, 4.41629586850399, 
    45.7201738350116, 77.2852308366414, 23.4653247796564, 113.858471174095, 
    18.2949618097755, 15.1430447619764, 18.7949281381009, 56.150849563362, 
    0.871136231063019, 10.1789190682619, 21.8538402563161, 24.4424229038064, 
    21.8644774356173, 78.8898916107299, 59.0436899337149, 34.952193382661, 
    30.0676154315454, 12.1631954913147, 22.0999532188296, 34.4320551117948, 
    51.6072494224724, 49.8285734316947, 83.7391153614881, 68.7393621760813, 
    23.3109392847383, 0, 63.8918058981795, 0.117898698373665, 
    35.9301550863017, 41.408066837246, 67.9609018034737, 77.6228604725088, 
    50.3819848446467, 158.427611013205, 61.7191536455709, 63.4184192224484, 
    52.3067956266756, 56.239305476488, 23.4972280626377, 0, 5.44649970936757, 
    45.325372359443, 44.140432941474, 26.4621220704583, 21.9722600148252, 
    0, 47.5859211404629, 65.4619356384739, 50.3173084316458, 
    7.14323295461026, 49.9184456786638, 57.632603327405, 70.4138804098259, 
    27.3086664432516, 39.2627818278854, 13.8954239118315, 16.5224386897373, 
    0.336396348580877, 34.6684621497679, 0.80866365546683, 63.8680515267192, 
    14.7996906960015, 61.5616857306764, 65.3043233970858, 21.5517378489972, 
    26.6451085013455, 16.4717475328769, 34.5554653009784, 36.647363180998, 
    86.7844694571702, 157.154018248369, 47.5411300112071, 2.64972923204488, 
    15.45052725276, 10.0503437206614, 0, 7.95701592069599, 65.2275028899913, 
    16.6622992517697, 0.084677923994235, 23.5450734083073, 20.7709172539573, 
    29.1191855784058, 82.1117069705742, 53.0859602212412, 37.6419285717603, 
    82.0220785025156, 42.6655290135778, 68.302184817338, 62.2055693283554, 
    22.0752327366978, 16.2898985629383, 48.0306011348524)), .Names = c("Vehicle.ID2", 
"Vehicle.class", "PrecVehClass", "CC2"), class = c("tbl_df", 
"data.frame"), row.names = c(NA, -100L))

What I want to do and the relevant code

I want to plot the distribution of the variable 'CC2' in facet_wraps of 'Vehicle.class' and 'PrecVehClass'. Also, I want to display the mean value, standard deviation and number of pairs on the plots. I used following code:

my.theme<-function(base_size = 12, base_family = "Trebuchet MS")
{theme(plot.title = element_text(size = rel(1)), panel.grid.major=element_line(color='grey'), panel.grid.minor=element_line(color='grey', linetype='dashed'), legend.position='right', legend.title=element_blank(),legend.background = element_blank(), strip.text = element_text(size=13, face="bold",lineheight=4),          strip.background = element_rect(colour="black", fill="white"),legend.title = element_text(colour="black", size=16, face="bold"), legend.text = element_text(colour="black", size = 16), axis.title.x = element_text(face="bold", size=14), axis.title.y = element_text(face="bold", size=14))
} 


pairs.CC2 <- ddply(my.precious, .(Vehicle.class, PrecVehClass), function(x) length(unique(x$Vehicle.ID2)))

means.CC2 <- ddply(my.precious, .(Vehicle.class, PrecVehClass), function(x) mean(x$CC2, na.rm=T))
sd.CC2 <- ddply(my.precious, .(Vehicle.class, PrecVehClass), function(x) sd(x$CC2, na.rm=T))



ggplot() + 
  geom_histogram(data=my.precious, aes(x=CC2, y=..count../sum(..count..)*100),color="black", fill="grey", alpha=0.5) + 
  facet_wrap(Vehicle.class~PrecVehClass, scale="free_y") + 
  labs(x = "Distance in addition to safety distance (ft)", y="percentage") + 
  theme_bw() + my.theme() +
  geom_text(data=pairs.CC2, aes(x=200, y=0.4, label=paste(V1, "pairs", sep=" ")), size=5, face="italic") + 
  geom_vline(data=means.CC2, aes(xintercept=V1), color="blue", linetype = "longdash", size=1) + geom_text(data=means.CC2, aes(x=mean(V1, na.rm=T),y=0.4, label=paste("Mean=", round(V1,1), "ft",sep=" ")), size=5) + geom_text(data=sd.CC2, aes(x=mean(V1, na.rm=T),y=0.35, label=paste("SD=", round(V1,1), sep=" ")), size=5)  

This plots following:

enter image description here

Problem and question

You can see the 'mean', 'SD' and 'pairs' texts are not at visually pleasing locations. For this sample data I can relatively easily adjust the positions by controlling x and y arguments in geom_text but in the original data there are atleast 2 more facets for this data frame. And there are lots of other data frames having same kind of distributions which I want to plot. How can I ensure that these text annotations are placed on same locations e.g. top right or top left in every facet so that there is uniformity and plots look publication quality?

2

There are 2 answers

0
eipi10 On

You can gain more control over label placement by creating a data frame with the summary information that includes y-position values. The summary data frame just has to include the facetting variables so that geom_text can automatically place labels at different y-positions for different facets. For example:

library(ggplot2)
library(dplyr)

# Pre-summarize the data into histogram bins. We need this to calculate appropriate
# values for the y-position of the labels
hist.bins = my.precious %>% 
  group_by(Vehicle.class, PrecVehClass,
           breaks=cut(CC2, seq(0,max(CC2)+5,5), 
                      seq(5,max(CC2)+5,5), include.lowest=TRUE)) %>%
  summarise(count=n()) %>%
  ungroup() %>%
  mutate(percent=count/sum(count)*100) 

# Data frame with y-position of labels. I've set the value to 90% of the maximum 
# value of percent, but you can set it to whatever you like, or vary it by group.
pos = hist.bins %>% group_by(Vehicle.class, PrecVehClass) %>%
  summarise(y.pos = 0.9 * max(percent))

# Data frame with summary stats
CC2stats = my.precious %>% group_by(Vehicle.class, PrecVehClass) %>%
    summarise(mean=mean(CC2, na.rm=T),
              sd = sd(CC2, na.rm=T),
              pairs=length(unique(Vehicle.ID2)))

# Merge y-positions into CC2stats
CC2stats = merge(CC2stats, pos, by=c("Vehicle.class", "PrecVehClass"))

# Plot histogram
ggplot() + 
  geom_histogram(data=my.precious, aes(x=CC2, y=..count../sum(..count..)*100),
                 color="black", fill="grey", alpha=0.5, 
                 breaks=seq(0,max(my.precious$CC2)+5,5)) + 
  facet_wrap(Vehicle.class~PrecVehClass, scale="free_y") + 
  labs(x = "Distance in addition to safety distance (ft)", y="percentage") + 
  theme_bw() + my.theme() +
  # Add text labels using CC2stats data frame
  geom_text(data=CC2stats, aes(x=140, y=y.pos, 
                               label=paste(pairs, " pairs", sep=" ")), 
            size=5, face="italic") + 
  geom_vline(data=CC2stats, aes(xintercept=mean), 
             color="blue", linetype = "longdash", size=1) + 
  geom_text(data=CC2stats, 
            aes(x=140,y=0.95*y.pos, label=paste0("Mean = ", round(mean,1), 
                " ft",sep=" ")), size=5) + 
  geom_text(data=CC2stats, 
            aes(x=140,y=0.90*y.pos, label=paste0("SD = ", round(sd,1), sep=" ")), 
            size=5) 

Note that I've included a breaks argument in geom_histogram. This is so that the breaks in the graph will correspond to the breaks in hist.bins, which ensures that the maximum value of hist.bins$percent will correspond to the y-range in the graph.

And here's the result:

enter image description here

0
jlhoward On

It turns out that ggplot stores the axis limits in a "ggplot object" produced when the plot is rendered. You can create but not render with ggplot_build(...) and then access these (albeit in a roundabout way). Calling you original data, df, and using your pairs.CC2, mean.CC2, and sd.CC2,

# build the plot absent the mean, sd, and pairs annotations
ggp <-ggplot() + 
  geom_histogram(data=df, aes(x=CC2, y=..count../sum(..count..)*100),color="black", fill="grey", alpha=0.5) + 
  facet_wrap(Vehicle.class~PrecVehClass, scale="free_y") + 
  labs(x = "Distance in addition to safety distance (ft)", y="percentage") + 
  theme_bw() + my.theme() +
  geom_vline(data=means.CC2, aes(xintercept=V1), color="blue", linetype = "longdash", size=1)

# extract x- and y-range information for each panel (facet)
panels  <- ggplot_build(ggp)[["panel"]]
limits  <- do.call(rbind,lapply(panels$ranges,  
                               function(range)c(range$x.range,range$y.range)))
colnames(limits) <- c("x.lo","x.hi","y.lo","y.hi")

# combine this with your mean, sd, and pairs data
labs <- cbind(means.CC2,sd=sd.CC2$V1,pairs=pairs.CC2$V1,limits)

# use labs to drive the placement of the annotations
ggp +
  geom_text(data=labs, aes(x=x.hi,y=y.hi-0.0*(y.hi-y.lo),label=paste(pairs,"pairs",sep=" ")), size=5,hjust=1)+
  geom_text(data=labs, aes(x=x.hi,y=y.hi-0.1*(y.hi-y.lo),label=paste("Mean=", round(V1,1), "ft",sep=" ")), size=5,hjust=1) + 
  geom_text(data=labs, aes(x=x.hi,y=y.hi-0.2*(y.hi-y.lo),label=paste("SD=", round(sd,1),sep=" ")), size=5,hjust=1) 

Produces this: