How can I color the area below several normal distributions?

Question

How can I color the area below several normal distributions?

488 views Asked by antecessor At 21 June 2015 at 21:19

After several tries, I finally could obtained a unique figure with several normal distributions. In those distributions, the 1sd was also drawn as vertical rectangles. The code I used is this one:

x1<-50:200
a1<-dnorm(x1,134,20)
b1<-dnorm(x1,130,14)
c1<-dnorm(x1,132,12)
d1<-dnorm(x1,105,10)

scale<-range(pretty(range(a1,b1,c1,d1)))

remap<-function(x, to, from=range(x)) {
    (x-from[1]) / (from[2]-from[1]) * (to[2]-to[1]) + to[1] 
}

plot(NA, NA, xaxt="n", yaxt="n", type="n", xlim=scale, ylim=scale, xlab="Variable X", ylab="")
rect(remap(134-20, scale, range(x1)), scale[1],
     remap(134+20, scale, range(x1)), scale[2], col="#ff606025")
rect(remap(130-14, scale, range(x1)), scale[1],
     remap(130+14, scale, range(x1)), scale[2], col="#005ccd40")
rect(remap(132-12, scale, range(x1)), scale[1],
     remap(132+12, scale, range(x1)), scale[2], col="#005ccd40")
rect(remap(105-10, scale, range(x1)), scale[1],
     remap(105+10, scale, range(x1)), scale[2], col="#005ccd40")
#R1429
rect(remap(183, scale, range(x1)), scale[1],
     remap(183, scale, range(x1)), scale[2], col="darkblue", lwd=3,lty=3)

lines(remap(x1,scale), a1, col="#ff6060", lwd=3)
lines(remap(x1,scale), b1, col="#005ccd", lwd=3, lty=3)
lines(remap(x1,scale), c1, col="#005ccd", lwd=3)
lines(remap(x1,scale), d1, col="#005ccd", lwd=3,lty=3)

axis(2);
axis(1, at=remap(pretty(x1), scale), pretty(x1))

I got the next figure after running the code: enter image description here

But my question is: how can I color only the area below each normal distribution, instead of doing vertical rectangles?

It would be much easier to interpret.

Thanks in advance!

Original Q&A

There are 4 answers

Rorschach On 21 June 2015 at 22:37

You can fill in under curves by using polygon.

## Some distributions
x1 <- 50:200
means <- c(134, 130, 132, 105)
sds <- c(20, 14, 12, 10)
dists <- lapply(seq_along(means), function(i) dnorm(x1, means[i], sds[i]))

## Some colors
cols <- colorRampPalette(c("red", "blue"))(length(dists))

## Blank plot
plot(c(x1[1], x1[length(x1)]), c(min(unlist(dists)), max(unlist(dists))), 
     type="n", xlab="X", ylab="Density")

## Add polygons
for (i in seq_along(dists))
    polygon(c(x1, rev(x1)), 
            c(numeric(length(x1)), rev(dists[[i]])), 
            col=cols[i],
            density=40)

enter image description here

Edit: for polygons within 1sd

xs <- sapply(seq_along(dists), function(i)  # get supports on x1
    do.call(`:`, as.list(which(x1 %in% (means[i] + c(-1,1)*sds[i])))))

plot(range(x1), range(unlist(dists)), type="n", xlab="X", ylab="Density")
for (i in seq_along(dists)) {
    x <- x1[xs[[i]]]
    polygon(c(x, rev(x)), 
            c(numeric(length(x)), rev(dists[[i]][xs[[i]]])), 
            col=cols[i],
            density=40)
    points(x1, dists[[i]], type="l", lty=2, col=cols[i])
}

enter image description here

Nick Kennedy On 21 June 2015 at 22:45

Here's a way of doing it using some of Hadley Wickham's packages:

library("dplyr")
library("ggplot2")
library("tidyr")
data.frame(x = 50:200) %>%
  mutate(a = dnorm(x,134,20),
         b = dnorm(x,130,14),
         c = dnorm(x,132,12),
         d = dnorm(x,105,10)) %>%
  gather(group, y, -x) %>%
  ggplot(aes(x, y, fill = group)) %>%
  + geom_area(alpha = 0.3, position = "identity") %>%
  + geom_line() %>%
  print

output with 4 normal distributions

Here's a version filling only within 1 SD:

data.frame(group = letters[1:4],
  m = c(130, 134, 132, 105),
  s = c(20, 14, 12, 10)
) %>%
  group_by(group) %>%
  do(data_frame(group = .$group,
    x = 50:200,
    y = dnorm(x, .$m, .$s),
    withinSd = abs(x - .$m) <= .$s)
  ) %>% {
    ggplot(., aes(x = x, y = y, colour = group)) +
      geom_line() +
      geom_area(aes(fill = group), filter(., withinSd),
        position = "identity", alpha = 0.3) +
      guides(colour = "none")
    }

Graph with 1 SD

If you wanted the graphs to all be the same height, you could just add a bit of extra dplyr magic:

data.frame(group = letters[1:4],
           m = c(130, 134, 132, 105),
           s = c(20, 14, 12, 10)
) %>%
  group_by(group) %>%
  do(data_frame(group = .$group,
                x = 50:200,
                y = dnorm(x, .$m, .$s),
                withinSd = abs(x - .$m) <= .$s)
  ) %>% 
  group_by(group) %>%
  mutate(y = y / max(y)) %>%
  {
    ggplot(., aes(x = x, y = y, colour = group)) +
      geom_line() +
      geom_area(aes(fill = group), filter(., withinSd),
                position = "identity", alpha = 0.3) +
      guides(colour = "none")
  }

enter image description here

Dagremu On 22 June 2015 at 00:20

Here's another version using base R. This one uses the type='h' option in lines() to draw a lot of vertical lines, so many that it ends up shading the region. Note that this required increasing the number of sample points in x1 (try changing x1 back to 50:200 to see what happens).

x1 <- seq(50,200,length=1000)
a1 <- dnorm(x1,134,20)
b1 <- dnorm(x1,130,14)
c1 <- dnorm(x1,132,12)
d1 <- dnorm(x1,105,10)

dists <- list(a1,b1,c1,d1)

# specify color names then convert them to RGB+alpha values
col <- c("red","green","blue","yellow")
col.rgba <- rgb(t(col2rgb(col))/255, alpha=0.2)

plot(NA, NA, xlim=range(x1), ylim=range(unlist(dists)), xlab="Variable X", ylab="")

# loop through each distribution
for (i in 1:length(dists)) {
  lines(x1, dists[[i]], type='h', lwd=2, col=col.rgba[i]) # add shaded region
  lines(x1, dists[[i]], type='l') # add solid line at top
}

Here's the output:

a busy cat

**Steven Beaupré** · Accepted Answer · 2015-06-22T03:17:51+00:00

Here's another version using ggvis:

library(dplyr)
library(ggvis)

## -- data generation copied from @NickK -- ##
data.frame(group = letters[1:4],
           m = c(130, 134, 132, 105),
           s = c(20, 14, 12, 10)) %>%
  group_by(group) %>%
  do(data_frame(group = .$group,
                x = 50:200,
                y = dnorm(x, .$m, .$s),
                withinSd = abs(x - .$m) <= .$s)) %>%
## ---------------------------------------- ##
  mutate(dash = ifelse(grepl("a|d", group), 5, 0),
         color = ifelse(grepl("a|c|d", group), "blue", "red"))  %>%
  ggvis() %>%
  layer_paths(~x, ~y, stroke := ~color, strokeDash := ~dash) %>%
  filter(withinSd) %>%
  layer_ribbons(~x, ~y, y2 = ~y-y, fill := ~color, fillOpacity := 0.2) %>%
  hide_legend("fill") %>%
  add_axis("y", title_offset = 50)

enter image description here

TechQA.

How can I color the area below several normal distributions?

There are 4 answers

Edit: for polygons within 1sd

Related Questions in R

Related Questions in NORMAL-DISTRIBUTION

Related Questions in STANDARD-DEVIATION

Popular Questions

Trending Questions