Is there an elegant way to create a violin plot in R using highcharts

46 views Asked by At

I am working on a document with several plots using highcharts for all visualisations. However, I want to create a violin plot and am struggling to make one elegantly - namely because the solution I am currently using creates boxes to place over the violin to convey the distributions. At face value, this is great except that the boxes end up being different series and therefore the hover tooltip is essentially redundant which is one of the reasons I am using highcharts in the first place.

You can find a reprex of the method I am currently deploying below, could you please suggest how I could improve the plots to make them publication quality i.e. having the series display properly in the tooltip and perhaps rounded to two decimals.

I am keen to hear how others create violin plots, or alternatives in highcarts!

#load libraries
library(tidyverse)
library(highcharter)

#create data
input <- tibble(type = rep(c(1, 2, 3), 5),
       value = rnorm(15, 0.1))

#function to prepare data for violin plot
prepare_violin <- function(data) {
  l <- lapply(names(data), function(name) {
    series <- data[[name]]
    index <- match(name, names(data)) - 1
    density <- series %>% density()
    multiplier <- 1 / (2.5 * max(density$y))
    index
    cbind(density$x, -multiplier * density$y + index, multiplier * density$y + index)
  })
  names(l) <- names(data)
  l
}

#function to create violin plot
plot_violin <- function(data) {
  hc <- highchart() %>%
    hc_chart(inverted=T) %>% 
    hc_plotOptions(line = list(linecap = 'square')) %>%
    hc_yAxis(type = 'category',
             min = 0,
             max = length(data) - 1,
             tickLength = 0,
             categories = names(data),
             labels = list(useHTML = TRUE,
                           align = 'left',
                           reserveSpace = TRUE),
             lineWidth = 0,
             lineColor = 'transparent') %>%
    hc_legend(enabled = FALSE) 
  
  densities <- prepare_violin(data)
  i <- 0
  
  delta <- 0.1
  for (name in names(data)) {
    i <- i + 1
    
    series_data <- data[[name]]
    if (!is.null(data)) {
      # compute violin plot
      values <- data
      max_density <- max(densities[[name]])
      
      # add violin plot
      hc <- hc %>% hc_add_series(data = densities[[name]],
                                 type = 'areasplinerange',
                                 enableMouseTracking = FALSE,
                                 color = primary_colours[[1]],
                                 lineColor = 'black',
                                 lineWidth = 1)
      
      # # compute percentiles 5 and 95
      small <- quantile(data[[name]], c(0.05, 0.95)) %>% as.numeric()
      small <- cbind(small, c(i - 1, i - 1))
      # add line spanning these percentiles
      hc <- hc %>% hc_add_series(data = small,
                                 type = 'line',
                                 marker = list(symbol = "circle",
                                               enabled = FALSE),
                                 enableMouseTracking = FALSE,
                                 color = primary_colours[[1]],
                                 name = "whiskers",
                                 linkedTo = "0",
                                 lineWidth = 2)
      
      # # compute percentiles 25, 50, 75
      big_left <- quantile(data[[name]], c(0.25, 0.50)) %>% as.numeric()
      big_right <- quantile(data[[name]], c(0.50, 0.75)) %>% as.numeric()
      
      # # prepare rectangles to show
      big_left_rect <- cbind(big_left,
                             c(i - 1 - delta,  i - 1 - delta),
                             c(i - 1 + delta, i - 1 + delta))
      big_left_rect <- rbind(c(big_left[1], i - 1, i - 1),
                             big_left_rect)
      big_left_rect <- rbind(big_left_rect,
                             c(big_left[2], i - 1, i - 1))
      
      big_right_rect <- cbind(big_right,
                              c(i - 1 - delta,  i - 1 - delta),
                              c(i - 1 + delta, i - 1 + delta))
      big_right_rect <- rbind(c(big_right[1], i - 1, i - 1),
                              big_right_rect)
      big_right_rect <- rbind(big_right_rect,
                              c(big_right[2], i - 1, i - 1))
      big_center <- cbind(c(big_right[1], big_right[1]),
                          c(i - 1,  i - 1))
      
      # # add rectangles
      hc <- hc %>% hc_add_series(data = big_right_rect,
                                 type = 'areasplinerange',
                                 marker = list(symbol = 'circle',
                                               enabled = FALSE),
                                 color = "white",
                                 fillOpacity = '100%',
                                 lineColor = "black",
                                 zIndex = 5,
                                 enableMouseTracking = FALSE,
                                 lineWidth = 1) %>%
        hc_add_series(data = big_center,
                      type = 'line',
                      marker = list(symbol = "circle",
                                    enabled = FALSE),
                      color = "black",
                      zIndex = 10,
                      linkedTo = "0",
                      lineWidth = 0) %>%
        hc_add_series(data = big_left_rect,
                      type = 'areasplinerange',
                      marker = list(symbol = 'circle',
                                    enabled = FALSE),
                      color = "white",
                      fillOpacity = '100%',
                      lineColor = "black",
                      zIndex = 5,
                      enableMouseTracking = FALSE,
                      lineWidth = 1) %>%
        hc_add_series(data = cbind(quantile(data[[name]], c(0.25, 0.50, 0.75)) %>% as.numeric(),
                                   rep(i - 1, length(data))))
    }
  }
  hc
}

#create plot
input %>% 
  group_by(type) %>%
  group_map(~ setNames(list(.x$value), .y$type)) %>%
  unlist(recursive = FALSE) %>%
  prepare_violin() %>% 
  plot_violin()

The solution I am using in my example is based on this blog:
https://medium.com/analytics-vidhya/violin-plots-in-r-with-highcharter-1e434b99e8c6

It seems to be the best solution that I have found thus far, but is far from ideal in terms of flexibility, customisation or publication quality.

0

There are 0 answers