Cumulative Density plot in vega-lite

161 views Asked by At

I created a cumulative density visualization based on this Vega-Lite example, as shown below:

var data = [{"student_name": "student 0", "e": "100.15", "d": "127.81"}, {"student_name": "student 1", "e": "100.30", "d": "189.94"}, {"student_name": "student 2", "e": "100.15", "d": "105.33"}, {"student_name": "student 3", "e": "99.41", "d": "85.36"}, {"student_name": "student 4", "e": "100.00", "d": "203.70"}, {"student_name": "student 5", "e": "100.15", "d": "139.05"}, {"student_name": "student 19", "e": "100.15", "d": "102.66"}, {"student_name": "student 20", "e": "95.71", "d": "52.96"}, {"student_name": "student 21", "e": "99.85", "d": "99.41"}, {"student_name": "student 22", "e": "98.96", "d": "100.44"}, {"student_name": "student 23", "e": "100.15", "d": "131.07"}, {"student_name": "student 24", "e": "99.56", "d": "76.92"}, {"student_name": "student 25", "e": "100.15", "d": "213.46"}, {"student_name": "student 26", "e": "100.15", "d": "311.24"}, {"student_name": "student 27", "e": "100.15", "d": "21.89"}, {"student_name": "student 28", "e": "96.60", "d": "6.36"}, {"student_name": "student 29", "e": "53.70", "d": "3.70"}, {"student_name": "student 30", "e": "96.75", "d": "46.60"}, {"student_name": "student 31", "e": "100.15", "d": "100.15"}, {"student_name": "student 32", "e": "100.30", "d": "115.68"}, {"student_name": "student 33", "e": "87.13", "d": "103.85"}, {"student_name": "student 34", "e": "100.15", "d": "104.14"}, {"student_name": "student 35", "e": "99.26", "d": "59.17"}, {"student_name": "student 36", "e": "100.15", "d": "171.30"}, {"student_name": "student 37", "e": "99.11", "d": "94.08"}, {"student_name": "student 38", "e": "81.66", "d": "57.40"}, {"student_name": "student 39", "e": "96.01", "d": "154.59"}, {"student_name": "student 90", "e": "1.04", "d": "1.33"}, {"student_name": "student 91", "e": "99.70", "d": "26.18"}, {"student_name": "student 92", "e": "96.30", "d": "78.11"}, {"student_name": "student 93", "e": "99.85", "d": "11.83"}, {"student_name": "student 94", "e": "100.15", "d": "172.93"}, {"student_name": "student 95", "e": "100.00", 
"d": "198.82"}, {"student_name": "student 96", "e": "100.15", "d": "155.92"}, {"student_name": "student 97", "e": "92.01", "d": "97.19"}, {"student_name": "student 98", "e": "98.52", "d": "71.30"}, {"student_name": "student 99", "e": "100.15", "d": "111.69"}, {"student_name": "student 100", "e": "0.30", "d": "0.30"}, {"student_name": "student 175", "e": "98.96", "d": "91.12"}, {"student_name": "student 176", "e": "100.00", "d": "226.04"}, {"student_name": "student 177", "e": "98.67", "d": "150.89"}, {"student_name": "student 178", "e": "97.49", "d": "68.79"}, {"student_name": "student 179", "e": "100.15", "d": "133.58"}, {"student_name": "student 180", "e": 0, "d": 0}, {"student_name": "student 181", "e": 0, "d": 0}, {"student_name": "student 182", "e": 0, "d": 0}, {"student_name": "student 183", "e": 0, "d": 0}, {"student_name": "student 184", "e": 0, "d": 0}, {"student_name": "student 185", "e": 0, "d": 0}, {"student_name": "student 186", "e": 0, "d": 0}, {"student_name": "student 187", "e": 0, "d": 0}, {"student_name": "student 188", "e": 0, "d": 0}, {"student_name": "student 189", "e": 0, "d": 0}, {"student_name": "student 190", "e": 0, "d": 0}, {"student_name": "student 191", "e": 0, "d": 0}];

/**
 * Build a spec for an area chart of the cumulative number of students
 * over their `d` value.
 *
 * Assigned without a declaration on purpose: the snippet calls it as
 * `this.createChart(data)`, which presumably relies on this being a property
 * of the global object in a classic (non-module) script.
 *
 * @param {Array<Object>} data - Records with `student_name`, `d` and `e`
 *   fields; `d` and `e` may be numeric strings or numbers.
 * @returns {Object} Whatever `plot.toObject()` produces for the chart
 *   (passed to `vegaEmbed` by the caller).
 */
createChart = function (data) {
  // The x domain spans at least [0, 100], and grows when a larger `d` exists.
  const maxD = d3.max(data, (record) => parseFloat(record.d));
  const xDomainMax = d3.max([100, maxD]);

  const plot = vl.markArea()
    .data(data)
    .transform([
      // `d` and `e` arrive as strings for most records; derive numeric copies.
      { "calculate": "toNumber(datum.d)", "as": "d2" },
      { "calculate": "toNumber(datum.e)", "as": "e2" },
      {
        // Running count over records sorted by d2; the frame [null, 0]
        // covers everything from the first record up to the current one.
        "sort": [{ "field": "d2" }],
        "window": [{ "op": "count", "field": "student_name", "as": "Cumulative Count" }],
        "frame": [null, 0]
      }
    ])
    .encode(
      vl.y()
        .fieldQ('Cumulative Count')
        .title('# students'),
      vl.x()
        .fieldQ('d2')
        .scale({ "domain": [0, xDomainMax] })
        .title('D')
    )
    .width(500)
    .height(250);

  return plot.toObject();
};

// Build the chart spec and render it into the #stats container.
const chart_spec_json = this.createChart(data);
const opt = {
  renderer: "canvas",
  actions: false
};
// vegaEmbed returns a Promise; log failures instead of leaving the
// rejection unhandled.
vegaEmbed("#stats", chart_spec_json, opt).catch(console.error);
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/4.13.0/d3.min.js"></script>
<script src="https://unpkg.com/[email protected]/build/vega.min.js"></script>
<script src="https://unpkg.com/[email protected]/build/vega-lite.min.js"></script>
<script src="https://www.unpkg.com/[email protected]/build/vega-embed.min.js"></script>
<script src="https://unpkg.com/[email protected]/build/vega-lite-api.min.js"></script>
<div id="stats" />

This is how it looks (You can also click "Run code snippet" button to see it in action):

enter image description here

Next, I wanted to convert it to a bar chart, so I changed markArea() to markRect() and added bin(true):

var data = [{"student_name": "student 0", "e": "100.15", "d": "127.81"}, {"student_name": "student 1", "e": "100.30", "d": "189.94"}, {"student_name": "student 2", "e": "100.15", "d": "105.33"}, {"student_name": "student 3", "e": "99.41", "d": "85.36"}, {"student_name": "student 4", "e": "100.00", "d": "203.70"}, {"student_name": "student 5", "e": "100.15", "d": "139.05"}, {"student_name": "student 19", "e": "100.15", "d": "102.66"}, {"student_name": "student 20", "e": "95.71", "d": "52.96"}, {"student_name": "student 21", "e": "99.85", "d": "99.41"}, {"student_name": "student 22", "e": "98.96", "d": "100.44"}, {"student_name": "student 23", "e": "100.15", "d": "131.07"}, {"student_name": "student 24", "e": "99.56", "d": "76.92"}, {"student_name": "student 25", "e": "100.15", "d": "213.46"}, {"student_name": "student 26", "e": "100.15", "d": "311.24"}, {"student_name": "student 27", "e": "100.15", "d": "21.89"}, {"student_name": "student 28", "e": "96.60", "d": "6.36"}, {"student_name": "student 29", "e": "53.70", "d": "3.70"}, {"student_name": "student 30", "e": "96.75", "d": "46.60"}, {"student_name": "student 31", "e": "100.15", "d": "100.15"}, {"student_name": "student 32", "e": "100.30", "d": "115.68"}, {"student_name": "student 33", "e": "87.13", "d": "103.85"}, {"student_name": "student 34", "e": "100.15", "d": "104.14"}, {"student_name": "student 35", "e": "99.26", "d": "59.17"}, {"student_name": "student 36", "e": "100.15", "d": "171.30"}, {"student_name": "student 37", "e": "99.11", "d": "94.08"}, {"student_name": "student 38", "e": "81.66", "d": "57.40"}, {"student_name": "student 39", "e": "96.01", "d": "154.59"}, {"student_name": "student 90", "e": "1.04", "d": "1.33"}, {"student_name": "student 91", "e": "99.70", "d": "26.18"}, {"student_name": "student 92", "e": "96.30", "d": "78.11"}, {"student_name": "student 93", "e": "99.85", "d": "11.83"}, {"student_name": "student 94", "e": "100.15", "d": "172.93"}, {"student_name": "student 95", "e": "100.00", 
"d": "198.82"}, {"student_name": "student 96", "e": "100.15", "d": "155.92"}, {"student_name": "student 97", "e": "92.01", "d": "97.19"}, {"student_name": "student 98", "e": "98.52", "d": "71.30"}, {"student_name": "student 99", "e": "100.15", "d": "111.69"}, {"student_name": "student 100", "e": "0.30", "d": "0.30"}, {"student_name": "student 175", "e": "98.96", "d": "91.12"}, {"student_name": "student 176", "e": "100.00", "d": "226.04"}, {"student_name": "student 177", "e": "98.67", "d": "150.89"}, {"student_name": "student 178", "e": "97.49", "d": "68.79"}, {"student_name": "student 179", "e": "100.15", "d": "133.58"}, {"student_name": "student 180", "e": 0, "d": 0}, {"student_name": "student 181", "e": 0, "d": 0}, {"student_name": "student 182", "e": 0, "d": 0}, {"student_name": "student 183", "e": 0, "d": 0}, {"student_name": "student 184", "e": 0, "d": 0}, {"student_name": "student 185", "e": 0, "d": 0}, {"student_name": "student 186", "e": 0, "d": 0}, {"student_name": "student 187", "e": 0, "d": 0}, {"student_name": "student 188", "e": 0, "d": 0}, {"student_name": "student 189", "e": 0, "d": 0}, {"student_name": "student 190", "e": 0, "d": 0}, {"student_name": "student 191", "e": 0, "d": 0}];

/**
 * Build a spec for a rect-mark chart of the cumulative number of students
 * over their binned `d` value.
 *
 * Assigned without a declaration on purpose: the snippet calls it as
 * `this.createChart(data)`, which presumably relies on this being a property
 * of the global object in a classic (non-module) script.
 *
 * @param {Array<Object>} data - Records with `student_name`, `d` and `e`
 *   fields; `d` and `e` may be numeric strings or numbers.
 * @returns {Object} Whatever `plot.toObject()` produces for the chart
 *   (passed to `vegaEmbed` by the caller).
 */
createChart = function (data) {
  // The x domain spans at least [0, 100], and grows when a larger `d` exists.
  const maxD = d3.max(data, (record) => parseFloat(record.d));
  const xDomainMax = d3.max([100, maxD]);

  const plot = vl.markRect()
    .data(data)
    .transform([
      // `d` and `e` arrive as strings for most records; derive numeric copies.
      { "calculate": "toNumber(datum.d)", "as": "d2" },
      { "calculate": "toNumber(datum.e)", "as": "e2" },
      {
        // Running count over records sorted by d2; the frame [null, 0]
        // covers everything from the first record up to the current one.
        "sort": [{ "field": "d2" }],
        "window": [{ "op": "count", "field": "student_name", "as": "Cumulative Count" }],
        "frame": [null, 0]
      }
    ])
    .encode(
      vl.y()
        .fieldQ('Cumulative Count')
        .title('# students'),
      vl.x()
        .fieldQ('d2')
        // Single bin setting: the earlier `.bin(true)` was overridden by this
        // call anyway, so only the effective configuration is kept.
        .bin({ maxbins: 20 })
        .scale({ "domain": [0, xDomainMax] })
        .title('D')
    )
    .width(500)
    .height(250);

  return plot.toObject();
};

// Build the chart spec and render it into the #stats container.
const chart_spec_json = this.createChart(data);
const opt = {
  renderer: "canvas",
  actions: false
};
// vegaEmbed returns a Promise; log failures instead of leaving the
// rejection unhandled.
vegaEmbed("#stats", chart_spec_json, opt).catch(console.error);
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/4.13.0/d3.min.js"></script>
<script src="https://unpkg.com/[email protected]/build/vega.min.js"></script>
<script src="https://unpkg.com/[email protected]/build/vega-lite.min.js"></script>
<script src="https://www.unpkg.com/[email protected]/build/vega-embed.min.js"></script>
<script src="https://unpkg.com/[email protected]/build/vega-lite-api.min.js"></script>
<div id="stats" />

This is how it looks (You can also click "Run code snippet" button to see it in action):

enter image description here

The second plot shows no bars for the range 240-300, presumably because there are no students with a D value between 240 and 300. But why is this so? My understanding is that it should show bars of exactly the same height as the one for 220-240, much like the first chart. At least the first chart does not show such a gap at 240-300.

PS: Here is the observable notebook link.

1

There is 1 answer

3
Marcelo On

The intended purpose of the bin transform is to create a histogram, which counts the number of elements in each bucket. As you said in the original post, there are buckets with no values.

Basically, you have the following data. I binned the d value first, then calculated the cumulative count.

Binned data points

If you use the line mark, you almost have the desired chart, but you want to use the rect mark.

The rect mark requires the width and height of the rectangle, or the encoding channels x2 and y2. How can you provide the data to those channels? If you are using Vega-lite, you should use an ordinal scale in order to supply those values. Vega-lite is a simplification of the more complex grammar Vega and is not that flexible.

Fortunately, Vega-Lite has the density transform. From the title of your post, it seems to be what you are looking for. Here is the code using the density transform.

var data = [{"student_name": "student 0", "e": "100.15", "d": "127.81"}, {"student_name": "student 1", "e": "100.30", "d": "189.94"}, {"student_name": "student 2", "e": "100.15", "d": "105.33"}, {"student_name": "student 3", "e": "99.41", "d": "85.36"}, {"student_name": "student 4", "e": "100.00", "d": "203.70"}, {"student_name": "student 5", "e": "100.15", "d": "139.05"}, {"student_name": "student 19", "e": "100.15", "d": "102.66"}, {"student_name": "student 20", "e": "95.71", "d": "52.96"}, {"student_name": "student 21", "e": "99.85", "d": "99.41"}, {"student_name": "student 22", "e": "98.96", "d": "100.44"}, {"student_name": "student 23", "e": "100.15", "d": "131.07"}, {"student_name": "student 24", "e": "99.56", "d": "76.92"}, {"student_name": "student 25", "e": "100.15", "d": "213.46"}, {"student_name": "student 26", "e": "100.15", "d": "311.24"}, {"student_name": "student 27", "e": "100.15", "d": "21.89"}, {"student_name": "student 28", "e": "96.60", "d": "6.36"}, {"student_name": "student 29", "e": "53.70", "d": "3.70"}, {"student_name": "student 30", "e": "96.75", "d": "46.60"}, {"student_name": "student 31", "e": "100.15", "d": "100.15"}, {"student_name": "student 32", "e": "100.30", "d": "115.68"}, {"student_name": "student 33", "e": "87.13", "d": "103.85"}, {"student_name": "student 34", "e": "100.15", "d": "104.14"}, {"student_name": "student 35", "e": "99.26", "d": "59.17"}, {"student_name": "student 36", "e": "100.15", "d": "171.30"}, {"student_name": "student 37", "e": "99.11", "d": "94.08"}, {"student_name": "student 38", "e": "81.66", "d": "57.40"}, {"student_name": "student 39", "e": "96.01", "d": "154.59"}, {"student_name": "student 90", "e": "1.04", "d": "1.33"}, {"student_name": "student 91", "e": "99.70", "d": "26.18"}, {"student_name": "student 92", "e": "96.30", "d": "78.11"}, {"student_name": "student 93", "e": "99.85", "d": "11.83"}, {"student_name": "student 94", "e": "100.15", "d": "172.93"}, {"student_name": "student 95", "e": "100.00", 
"d": "198.82"}, {"student_name": "student 96", "e": "100.15", "d": "155.92"}, {"student_name": "student 97", "e": "92.01", "d": "97.19"}, {"student_name": "student 98", "e": "98.52", "d": "71.30"}, {"student_name": "student 99", "e": "100.15", "d": "111.69"}, {"student_name": "student 100", "e": "0.30", "d": "0.30"}, {"student_name": "student 175", "e": "98.96", "d": "91.12"}, {"student_name": "student 176", "e": "100.00", "d": "226.04"}, {"student_name": "student 177", "e": "98.67", "d": "150.89"}, {"student_name": "student 178", "e": "97.49", "d": "68.79"}, {"student_name": "student 179", "e": "100.15", "d": "133.58"}, {"student_name": "student 180", "e": 0, "d": 0}, {"student_name": "student 181", "e": 0, "d": 0}, {"student_name": "student 182", "e": 0, "d": 0}, {"student_name": "student 183", "e": 0, "d": 0}, {"student_name": "student 184", "e": 0, "d": 0}, {"student_name": "student 185", "e": 0, "d": 0}, {"student_name": "student 186", "e": 0, "d": 0}, {"student_name": "student 187", "e": 0, "d": 0}, {"student_name": "student 188", "e": 0, "d": 0}, {"student_name": "student 189", "e": 0, "d": 0}, {"student_name": "student 190", "e": 0, "d": 0}, {"student_name": "student 191", "e": 0, "d": 0}];

/**
 * Build a spec for a rect-mark chart of the cumulative distribution of
 * students over `d`, using the Vega-Lite density transform.
 *
 * Assigned without a declaration on purpose: the snippet calls it as
 * `this.createChart(data)`, which presumably relies on this being a property
 * of the global object in a classic (non-module) script.
 *
 * @param {Array<Object>} data - Records with `student_name`, `d` and `e`
 *   fields; `d` and `e` may be numeric strings or numbers.
 * @returns {Object} Whatever `plot.toObject()` produces for the chart
 *   (passed to `vegaEmbed` by the caller).
 */
createChart = function (data) {
  // No explicit scale domain is set on this chart, so the max-of-data
  // values the sibling snippets compute are not needed here.
  const plot = vl.markRect()
    .data(data)
    .transform([
      // `d` and `e` arrive as strings for most records; derive numeric copies.
      { "calculate": "toNumber(datum.d)", "as": "d2" },
      { "calculate": "toNumber(datum.e)", "as": "e2" },
      {
        // Cumulative estimate over d2, reported as counts and sampled in
        // 20 steps. The encodings below read the transform's `value` and
        // `density` output fields.
        "density": "d2",
        "cumulative": true,
        "counts": true,
        "steps": 20
      }
    ])
    .encode(
      vl.y()
        .fieldQ('density')
        .title('# students'),
      vl.x()
        // Ordinal x so each sampled value gets its own band for the rect mark.
        .fieldO('value')
        .axis({ labelAngle: 0, format: "d" })
        .title('D')
    )
    .width(500)
    .height(250);

  return plot.toObject();
};

// Build the chart spec and render it into the #stats container.
const chart_spec_json = this.createChart(data);
const opt = {
  renderer: "canvas",
  actions: false
};
// vegaEmbed returns a Promise; log failures instead of leaving the
// rejection unhandled.
vegaEmbed("#stats", chart_spec_json, opt).catch(console.error);
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/4.13.0/d3.min.js"></script>
<script src="https://unpkg.com/[email protected]/build/vega.min.js"></script>
<script src="https://unpkg.com/[email protected]/build/vega-lite.min.js"></script>
<script src="https://www.unpkg.com/[email protected]/build/vega-embed.min.js"></script>
<script src="https://unpkg.com/[email protected]/build/vega-lite-api.min.js"></script>
<div id="stats" />