Get median values from sorted data

1.2k views Asked by At

I have a set of sorted (ascending) data in the format below:

| Category | Value | S.D. |
|     A    |  0.1  | 0.1  |
|     A    |  0.2  | 0.05 |
|     A    |  1.3  | 0.08 |
|     B    |  0.1  | 0.01 |
|     B    |  0.2  | 0.08 |
|     B    |  0.6  | 0.9  |
|     B    |  0.7  | 0.01 |
|     B    |  0.9  | 0.05 |
|     B    |  1.1  | 0.6  |
|     C    |  0.5  | 0.3  |
|     C    |  0.9  | 0.04 |
|     C    |  1.0  | 0.14 |
|     C    |  2.1  | 0.1  | etc...

There are about 300 rows of this. I have imported this from csv and have sorted as a List. For example data.get(1).getCategory() results in "A", and data.get(2).getValue() results in "0.2" (It is a String as I am using a library.)

The data is subject to change. I need to calculate a median value for each category, and print each median value with it's category name. Where there are an even number of entries, the middle value with the smallest S.D. should be used. For example, using the above data:

"A: 0.2"
"B: 0.7"
"C: 0.9"
1

There are 1 answers

0
LeffeBrune On BEST ANSWER

Here is a single pass over a sorted list solution:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class Medians {
  public static void printMedians(List<Row> rows) {
    if (rows.size() == 0) return;
    Collections.sort(rows);
    int currentCategoryIndex = 0;
    String currentCategory = rows.get(0).category;
    for (int i = 0; i < rows.size(); i++) {
      if (i == rows.size() - 1
          || !currentCategory.equals(rows.get(i + 1).category)) {
        int categorySize = i + 1 - currentCategoryIndex;
        int medianIndex = currentCategoryIndex + categorySize / 2;
        double median;

        if (categorySize % 2 == 0) {
          median = rows.get(medianIndex - 1).stdDev < rows.get(medianIndex).stdDev
              ? rows.get(medianIndex - 1).value
              : rows.get(medianIndex).value;
        } else {
          median = rows.get(medianIndex).value;
        }

        System.out.printf("%s: %.1f%n", currentCategory, median);

        if (i < rows.size() - 1) {
          currentCategory = rows.get(i + 1).category;
          currentCategoryIndex = i + 1;
        }
      }
    }
  }

  private static class Row implements Comparable<Row> {
    private final String category;
    private final double value;
    private final double stdDev;

    public Row(String category, double value, double standardDeviation) {
      this.category = category;
      this.value = value;
      this.stdDev = standardDeviation;
    }

    @Override
    public int compareTo(Row o) {
      if (category.equals(o.category)) {
        return value == o.value ? 0 : value > o.value ? 1 : - 1;
      }
      return category.compareTo(o.category);
    }
  }

  public static void main(String[] args) {
    List<Row> rows = new ArrayList<>();
    rows.add(new Row("A", 0.2, 0.05));
    rows.add(new Row("A", 1.3, 0.08));
    rows.add(new Row("A", 0.1, 0.1));

    rows.add(new Row("B", 0.6, 0.9));
    rows.add(new Row("B", 1.1, 0.6));
    rows.add(new Row("B", 0.7, 0.01));
    rows.add(new Row("B", 0.9, 0.05));
    rows.add(new Row("B", 0.1, 0.01));
    rows.add(new Row("B", 0.2, 0.08));

    rows.add(new Row("C", 0.5, 0.3));
    rows.add(new Row("C", 1.0, 0.14));
    rows.add(new Row("C", 2.1, 0.1));
    rows.add(new Row("C", 0.9, 0.04));
    printMedians(rows);
  }
}

But I like this one more:

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class CategoryMedianCalculator {
  private final Map<String, List<Row>> categories = new HashMap<>();

  public void addRow(String category, double value, double stdDev) {
    List<Row> rows = categories.get(category);
    if (rows == null) {
      rows = new ArrayList<>();
      categories.put(category, rows);
    }
    rows.add(new Row(category, value, stdDev));
  }

  public Map<String, Double> getMedians() {
    Map<String, Double> result = new TreeMap<>();
    for (Map.Entry<String, List<Row>> entry: categories.entrySet()) {
      result.put(entry.getKey(), getMedian(entry.getValue()));
    }
    return result;
  }

  private static double getMedian(List<Row> rows) {
    Collections.sort(rows);
    int index = rows.size() / 2;
    if (rows.size() % 2 == 0) {
      return rows.get(index - 1).stdDev < rows.get(index).stdDev
          ? rows.get(index - 1).value
          : rows.get(index).value;
    } else {
      return rows.get(index).value;
    }
  }

  private static class Row implements Comparable<Row> {
    private final String category;
    private final double value;
    private final double stdDev;

    public Row(String category, double value, double stdDev) {
      this.category = category;
      this.value = value;
      this.stdDev = stdDev;
    }

    @Override
    public int compareTo(Row o) {
      return value == o.value ? 0 : value > o.value ? 1 : - 1;
    }
  }

  public static void main(String[] args) {
    CategoryMedianCalculator calc = new CategoryMedianCalculator();
    calc.addRow("A", 0.2, 0.05);
    calc.addRow("A", 1.3, 0.08);
    calc.addRow("A", 0.1, 0.1);

    calc.addRow("B", 0.6, 0.9);
    calc.addRow("B", 1.1, 0.6);
    calc.addRow("B", 0.7, 0.01);
    calc.addRow("B", 0.9, 0.05);
    calc.addRow("B", 0.1, 0.01);
    calc.addRow("B", 0.2, 0.08);

    calc.addRow("C", 0.5, 0.3);
    calc.addRow("C", 1.0, 0.14);
    calc.addRow("C", 2.1, 0.1);
    calc.addRow("C", 0.9, 0.04);

    for (Map.Entry<String, Double> median : calc.getMedians().entrySet()) {
      System.out.printf("%s: %.1f%n", median.getKey(), median.getValue());
    }
  }
}