How to change y axis data in np.histogram?

21 views Asked by At

I have code that creates a histogram from data. The data contains the areas of "dots". Right now the histogram plots the number of dots in each category, but I want to calculate dots per square meter (done with the calculate_dots_per_m2 function) and then plot a histogram where the y data is dots/m^2 instead of the original dots/mm^2. Here is my code:

import numpy as np
from matplotlib import pyplot as plt

# Named colours mapped to 3-tuples of channel values.
# NOTE(review): RED = (0, 0, 255) and BLUE = (255, 0, 0) suggest BGR channel
# order rather than RGB -- confirm against whatever consumes these tuples.
color_dict = dict(
    RED=(0, 0, 255),
    GREEN=(0, 225, 0),
    BLUE=(255, 0, 0),
    ORANGE=(0, 165, 255),
    MAGENTA=(255, 0, 255),
)

# Colour names and their tuples as parallel lists, in insertion order.
color_texts = [name for name in color_dict]
colors = [color_dict[name] for name in color_texts]

# Ascending lower edges of the dot-size categories; the last entry opens the
# final "over ..." category used by make_histogram.
all_areas = [0.01, 0.1, 1, 10, 100]

def calculate_dots_per_m2(num_sizes, scale_factor=25):
    """Scale per-category dot counts by a constant factor.

    Args:
        num_sizes: Iterable of dot counts, one entry per size category.
        scale_factor: Multiplier applied to every count. Defaults to 25,
            the value that used to be hard-coded in the function body.

    Returns:
        A tuple ``(num_of_dots_per_m2, total_dots_per_m2)``: the list of
        scaled counts in input order, and the sum of that list.
    """
    num_of_dots_per_m2 = [
        num_of_defects * scale_factor for num_of_defects in num_sizes
    ]
    total_dots_per_m2 = sum(num_of_dots_per_m2)
    return num_of_dots_per_m2, total_dots_per_m2

def make_histogram():
    """Bin the sample dot areas by size category and show them as a bar chart.

    The areas are grouped twice: once by hand, to obtain the per-category
    counts printed in the tick labels, and once with ``np.histogram``, to
    obtain the bar heights. Both groupings use the module-level ``all_areas``
    as category edges. The chart is displayed with ``plt.show()``; nothing is
    returned.
    """
    dots = [[] for _ in all_areas]
    all_dot_areas = [0.05, 1.4, 1.5, 1.9, 3.7, 11.5, 29.1, 99.0, 111.1, 4123.1]

    # Consecutive (lower, upper) edge pairs for every category but the last.
    edge_pairs = list(zip(all_areas[:-1], all_areas[1:]))

    for dot_area in all_dot_areas:
        for i, (lower, upper) in enumerate(edge_pairs):
            # Half-open [lower, upper), the same convention np.histogram uses.
            # With strict "<" on both sides, a dot lying exactly on an edge
            # was counted in the bars but missing from the label counts.
            if lower <= dot_area < upper:
                dots[i].append(dot_area)
        # The last category is open-ended and includes its lower edge.
        if dot_area >= all_areas[-1]:
            dots[-1].append(dot_area)

    num_sizes = [len(subls) for subls in dots]
    # Scaled counts are computed here but not plotted by this function.
    num_of_dots_per_m2, total_dots_per_m2 = calculate_dots_per_m2(num_sizes)

    # A very large final edge turns the open-ended last category into a bin.
    bins = all_areas + [1e9]
    hist, bin_edges = np.histogram(all_dot_areas, bins)
    fig, ax = plt.subplots()
    fig.set_size_inches(16, 9)
    ylabel = "Number of dots per mm^2"
    plt.ylabel(ylabel)

    # One tick label per bar: the size range followed by the dot count.
    ticklabels = []
    for i, (lower, upper) in enumerate(edge_pairs):
        size_x_label = "Dots sized {}-{} mm^2: {}".format(lower, upper, num_sizes[i])
        ticklabels.append(size_x_label)
    lastlabel = "Dots with size over {} mm^2: {}".format(all_areas[-1], num_sizes[-1])
    ticklabels.append(lastlabel)

    # Bar colours are the lower-cased colour names from color_texts.
    colours = [c.lower() for c in color_texts]
    ax.bar(range(len(hist)), hist, width=0.75, align="center", tick_label=ticklabels, color=colours, ec="black")
    plt.show()

# Run the example: builds the bar chart and displays it via plt.show().
make_histogram()

I want to change the code so that the x data stays the same (x is the number of dots in each category), but the y data is scaled to m^2 (number of dots per m^2). So in this simple example I would expect all bars to be 25 times higher than they are now. I tried changing `hist, bin_edges = np.histogram(all_dot_areas, bins)` to `hist, bin_edges = np.histogram(num_of_dots_per_m2, bins)`, but it did not help. I don't understand what I should do here.

1

There is 1 answer

0
Matt Haberland On

You can use the weights parameter of histogram to scale the counts by 25:

# Give every sample a weight of 25, so each dot adds 25 to its bin instead
# of 1; the bin edges are unchanged and only the bar heights are scaled.
hist, bin_edges = np.histogram(all_dot_areas, bins, 
                               weights=np.full_like(all_dot_areas, 25))

enter image description here