Any thoughts on why the distribution generated by np.random.pareto, using the alpha calculated manually in this code, is slightly different from the original distribution?
Could this mean that a different distribution would fit the data better?
# Show the empirical density of the observed 'sdev' values as a 50-bin
# normalized histogram.
_observed_hist_kwargs = {"bins": 50, "density": True, "alpha": 0.6, "color": "b"}
plt.hist(data["sdev"], **_observed_hist_kwargs)
plt.ylabel("density")
plt.xlabel("retn_abs(stdevs - mean)")
plt.show()
# Estimate the Pareto shape parameter by maximum likelihood.
#
# For samples x_i >= x_min the maximum-likelihood estimate of the shape `a`
# of a classical Pareto distribution (pdf proportional to x ** -(a + 1)) is
#     a = n / sum(ln(x_i / x_min))
# This is the same shape parameter that np.random.pareto takes. In the
# power-law-exponent convention (pdf proportional to x ** -exponent) the
# exponent equals a + 1, so formulas quoted for that exponent must be
# rewritten in terms of `a` before being applied to `alpha` below.
data_len = len(data['sdev'])
x_min = np.min(data['sdev'])
x_max = np.max(data['sdev'])
if x_min <= 0:
    # ln(x / x_min) is undefined (or infinite) for a non-positive scale.
    raise ValueError(
        f"Pareto fit requires strictly positive data; got x_min={x_min!r}"
    )

# NOTE(review): sorting is not needed for the sum below; it is kept because
# later code may rely on data['sdev'] being sorted. If `data` is a DataFrame
# this reorders the column relative to the other columns -- confirm intended.
data['sdev'] = np.sort(data['sdev'])
divide = data['sdev'] / x_min
data['alpha_calc'] = np.log(divide)
alpha = data_len / np.sum(data['alpha_calc'])

# Asymptotic standard error of the shape estimate: a / sqrt(n).
error = alpha / np.sqrt(data_len)
# Share of the total held by the top 20% of samples ("80/20" figure):
# 0.2 ** ((a - 1) / a). Only meaningful for a > 1, where the mean is finite.
eighty = .2 ** ((alpha - 1) / alpha)
print("alpha", alpha)
print("error", error)
print(eighty)
# Draw a synthetic sample from the fitted classical Pareto distribution and
# plot its density for comparison with the observed data.
#
# np.random.pareto samples the Lomax (Pareto II) distribution, whose support
# starts at 0. Per the NumPy documentation, a classical Pareto sample with
# scale x_min is obtained by adding 1 and then multiplying by x_min. Adding
# x_min alone only shifts the Lomax sample and leaves its scale at 1, so the
# result does not follow the distribution that `alpha` was fitted to.
out = (np.random.pareto(alpha, data_len) + 1) * x_min
cust_min = np.min(out)
# Fixed upper limit for the plotted range (presumably chosen to keep the
# heavy tail from stretching the x-axis -- adjust to match the data).
cust_max = 13
print("cust_max", cust_max)
# NOTE(review): align='right' and the truncated range differ from the
# settings of the observed-data histogram; with density=True the truncated
# range is normalized on its own, so the two plots are not strictly
# comparable -- confirm this is intended.
plt.hist(out, align='right', bins=50, range=(cust_min, cust_max), density=True, alpha=0.6, color='b')
plt.show()