I have a normally distributed variable x (like product demand), an index id_1 (like product number) and a second index id_2 (like product group). My goal is to estimate the mean and the standard deviations for x hierarchically (all > product group > product). That's my data:
import numpy as np
import pymc3 as pm
import arviz as az
# data
my_step_1 = 0.4
my_step_2 = 4.1
sd_step_1 = 0.1
sd_step_2 = 0.2
my = 10
sd = .1
grp_n = 8
grps_1 = 5
grps_2 = 4
x = np.round(np.concatenate([np.random.normal(my + i * my_step_1 + j * my_step_2, \
sd + i * sd_step_1 + j * sd_step_2, grp_n) \
for i in range(grps_1) for j in range(grps_2)]), 1) # demand
id_1 = np.repeat(np.arange(grps_1 * grps_2), grp_n) # group, product number
id_2 = np.tile(np.repeat(np.arange(grps_2), grp_n), grps_1) # super-group, product group
shape_1 = len(np.unique(id_1))
shape_2 = len(np.unique(id_2))
I've managed a single hierarchy:
with pm.Model() as model_h1:
#
mu_mu_hyper = pm.Normal('mu_mu_hyper', mu = 0, sd = 10)
mu_sd_hyper = pm.HalfNormal('mu_sd_hyper', 10)
sd_hyper = pm.HalfNormal('sd_hyper', 10)
#
mu = pm.Normal('mu', mu = mu_mu_hyper, sd = mu_sd_hyper, shape = shape_1)
sd = pm.HalfNormal('sd', sd = sd_hyper, shape = shape_1)
y = pm.Normal('y', mu = mu[id_1], sd = sd[id_1], observed = x)
trace_h1 = pm.sample(1000)
#az.plot_forest(trace_h1, var_names=['mu', 'sd'], combined = True)
But how can I code 2 hierarchies?
# 2 hierarchies .. doesn't work
with pm.Model() as model_h2:
#
mu_mu_hyper2 = pm.Normal('mu_mu_hyper2', mu = 0, sd = 10)
mu_sd_hyper2 = pm.HalfNormal('mu_sd_hyper2', sd = 10)
sd_mu_sd_hyper2 = pm.HalfNormal('sd_mu_sd_hyper2', sd = 10)
sd_hyper2 = pm.HalfNormal('sd_hyper2', sd = 10)
#
mu_mu_hyper1 = pm.Normal('mu_hyper1', mu = mu_mu_hyper2, sd = mu_sd_hyper2, shape = shape_2)
mu_sd_hyper1 = pm.HalfNormal('mu_sd_hyper1', sd = sd_mu_sd_hyper2, shape = shape_2)
sd_hyper1 = pm.HalfNormal('sd_hyper1', sd = sd_hyper2, shape = shape_2)
#sd_hyper1 = pm.HalfNormal('sd_hyper1', sd = sd_hyper2[id_2], shape = shape_2)??
#
mu = pm.Normal('mu', mu = mu_mu_hyper1, sd = mu_sd_hyper1, shape = shape_1)
sd = pm.HalfNormal('sd', sd = sd_hyper1, shape = shape_1)
y = pm.Normal('y', mu = mu[id_1], sd = sd[id_1], observed = x)
trace_h2 = pm.sample(1000)
You could try looping through product groups and use the mean, std for a group as constraints for the products belonging to this particular group.