Proper specification of a nested logit model in pymc3

I'm working on a nested logistic regression model with three outcomes representing choices A, B, or C. The first level models the choice between A and (B or C), and the second level models the choice between B and C, given that B or C was chosen. The code below uses made-up data, but I'm not sure I specified the model correctly. By definition the probability of (B or C) is at least the probability of B, yet when sampling from the posterior predictive with very small sample sizes, the "BorC" counts can come out less than the B counts. Such small sample sizes probably won't occur in the data I'm actually interested in, but the fact that this happens at all makes me think I did something wrong. Thanks!
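
For context, here is a minimal simulation of the generative process I have in mind (with made-up probabilities, separate from the model code below). Drawing B out of the BorC successes is what should force B <= BorC:

import numpy as np

rng = np.random.RandomState(0)
n = 10               # small per-group sample size
p_BorC = 0.8         # first level: P(B or C)
p_BgivenBorC = 0.6   # second level: P(B | B or C)

# Nested draws: B is drawn out of the BorC successes,
# so B <= BorC <= n holds in every simulated dataset
n_BorC = rng.binomial(n, p_BorC, size=1000)
n_B    = rng.binomial(n_BorC, p_BgivenBorC)
assert (n_B <= n_BorC).all()

The full model code is below.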

import numpy as np
import pandas as pd
import pymc3 as pm
from scipy.special import logit
import matplotlib.pyplot as plt
from theano import shared

def hierarchical_normal(name, shape, mu=0., cs=5.):
    # Non-centered parameterization of a hierarchical Normal:
    # name = mu + delta * sigma, with delta ~ N(0, 1) and sigma ~ HalfCauchy(cs)
    delta = pm.Normal('delta_{}'.format(name), 0., 1., shape=shape)
    sigma = pm.HalfCauchy('sigma_{}'.format(name), cs)

    return pm.Deterministic(name, mu + delta * sigma)

NUTS_KWARGS = {'target_accept': 0.99}
SEED = 4260026 # from random.org, for reproducibility

np.random.seed(SEED)

ndraws = 1000

counts =[[19, 50, 37],
         [21, 67, 55],
         [11, 53, 38],
         [17, 54, 45],
         [24, 93, 66],
         [27, 53, 70]]

counts = pd.DataFrame(counts,columns=["A","B","C"])

counts["BorC"] = counts[["B","C"]].sum(axis=1)
counts["n"]    = counts[["A","B","C"]].sum(axis=1)
print(counts)

group   = counts.index.values
n_group = np.unique(group).size

obs_B    = counts.B.values
obs_BorC = counts.BorC.values
obs_n    = counts.n.values

obs_n_ = shared(obs_n)

with pm.Model() as model:
    beta0      = pm.Normal('beta0', 0.,sd=5.)
    beta_group = hierarchical_normal('beta_group', n_group)

    eta_BorC   = beta0 + beta_group[group]
    p_BorC     = pm.math.sigmoid(eta_BorC)
    like_BorC  = pm.Binomial('obs_BorC', obs_n_, p_BorC, observed=obs_BorC)

    alpha0      = pm.Normal('alpha0', 0.,sd=5.)
    alpha_group = hierarchical_normal('alpha_group', n_group)

    eta_BgivenBorC  = alpha0 + alpha_group[group]
    p_BgivenBorC    = pm.math.sigmoid(eta_BgivenBorC)
    like_BgivenBorC = pm.Binomial('obs_BgivenBorC', obs_BorC, p_BgivenBorC, observed=obs_B)

    p_B    = p_BgivenBorC*p_BorC
    like_B = pm.Binomial('obs_B', obs_n_, p_B, observed=obs_B)

    trace  = pm.sample(draws=ndraws, random_seed=SEED,nuts_kwargs=NUTS_KWARGS)

obs_n_.set_value(np.array([10]*6))

pp_trace = pm.sample_ppc(trace, model=model)
C = pp_trace['obs_BorC'] - pp_trace['obs_B']  # implied counts of C
print(np.min(C))  # negative values mean a draw had B > BorC
B = pp_trace['obs_B']
C = np.sum(C, axis=1)
B = np.sum(B, axis=1)
diff = C - B
plt.figure()
plt.hist(diff, 50)
plt.show()

Edit: I see from browsing the pymc3 source that log likelihoods are automatically summed over all observed variables, which means my specification of like_B is redundant. In that case, I think I just need to figure out how to set obs_BorC properly for posterior sampling.
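
As a quick illustration of that summing behavior (a hypothetical toy check, not my actual model): observing the same counts through two Binomial likelihoods changes the joint log probability by exactly one redundant term.

import pymc3 as pm

# Hypothetical toy check: pymc3 sums the log likelihood over every
# observed RV, so observing the same counts twice adds a redundant
# Binomial term to the joint log probability.
with pm.Model() as m1:
    p = pm.Beta('p', 1., 1.)
    pm.Binomial('obs', 10, p, observed=7)

with pm.Model() as m2:
    p = pm.Beta('p', 1., 1.)
    pm.Binomial('obs', 10, p, observed=7)
    pm.Binomial('obs_again', 10, p, observed=7)  # redundant duplicate

print(m1.logp(m1.test_point))
print(m2.logp(m2.test_point))  # differs from m1 by one Binomial logp term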

1 Answer

Answered by user3843408:

Here is an attempted solution, which I think is acceptable as a workaround if a nicer solution does not exist. I wrote a custom posterior predictive sampler in which the shared variable behind obs_BorC is reset on each iteration, so that the second-level Binomial always uses the first-level draw as its number of trials.

import numpy as np
import pandas as pd
import pymc3 as pm
from scipy.special import logit
import matplotlib.pyplot as plt
from theano import shared
from collections import defaultdict

from scipy.stats import norm

def hierarchical_normal(name, shape, mu=0., cs=5.):
    # Non-centered parameterization of a hierarchical Normal:
    # name = mu + delta * sigma, with delta ~ N(0, 1) and sigma ~ HalfCauchy(cs)
    delta = pm.Normal('delta_{}'.format(name), 0., 1., shape=shape)
    sigma = pm.HalfCauchy('sigma_{}'.format(name), cs)

    return pm.Deterministic(name, mu + delta * sigma)

NUTS_KWARGS = {'target_accept': 0.99}
SEED = 4260026 # from random.org, for reproducibility

np.random.seed(SEED)

ndraws = 1000

counts =[[19, 50, 37],
         [21, 67, 55],
         [11, 53, 38],
         [17, 54, 45],
         [24, 93, 66],
         [27, 53, 70]]

counts = pd.DataFrame(counts,columns=["A","B","C"])

counts["BorC"] = counts[["B","C"]].sum(axis=1)
counts["n"]    = counts[["A","B","C"]].sum(axis=1)
print(counts)

group   = counts.index.values
n_group = np.unique(group).size

obs_B    = counts.B.values
obs_BorC = counts.BorC.values
obs_n    = counts.n.values

obs_n_ = shared(obs_n)
obs_BorC_ = shared(obs_BorC)

with pm.Model() as model:
    beta0      = pm.Normal('beta0', 0.,sd=5.)
    beta_group = hierarchical_normal('beta_group', n_group)

    eta_BorC   = beta0 + beta_group[group]
    p_BorC     = pm.math.sigmoid(eta_BorC)
    like_BorC  = pm.Binomial('obs_BorC', obs_n_, p_BorC, observed=obs_BorC)

    alpha0      = pm.Normal('alpha0', 0.,sd=5.)
    alpha_group = hierarchical_normal('alpha_group', n_group)

    eta_BgivenBorC  = alpha0 + alpha_group[group]
    p_BgivenBorC    = pm.math.sigmoid(eta_BgivenBorC)
    like_BgivenBorC = pm.Binomial('obs_BgivenBorC', obs_BorC_, p_BgivenBorC, observed=obs_B)

    trace  = pm.sample(draws=ndraws, random_seed=SEED,nuts_kwargs=NUTS_KWARGS)

#plt.figure()
#axs = pm.forestplot(trace,varnames=['beta0','beta_group','alpha0','alpha_group'])
#plt.savefig("Forest.png")
#plt.close()

obs_n_.set_value(np.array([10000]*6))
indices = np.random.randint(0, len(trace), 1000)
ppc = defaultdict(list)
for idx in indices:
    param  = trace[idx]
    # Draw the BorC counts first, then condition the second-level
    # draw on them by resetting the shared variable
    n_BorC = model["obs_BorC"].distribution.random(point=param, size=None)
    obs_BorC_.set_value(np.array(n_BorC))

    n_B = model["obs_BgivenBorC"].distribution.random(point=param, size=None)
    ppc["obs_BorC"].append(n_BorC)
    ppc["obs_B"].append(n_B)
pp_trace = {k: np.asarray(v) for k, v in ppc.items()}
#pp_trace = pm.sample_ppc(trace, model=model,samples=20000)
C = pp_trace['obs_BorC'] - pp_trace['obs_B']  # implied counts of C
print(np.min(C))
B = pp_trace['obs_B']
C = np.sum(C, axis=1)
B = np.sum(B, axis=1)
diff = (C - B)/60000.  # normalize by total trials (6 groups x 10000)
plt.figure()
x    = np.linspace(np.min(diff), np.max(diff), 200)
mean = np.mean(diff)
std  = np.std(diff)
plt.hist(diff, 50, density=True)
plt.plot(x, norm.pdf(x, mean, std))
plt.show()
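
Because each obs_B draw uses the obs_BorC draw from the same iteration as its number of trials, obs_B <= obs_BorC now holds by construction, and the printed minimum of C can no longer be negative.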