I am trying to estimate the covariance of a 3 patches Gaussian mixture model with PyMC3. The mean and covariance are totally unknown and the weights are [1,1,1]
. For the mean estimate, one can use tt.stack([vx,vy])
to build the appropriate quantity. But for the covariance, I want to use [sigma_x, sigma_y, rho]
as the random variable. I try to use tt.stack
to build the corresponding covariance tensor with the code
import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
import pymc3 as pm, theano.tensor as tt
k = 3 # number of patch
ndata = 500 # number of data
centers = np.array([[-10,-10], [0,0], [10,10]])
sigmax = [1,1,1]
sigmay = [1,1,1]
ro = [0,0,0]
cov = lambda sigmax, sigmay, ro: [[sigmax**2, ro*sigmax*sigmay],[ro*sigmax*sigmay, sigmay**2]]
v = np.random.choice([0,1,2], size=ndata, replace=True)
data = []
for i in v:
data.append(np.random.multivariate_normal(centers[i], cov(sigmax[i], sigmay[i], ro[i])))
# plt.hist(data)
data = np.array(data)
# sns.distplot(data[:,1])
ax = plt.gca()
ax.scatter(data[:,0],data[:,1])
# setup model
vx_min, vx_max = -20, 20
vy_min, vy_max = -20, 20
Dx_min, Dx_max = 0.2, 3.0
Dy_min, Dy_max = 0.2, 3.0
ro_min, ro_max = 0.0, 0.9
with pm.Model() as model:
# cluster sizes
p = tt.constant(np.array([1.,1.,1.]))
# cluster centers
vx = pm.Uniform('vx', lower=vx_min, upper=vx_max, shape=3)
vy = pm.Uniform('vy', lower=vx_min, upper=vx_max, shape=3)
Dx1 = pm.Uniform('Dx', lower=Dx_min, upper=Dx_max, shape=3)
Dy1 = pm.Uniform('Dy', lower=Dy_min, upper=Dy_max, shape=3)
ro1 = pm.Uniform('ro', lower=ro_min, upper=ro_max, shape=3)
category = pm.Categorical('category',
p=p,
shape=ndata)
means = tt.stack([vx, vy], axis=-1)
m1 = [Dx1[category]*Dx1[category],ro1[category]*Dx1[category]*Dy1[category]]
m2 = [ro1[category]*Dx1[category]*Dy1[category],Dy1[category]*Dy1[category]]
cov = tt.stack([m1, m2], axis=-1)
points = pm.MvNormal('obs',
mu=means[category],
cov=cov[category],
observed=data)
# sampling
with model:
step1 = pm.Metropolis()
step2 = pm.ElemwiseCategorical(vars=[category], values=[0, 1, 2])
tr = pm.sample(100)
# plot results
pm.plots.traceplot(tr, ['vx']);
plt.show()
but it raises the error:
Traceback (most recent call last):
File "/Users/huox/Downloads/t1108.py", line 51, in <module>
observed=data)
File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 36, in __new__
dist = cls.dist(*args, **kwargs)
File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 47, in dist
dist.__init__(*args, **kwargs)
File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 222, in __init__
lower=lower, *args, **kwargs)
File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 57, in __init__
raise ValueError('cov must be two dimensional.')
ValueError: cov must be two dimensional.
How should I do to fix this? Are there other efficient ways to estimate the covariance matrix?