How to estimate general covariance of a gaussian mixture

109 views Asked by At

I am trying to estimate the covariance of a 3 patches Gaussian mixture model with PyMC3. The mean and covariance are totally unknown and the weights are [1,1,1]. For the mean estimate, one can use tt.stack([vx,vy]) to build the appropriate quantity. But for the covariance, I want to use [sigma_x, sigma_y, rho] as the random variable. I try to use tt.stack to build the corresponding covariance tensor with the code

import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
import pymc3 as pm, theano.tensor as tt

k = 3 # number of patch
ndata = 500 # number of data

centers = np.array([[-10,-10], [0,0], [10,10]])
sigmax = [1,1,1]
sigmay = [1,1,1]
ro = [0,0,0]
cov = lambda sigmax, sigmay, ro: [[sigmax**2, ro*sigmax*sigmay],[ro*sigmax*sigmay, sigmay**2]]
v = np.random.choice([0,1,2], size=ndata, replace=True)

data = []
for i in v:
    data.append(np.random.multivariate_normal(centers[i], cov(sigmax[i], sigmay[i], ro[i])))

# plt.hist(data)
data = np.array(data)
# sns.distplot(data[:,1])
ax = plt.gca()
ax.scatter(data[:,0],data[:,1])

# setup model
vx_min, vx_max = -20, 20
vy_min, vy_max = -20, 20
Dx_min, Dx_max = 0.2, 3.0
Dy_min, Dy_max = 0.2, 3.0
ro_min, ro_max = 0.0, 0.9

with pm.Model() as model:
    # cluster sizes
    p = tt.constant(np.array([1.,1.,1.]))

    # cluster centers
    vx = pm.Uniform('vx', lower=vx_min, upper=vx_max, shape=3)
    vy = pm.Uniform('vy', lower=vx_min, upper=vx_max, shape=3)
    Dx1 = pm.Uniform('Dx', lower=Dx_min, upper=Dx_max, shape=3)
    Dy1 = pm.Uniform('Dy', lower=Dy_min, upper=Dy_max, shape=3)
    ro1 = pm.Uniform('ro', lower=ro_min, upper=ro_max, shape=3)    
    category = pm.Categorical('category',
                              p=p,
                              shape=ndata)

    means = tt.stack([vx, vy], axis=-1)
    m1 = [Dx1[category]*Dx1[category],ro1[category]*Dx1[category]*Dy1[category]]
    m2 = [ro1[category]*Dx1[category]*Dy1[category],Dy1[category]*Dy1[category]]

    cov = tt.stack([m1, m2], axis=-1)

    points = pm.MvNormal('obs',
                         mu=means[category],
                         cov=cov[category],
                         observed=data)

# sampling
with model:
    step1 = pm.Metropolis()
    step2 = pm.ElemwiseCategorical(vars=[category], values=[0, 1, 2])
    tr = pm.sample(100)

# plot results
pm.plots.traceplot(tr, ['vx']);
plt.show()

but it raises the error:

Traceback (most recent call last):
      File "/Users/huox/Downloads/t1108.py", line 51, in <module>
        observed=data)
      File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 36, in __new__
        dist = cls.dist(*args, **kwargs)
      File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/distribution.py", line 47, in dist
        dist.__init__(*args, **kwargs)
      File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 222, in __init__
        lower=lower, *args, **kwargs)
      File "/Users/huox/anaconda2/lib/python2.7/site-packages/pymc3/distributions/multivariate.py", line 57, in __init__
        raise ValueError('cov must be two dimensional.')
    ValueError: cov must be two dimensional.

How should I do to fix this? Are there other efficient ways to estimate the covariance matrix?

0

There are 0 answers