I am trying to calibrate a model using pykalman and the scipy optimiser. For some reasons scipy seem to think that my input is a masked array, but it is not. I have added the code below:
k = 0.00000000000001 #small sarting value
T = np.array([1, 2, 3, 4, 5] , dtype=int) #maturities
delta = 1e-9
x = k
transition_covariance=delta / (1 - delta) * np.eye(1)
def obj(x):
k = x[0]
Q = (1 - k) *np.eye(1)
Z = np.exp(-k*T)
Z =Z[:, None] #reshape to 2 dim
transition_covariance=delta / (1 - delta) * np.eye(1)
observation_covariance=0.001*np.eye(len(T))
kf = KalmanFilter(transition_matrices=np.eye(1)*(1-k),
observation_matrices= Z,
transition_covariance=transition_covariance,
observation_covariance=observation_covariance)
lk = kf.loglikelihood(X) # X is my observed data, 1265 rows × 5 columns
return -lk
x = np.array([k])
k =scipy.optimize.minimize(obj, x, method= 'TNC')
Running the scipy optimiser I get the error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-16-8d87c9f2ced0> in <module>
1 # method that works: TNC, Nelder-Mead
----> 2 k =scipy.optimize.minimize(obj, x, method= 'TNC')
~\Anaconda3\lib\site-packages\scipy\optimize\_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
618 callback=callback, **options)
619 elif meth == 'tnc':
--> 620 return _minimize_tnc(fun, x0, args, jac, bounds, callback=callback,
621 **options)
622 elif meth == 'cobyla':
~\Anaconda3\lib\site-packages\scipy\optimize\tnc.py in _minimize_tnc(fun, x0, args, jac, bounds, eps, scale, offset, mesg_num, maxCGit, maxiter, eta, stepmx, accuracy, minfev, ftol, xtol, gtol, rescale, disp, callback, finite_diff_rel_step, maxfun, **unknown_options)
373 messages = MSG_NONE
374
--> 375 sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
376 finite_diff_rel_step=finite_diff_rel_step,
377 bounds=new_bounds)
~\Anaconda3\lib\site-packages\scipy\optimize\optimize.py in _prepare_scalar_function(fun, x0, jac, args, bounds, epsilon, finite_diff_rel_step, hess)
259 # ScalarFunction caches. Reuse of fun(x) during grad
260 # calculation reduces overall function evaluations.
--> 261 sf = ScalarFunction(fun, x0, args, grad, hess,
262 finite_diff_rel_step, bounds, epsilon=epsilon)
263
~\Anaconda3\lib\site-packages\scipy\optimize\_differentiable_functions.py in __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, finite_diff_bounds, epsilon)
74
75 self._update_fun_impl = update_fun
---> 76 self._update_fun()
77
78 # Gradient evaluation
~\Anaconda3\lib\site-packages\scipy\optimize\_differentiable_functions.py in _update_fun(self)
164 def _update_fun(self):
165 if not self.f_updated:
--> 166 self._update_fun_impl()
167 self.f_updated = True
168
~\Anaconda3\lib\site-packages\scipy\optimize\_differentiable_functions.py in update_fun()
71
72 def update_fun():
---> 73 self.f = fun_wrapped(self.x)
74
75 self._update_fun_impl = update_fun
~\Anaconda3\lib\site-packages\scipy\optimize\_differentiable_functions.py in fun_wrapped(x)
68 def fun_wrapped(x):
69 self.nfev += 1
---> 70 return fun(x, *args)
71
72 def update_fun():
<ipython-input-13-48731c614f15> in obj(x)
19 observation_covariance=observation_covariance)
20
---> 21 lk = kf.loglikelihood(X) # X is my observed data, 1016 rows × 17 columns
22 return -lk
~\Anaconda3\lib\site-packages\pykalman\standard.py in loglikelihood(self, X)
1470
1471 # get likelihoods for each time step
-> 1472 loglikelihoods = _loglikelihoods(
1473 observation_matrices, observation_offsets, observation_covariance,
1474 predicted_state_means, predicted_state_covariances, Z
~\Anaconda3\lib\site-packages\pykalman\standard.py in _loglikelihoods(observation_matrices, observation_offsets, observation_covariance, predicted_state_means, predicted_state_covariances, observations)
165 + observation_covariance
166 )
--> 167 loglikelihoods[t] = log_multivariate_normal_density(
168 observation[np.newaxis, :],
169 predicted_observation_mean[np.newaxis, :],
~\Anaconda3\lib\site-packages\pykalman\utils.py in log_multivariate_normal_density(X, means, covars, min_covar)
71 lower=True)
72 cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol)))
---> 73 cv_sol = solve_triangular(cv_chol, (X - mu).T, lower=True).T
74 log_prob[:, c] = - .5 * (np.sum(cv_sol ** 2, axis=1) + \
75 n_dim * np.log(2 * np.pi) + cv_log_det)
~\Anaconda3\lib\site-packages\scipy\linalg\basic.py in solve_triangular(a, b, trans, lower, unit_diagonal, overwrite_b, debug, check_finite)
332
333 a1 = _asarray_validated(a, check_finite=check_finite)
--> 334 b1 = _asarray_validated(b, check_finite=check_finite)
335 if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]:
336 raise ValueError('expected square matrix')
~\Anaconda3\lib\site-packages\scipy\_lib\_util.py in _asarray_validated(a, check_finite, sparse_ok, objects_ok, mask_ok, as_inexact)
259 if not mask_ok:
260 if np.ma.isMaskedArray(a):
--> 261 raise ValueError('masked arrays are not supported')
262 toarray = np.asarray_chkfinite if check_finite else np.asarray
263 a = toarray(a)
ValueError: masked arrays are not supported
Maybe something to do with an update in the scipy library? It worked with a different setup, so that's all I can think. My versions are: Python 3.8.5 Scipy 1.5.2 Pykalman 0.9.5
Thanks!
I found the solution, which involves a small change in the utils.py file in the pykalman library (line 73):
Source: https://github.com/pykalman/pykalman/issues/83