VARMAX Results extend causes ValueError: array must not contain infs or NaNs

479 views Asked by At

I'm trying to extend a VARMAX model with new observations in order to do forward-walking validation.

However, my VARMAXResults.extend() is throwing a ValueError.

Reproducible example:

#---- This returns: ValueError: array must not contain infs or NaNs ----------
from statsmodels.tsa.statespace.varmax import VARMAX
import numpy as np

np.random.seed(1)
y_hist = 100*np.random.rand(50,2)

model = VARMAX(endog= y_hist,order=(2,0)).fit()
print("VARMAX model summary")
print(model.summary())
next_y_hat = model.forecast()
print("\nPredicted next value")
print(next_y_hat)

# simulate next observed value
next_y = next_y_hat

# extend model
model = model.extend(endog = next_y) # ValueError

Please note that this approach works fine in the univariate case using SARIMAX:

#-------- This works fine: -------------
from statsmodels.tsa.statespace.sarimax import SARIMAX

uni_model = SARIMAX(endog=y_hist[:,1],order=(2,0,0)).fit()
print("SARIMAX model summary")
print(uni_model.summary())
next_y_hat_uni = uni_model.forecast()
print("\nPredicted next value")
print(next_y_hat_uni)

# simulate next observed value
next_y_uni = next_y_hat_uni

# extend model
uni_model = uni_model.extend(endog = next_y_uni) # no ValueError

Versions: statsmodels v0.11.1, numpy 1.16.3.

Traceback:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-93-347df5f5ac07> in <module>
     16 
     17 # try to update model
---> 18 model = model.extend(endog = next_y)
     19 # returns ValueError: array must not contain infs or NaNs

/usr/local/lib/python3.7/site-packages/statsmodels/tsa/statespace/varmax.py in extend(self, endog, exog, **kwargs)
    915 
    916         if self.smoother_results is not None:
--> 917             res = mod.smooth(self.params)
    918         else:
    919             res = mod.filter(self.params)

/usr/local/lib/python3.7/site-packages/statsmodels/tsa/statespace/mlemodel.py in smooth(self, params, transformed, includes_fixed, complex_step, cov_type, cov_kwds, return_ssm, results_class, results_wrapper_class, **kwargs)
    839         return self._wrap_results(params, result, return_ssm, cov_type,
    840                                   cov_kwds, results_class,
--> 841                                   results_wrapper_class)
    842 
    843     _loglike_param_names = ['transformed', 'includes_fixed', 'complex_step']

/usr/local/lib/python3.7/site-packages/statsmodels/tsa/statespace/mlemodel.py in _wrap_results(self, params, result, return_raw, cov_type, cov_kwds, results_class, wrapper_class)
    736                 wrapper_class = self._res_classes['fit'][1]
    737 
--> 738             res = results_class(self, params, result, **result_kwargs)
    739             result = wrapper_class(res)
    740         return result

/usr/local/lib/python3.7/site-packages/statsmodels/tsa/statespace/varmax.py in __init__(self, model, params, filter_results, cov_type, cov_kwds, **kwargs)
    854                  cov_kwds=None, **kwargs):
    855         super(VARMAXResults, self).__init__(model, params, filter_results,
--> 856                                             cov_type, cov_kwds, **kwargs)
    857 
    858         self.specification = Bunch(**{

/usr/local/lib/python3.7/site-packages/statsmodels/tsa/statespace/mlemodel.py in __init__(self, model, params, results, cov_type, cov_kwds, **kwargs)
   2274             'smoothed_state_disturbance_cov']
   2275         for name in extra_arrays:
-> 2276             setattr(self, name, getattr(self.filter_results, name, None))
   2277 
   2278         # Remove too-short results when memory conservation was used

/usr/local/lib/python3.7/site-packages/statsmodels/tsa/statespace/kalman_filter.py in standardized_forecasts_error(self)
   1914                                 linalg.solve_triangular(
   1915                                     upper, self.forecasts_error[mask, t],
-> 1916                                     trans=1))
   1917                         except linalg.LinAlgError:
   1918                             self._standardized_forecasts_error[mask, t] = (

/usr/local/lib/python3.7/site-packages/scipy/linalg/basic.py in solve_triangular(a, b, trans, lower, unit_diagonal, overwrite_b, debug, check_finite)
    334 
    335     a1 = _asarray_validated(a, check_finite=check_finite)
--> 336     b1 = _asarray_validated(b, check_finite=check_finite)
    337     if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]:
    338         raise ValueError('expected square matrix')

/usr/local/lib/python3.7/site-packages/scipy/_lib/_util.py in _asarray_validated(a, check_finite, sparse_ok, objects_ok, mask_ok, as_inexact)
    237             raise ValueError('masked arrays are not supported')
    238     toarray = np.asarray_chkfinite if check_finite else np.asarray
--> 239     a = toarray(a)
    240     if not objects_ok:
    241         if a.dtype is np.dtype('O'):

/usr/local/lib/python3.7/site-packages/numpy/lib/function_base.py in asarray_chkfinite(a, dtype, order)
    496 
    497 @array_function_dispatch(_piecewise_dispatcher)
--> 498 def piecewise(x, condlist, funclist, *args, **kw):
    499     """
    500     Evaluate a piecewise-defined function.

ValueError: array must not contain infs or NaNs
1

There are 1 answers

0
C8H10N4O2 On

The default trend in the univariate SARIMAX is 'n' (for "none"). The default trend in the vector case VARMAX is 'c' (for "constant"). So, depending on what you want, do either:

  1. Just set trend='n' if you want no trend component. (That's actually what I wanted, but didn't realize it was not the default argument for VARMAX.) or...

  2. Set trend = 'n' and use the exog= argument as a workaround for a bug with the trend component in VARMAX. Thanks to statsmodels contributor ChadFulton, who recommended this approach until fixed. His solution for when you want a constant trend:

import numpy as np

np.random.seed(1)
y_hist = 100*np.random.rand(50,2)

model = VARMAX(endog= y_hist,order=(2,0), 
               trend='n',                            # <---- notice
               exog=np.ones(y_hist.shape[0])).fit()  # <----
print("VARMAX model summary")
print(model.summary())
next_y_hat = model.forecast(exog=[1])
print("\nPredicted next value")
print(next_y_hat)

# simulate next observed value
next_y = next_y_hat

# extend model
model = model.extend(endog = next_y, exog=[1])       # <----