I want to get the underlying states of a state space model with the Kalman filter. My state space model is:
s_t = d + s_t-1 + eta_t
y_t = s_t + e_t
where eta_t and e_t are both gaussian white noise series, s_t is the state at time t, y_t is the observation at time t and d is the drift.
I am estimating the variances of e_t (sigma_e) and eta_t (sigma_eta), as well as the drift d with Maximum Likelihood Estimation. But the way I do it the smoothed states end up being exactly the observations y_t.
This is my code:
library(ggplot2)
CAN <- read.csv("C:/Users/alina/OneDrive/Dokumente/Canada_GDP.csv")
CAN <- CAN[-(1:5), , drop = FALSE]
# Set the initial values
n=149
GDP_CAN <- CAN$GDP
log_GDP_CAN <- log(GDP_CAN)
filtered_states <- numeric(n)
filtered_sigma <- numeric(n)
vecu <- numeric(n)
vecV <- numeric(n)
vecL <- numeric(n)
vecK <- numeric(n)
# Set the initial value
s0 = 7.8
sigma0 = 100
filtered_states[1] <- s0
filtered_sigma[1] <- sigma0
##################################
# Function to calculate the negative log-likelihood for MLE
#######################################
#estimates with my function
negative_log_likelihood <- function(parameters, log_GDP_CAN) {
sigma_e <- exp(parameters[1])
sigma_eta <- exp(parameters[2])
d <- parameters[3]
# Calculate negative log-likelihood
s0 = 7.8
Sigma0 = 1e6
s <- s0
Sigma <- Sigma0
llh = 0
for (t in 1:n) {
u = log_GDP_CAN[t] - s
V = Sigma + sigma_e
K = Sigma/V
L = (1-K)
s = d + s + K*u
Sigma = Sigma * (1-K) + sigma_eta
}
# Update negative log-likelihood
llh <- llh + - 0.5 * log(2 * pi * V) - 0.5 * (u^2)/V
return(-llh)
}
# Use optim function to maximize the likelihood
mle_result_LBFGSB <- optim(par = c(1, 1, 0), fn = negative_log_likelihood, log_GDP_CAN = log_GDP_CAN, method = "L-BFGS-B")
# Extract estimated parameters
estimated_sigma_e <- exp(mle_result_LBFGSB$par[1])
estimated_sigma_eta <- exp(mle_result_LBFGSB$par[2])
estimated_d <- mle_result_LBFGSB$par[3]
####################################################################
sigma_e <- estimated_sigma_e
sigma_eta <- estimated_sigma_eta
d <- estimated_d
#Kalman Filter
s <- s0
Sigma <- sigma0
for (t in 1:n){
u = log_GDP_CAN[t] - s
V = Sigma + sigma_e
K = Sigma/V
L = (1-K)
s = d + s + K*u
Sigma = Sigma * (1-K) + sigma_eta
vecu[t] <- u
vecV[t] <- V
vecL[t] <- L
vecK[t] <- K
filtered_states[t+1] <- s
filtered_sigma[t+1] <- Sigma
}
filtered_states <- filtered_states[1:n]
filtered_sigma <- filtered_sigma[1:n]
##Smoothing
#q_tt is q_(t-1)
q_t <- numeric(n)
smoothest_mu <- numeric(n)
q_t[n] <- 0
for (t in n:2){
q_t[t-1] = (vecu[t]/vecV[t]) + (vecL[t]*q_t[t])
smooth_s = filtered_states[t] + filtered_sigma[t]*q_t[t-1]
#q_t[t-1] <- q_t[t-1]
smoothest_mu[t] <- smooth_s
}
smoothest_mu[1] <- s0 + vecu[1] + sigma_e * q_t[1] #p. 504 Tsay
time<- c(1870:2018)
df <- data.frame(time, GDP_CAN, log_GDP_CAN, filtered_states, smoothest_mu)
ggplot(df, aes(x = time)) +
geom_line(aes(y = log_GDP_CAN), color = "blue", linetype = "solid") +
geom_line(aes(y = filtered_states), color = "red", linetype = "solid") +
geom_line(aes(y = smoothest_mu), color = "green", linetype = "solid") +
labs(title = "Kalman Filter",
x = "Time",
y = "Value") +
theme_minimal()
What can I do to get a better fit?