I have a code that was given to me that runs an ARIMA model putting weight on more recent errors, it gives excellent results, much better than simple ARIMA, but i do not understand the methodology behind it. If you can understand whats going on and why and how it works then i would really appreciate it :)
The code that i would like explaining is from the #---Weighting---
suppressMessages(library(lmtest))
suppressMessages(library(tseries))
suppressMessages(library(forecast))
suppressMessages(library(TTR))
#-------------------------------------------------------------------------------
Input.data <- matrix(c("8Q1","8Q2","8Q3","8Q4","9Q1","9Q2","9Q3","9Q4","10Q1","10Q2","10Q3","10Q4","11Q1","11Q2","11Q3","11Q4","12Q1","12Q2","12Q3","12Q4","13Q1","13Q2","13Q3","13Q4","14Q1","14Q2","14Q3",5403.675741,6773.504993,7231.117289,7835.55156,5236.709983,5526.619467,6555.781711,11464.72728,7210.068674,7501.610403,8670.903486,10872.93518,8209.022658,8153.393088,10196.44775,13244.50201,8356.732878,10188.44157,10601.32205,12617.82102,11786.52641,10044.98676,11006.0051,15101.9456,10992.27282,11421.18922,10731.31198),ncol=2,byrow=FALSE)
#-------------------------------------------------------------------------------
# Maximum seasonal differences allowed. For typical series, 0 is recommended.
max.sdiff <- 2
#-------------------------------------------------------------------------------
# Force seasonality
arima.force.seasonality <- "y"
#-------------------------------------------------------------------------------
# The frequency of the data. 1/4 for QUARTERLY, 1/12 for MONTHLY
Frequency <- 1/4
#-------------------------------------------------------------------------------
# How many quarters/months to forecast
Forecast.horizon <- 4
#-------------------------------------------------------------------------------
# The first date in the series. Use c(8, 1) to denote 2008 q1
Start.date <- c(8, 1)
#-------------------------------------------------------------------------------
# The dates of the forecasts
Forecast.dates <- c("14Q4", "15Q1", "15Q2", "15Q3")
#-------------------------------------------------------------------------------
# Set if the data should be logged. Takes value "s" (lets script choose logging)
#"level" (forces levels) or "log" (forces logs)
force.log <- "s"
#-------------------------------------------------------------------------------
# Selects the data column from Input.data
Data.col <- as.numeric(Input.data[, length(Input.data[1, ])])
#-------------------------------------------------------------------------------
# Turns the Data.col into a time-series
Data.col.ts <- ts(Data.col, deltat=Frequency, start = Start.date)
#-------------------------------------------------------------------------------
# A character vector of the dates from Input.data
Dates.col <- as.character(Input.data[,1])
#-------------------------------------------------------------------------------
# Starts the testing to see if the data should be logged
transform.method <- round(BoxCox.lambda(Data.col.ts, method = "loglik"), 5)
log.values <- seq(0, 0.24999, by = 0.00001)
sqrt.values <- seq(0.25, 0.74999, by = 0.00001)
which.transform.log <- transform.method %in% log.values
which.transform.sqrt <- transform.method %in% sqrt.values
if (which.transform.log == "TRUE"){
as.log <- "log"
Data.new <- log(Data.col.ts)
} else {
if (which.transform.sqrt == "TRUE"){
as.log <- "sqrt"
Data.new <- sqrt(Data.col.ts)
} else {
as.log <- "no"
Data.new <- Data.col.ts
}
}
#----- Weighting ---------------------------------------------------------------
fweight <- function(x){
PatX <- 0.5+x
return(PatX)
}
integ1 <- integrate(fweight, lower = 0.00, upper = 1)
valinteg <- 2*integ1$value
#Split the integral to several intervals, and pick the weights accordingly
integvals <- rep(0, length.out = length(Data.new))
for (i in 1:length(Data.new)){
integi <- integrate(fweight, lower = (i-1)/length(Data.new), upper= i/length(Data.new))
integvals[i] <- 2*integi$value
}
suppressWarnings(kpssW <- kpss.test(Data.new, null="Level"))
suppressWarnings(ppW <- tryCatch({
ppW <- pp.test(Data.new, alternative = "stationary")},
error = function(ppW){
ppW <- list(error = "TRUE", p.value = 0.99)
}))
suppressWarnings(adfW <- adf.test(Data.new, alternative = "stationary",
k = trunc((length(Data.new) - 1)^(1/3))))
suppressWarnings(if (kpssW$p.value < 0.05 |
ppW$p.value > 0.05 |
adfW$p.value > 0.05){
ndiffsW = 1
} else {
ndiffsW = 0
})
aaw <- auto.arima(Data.new,
max.D = max.sdiff,
d = ndiffsW,
seasonal = TRUE,
allowdrift = FALSE,
stepwise = FALSE,
trace = FALSE,
seasonal.test = "ch")
order.arima <- c(aaw$arma[1], aaw$arma[6] , aaw$arma[2])
order.seasonal.arima <- c(aaw$arma[3], aaw$arma[7], aaw$arma[4])
if (sum(aaw$arma[1:2]) == 0){
order.arima[1] <- 1
} else {
NULL
}
if (arima.force.seasonality == "y"){
if(sum(aaw$arma[3:4]) == 0){
order.seasonal.arima[1] <- 1
} else {
NULL
}
} else {
NULL
}
#----- ARIMA -------------------------------------------------------------------
# Fits an ARIMA model with the orders set
stAW <- Arima(Data.new,
order = order.arima,
seasonal = list(order = order.seasonal.arima),
method ="ML")
parSW <- stAW$coef
WMAEOPT <- function(parSW){
ArimaW <- Arima(Data.new,
order = order.arima,
seasonal = list(order = order.seasonal.arima),
include.drift = FALSE,
method = "ML",
fixed = c(parSW))
errAR <- c(abs(resid(ArimaW)))
WMAE <- t(errAR) %*% integvals
return(WMAE)
}
OPTWMAE <- optim(parSW,
WMAEOPT,
method = "SANN",
set.seed(2),
control = list(fnscale = 1, maxit = 5000))
parS3 <- OPTWMAE$par
Arima.Data.new <- Arima(Data.new, order = order.arima, seasonal=list(order=order.seasonal.arima),
include.drift=FALSE, method = "ML", fixed = c(parS3))