Sales figures for cola become available earlier than sales figures for all sodas. Cola sales are correlated with total soda sales; therefore, I would like to forecast soda sales using cola sales.
library(tidyverse)
library(lmtest)
# Example data: one row per date (POSIXct, UTC) with the `soda` and `cola`
# series. The dates appear to be month starts from 2016-11 through 2024-01.
# Both series span exactly 0 to 1 — presumably min-max scaled; confirm.
# The last two `soda` values are NA. `diff` is the cola-minus-soda gap.
time_series <- as.data.frame(list(date = structure(c(1477958400, 1480550400, 1483228800, 1485907200, 1488326400, 1491004800, 1493596800, 1496275200, 1498867200, 1501545600, 1504224000, 1506816000, 1509494400, 1512086400, 1514764800, 1517443200, 1519862400, 1522540800, 1525132800, 1527811200, 1530403200, 1533081600, 1535760000, 1538352000, 1541030400, 1543622400, 1546300800, 1548979200, 1551398400, 1554076800, 1556668800, 1559347200, 1561939200, 1564617600, 1567296000, 1569888000, 1572566400, 1575158400, 1577836800, 1580515200, 1583020800, 1585699200, 1588291200, 1590969600, 1593561600, 1596240000, 1598918400, 1601510400, 1604188800, 1606780800, 1609459200, 1612137600, 1614556800, 1617235200, 1619827200, 1622505600, 1625097600, 1627776000, 1630454400, 1633046400, 1635724800, 1638316800, 1640995200, 1643673600, 1646092800, 1648771200, 1651363200, 1654041600, 1656633600, 1659312000, 1661990400, 1664582400, 1667260800, 1669852800, 1672531200, 1675209600, 1677628800, 1680307200, 1682899200, 1685577600, 1688169600, 1690848000, 1693526400, 1696118400, 1698796800, 1701388800, 1704067200 ), tzone = "UTC", class = c("POSIXct", "POSIXt")),
soda = c(0.581883547868061, 0.384804907481899, 0.160825623491553, 0.452358205953339, 0.60576729686243, 0.366552695092518, 0.530872888173773, 0.462012268704747, 0.446852373290426, 0.441170555108608, 0.410725060337892, 0.486348551890587, 0.544826025744167, 0.324818986323411, 0.16336484312148, 0.362856999195495, 0.481395816572808, 0.327986725663717, 0.442628720836685, 0.329872284794851, 0.273632341110217, 0.299376508447305, 0.25294147224457, 0.305158889782784, 0.327861021721641, 0.106094127111826, 0.030445494770716, 0.193760056315366, 0.280973451327434, 0.183200925181014, 0.296736725663717, 0.245172968624296, 0.315818584070796, 0.357049477071601, 0.334121078037007, 0.472646822204344, 0.511514481094127, 0.266442075623492, 0.133397023330652, 0.385835679806919, 0.292211383748994, 0, 0.165904062751408, 0.320746178600161, 0.444941673370877, 0.385483708769107, 0.429957763475463, 0.648657481898632, 0.717417538213998, 0.633673572003218, 0.319665124698311, 0.692578439259855, 1, 0.668920957361223, 0.813555913113435, 0.719554505229284, 0.701981094127112, 0.62691069991955, 0.681969026548673, 0.901749798873693, 0.926337489943685, 0.666507441673371, 0.297817779565567, 0.65831154465004, 0.754676186645213, 0.536554706355591, 0.558904867256637, 0.443835478680611, 0.437072606596943, 0.461534593724859, 0.417915325824618, 0.431642196299276, 0.463847546259051, 0.269911504424779, 0.119267900241352, 0.36798572003218, 0.528031979082864, 0.355666733708769, 0.527780571198713, 0.365371078037007, 0.401749798873693, 0.526825221238938, 0.57157582461786, 0.677569388576026, 0.585453539823009, NA, NA),
cola = c(0.548396977741474, 0.196753114151521, 0.648968756381458, 0.474372064529304, 0.580559526240555, 0.595568715540127, 0.485501327343271, 0.386461098631815, 0.548192771084337, 0.656524402695528, 0.475393097814989, 0.348274453747192, 0.235450275678987, 0.0701449867265673, 0.249336328364305, 0.18725750459465, 0.163161119052481, 0.140187870124566, 0.177149275066367, 0.168572595466612, 0.151521339595671, 0.302940575862773, 0.227486216050643, 0.273024300592199, 0.214621196651011, 0.142944659995916, 0.441596896058812, 0.288646109863182, 0.273330610577905, 0.190729017765979, 0.257504594649786, 0.255564631406984, 0.276904227077803, 0.277618950377782, 0.277006330406371, 0.305391055748417, 0.231876659179089, 0.139575250153155, 0.451807228915663, 0.342556667347356, 0.155605472738411, 0, 0.152746579538493, 0.209720236879722, 0.161629569123953, 0.161731672452522, 0.162446395752502, 0.206657137022667, 0.219317949765162, 0.133142740453339, 0.45885235858689, 0.401878701245661, 0.501633653257096, 0.471002654686543, 0.449356749030018, 0.349193383704309, 0.361139473146825, 0.337962017561773, 0.35868899326118, 0.438431692873188, 0.468552174800899, 0.281294670206249, 0.824994894833572, 0.656728609352665, 0.671737798652236, 0.550643250969982, 0.656626506024096, 0.606800081682663, 0.652440269552787, 0.696242597508679, 0.630488053910558, 0.68929957116602, 0.62824178068205, 0.434755973044721, 0.980498264243414, 0.83745150091893, 0.851541760261385, 0.691545844394527, 0.882274862160506, 0.876046559117827, 0.993975903614458, 0.924954053502144, 0.798754339391464, 0.883806412089034, 0.766693894220952, 0.503063099857055, 1))) %>% mutate(diff = cola - soda)
There appears to be a correlation.
# Overlay soda sales and cola sales shifted forward by one observation, to
# check visually whether cola moves ahead of soda.
ggplot(time_series, aes(x = date)) +
  # `linewidth` replaces the deprecated `size` for line thickness
  # (ggplot2 >= 3.4.0).
  geom_line(aes(y = soda, color = "soda"), linewidth = 1.5) +
  # Namespace-qualified so the shift never falls back to stats::lag(), which
  # does not shift the values of a plain vector.
  geom_line(aes(y = dplyr::lag(cola, 1), color = "cola"), linewidth = 1.5) +
  scale_color_manual(values = c("soda" = "#F8766D", "cola" = "#00BFC4"))
# Convert each series to a monthly ts object (12 observations per year,
# starting November 2016), using only rows where a soda value is observed so
# that both series cover the same dates.
soda <- time_series %>%
  select(soda) %>%
  filter(!is.na(soda)) %>%
  ts(start = c(2016, 11), frequency = 12)
cola <- time_series %>%
  filter(!is.na(soda)) %>%
  select(cola) %>%
  ts(start = c(2016, 11), frequency = 12)
First-differencing each time series appears to produce a stationary process.
# Run adf.test() on the first differences of the soda and cola ts objects.
# NOTE(review): adf.test() is not provided by tidyverse or lmtest, the only
# packages loaded in the visible code. Presumably this is tseries::adf.test()
# (augmented Dickey-Fuller test) — confirm and load that package, otherwise
# these calls fail with "could not find function".
adf.test(diff(soda))
adf.test(diff(cola))
A Granger causality test suggests that past values of one series help predict the other.
# Test whether past changes in cola sales help predict changes in soda sales.
# grangertest() takes `response ~ predictor` and tests whether lags of the
# predictor improve on a model using only lags of the response. Soda (the
# series to forecast) therefore goes on the left and cola on the right.
# With `data = time_series`, `soda` and `cola` are looked up as columns of the
# data frame rather than as the ts objects of the same name.
grangertest(diff(soda) ~ diff(cola), order = 2, test = "F", data = time_series)
The errors (cola minus soda), however, appear to have a cyclical component.
# Line plot of the cola-minus-soda gap over time, restricted to rows where the
# gap is defined (i.e. a soda value exists).
ggplot(
  data = filter(time_series, !is.na(diff)),
  mapping = aes(x = date, y = diff)
) +
  geom_line()
How can I forecast the soda values, so that the errors do not have the cyclical component?