I am trying to compare an input value against a benchmark input from 2018 until 2023, but I can't figure out the transformation to convert these two input values into the output value listed on that date for all dates listed.
I did notice that the direction of the output values was shifted over ~20 days from the input ones, so I started experimenting with moving averages to try and match the two up, but sadly I could not get this to work right. I tried simple moving average, exponential, triangular weighted, and messing with the window sizes, but nothing worked.
Here is my code, I would greatly appreciate it if someone could help me figure out where I am going wrong or a hint in the right direction for me to follow!
Link to the excel sheet I'm referencing: https://docs.google.com/spreadsheets/d/1Nhfz3K9s7HSXGRjgE26XwzIrXvcsKH41/edit?usp=sharing&ouid=113218159207064534478&rtpof=true&sd=true
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import pearsonr, spearmanr
hs = 14
hs2 = 14
offset = 0
excel_file_path = 'Book2.xlsx'
sheet_name = 'Sheet1'
Table = pd.read_excel(excel_file_path, sheet_name=sheet_name, parse_dates=['Date'])
def est_output(date, value1, value2, value3, start_date, end_date):
a = value1 * 100 / value1.iloc[0]
b = value2 * 100 / value2.iloc[0]
c = a / b
# Calculate 14-day moving average (EMA)
d = pd.Series(c).rolling(hs).mean()
e = pd.Series(c).rolling(hs).std()
f = 100 + 3*((c - d) / e)
f1 = pd.Series(f)
g = f1.rolling(hs2).mean()
lines = {
'Estimated': (date, g),
'Actual': (date, value3)
}
for label, (x, y) in lines.items():
plt.plot(x, y, label=label)
# Set x-axis and y-axis limits for zooming
plt.xlim([start_date, end_date])
plt.legend()
plt.show()
lines2 = {
'Difference': (date, ((g / value3) - 1) * 100)
}
for label, (x, y) in lines2.items():
plt.plot(x, y, label=label)
# Set x-axis and y-axis limits for zooming
plt.xlim([start_date, end_date])
plt.legend()
plt.show()
bench = value3.iloc[hs + hs2 - 2:]
actual = g.dropna()
corr, p = pearsonr(bench, actual)
rho, p = spearmanr(bench, actual)
print('Pearsons correlation: %.3f' % corr, p)
print('Spearmans correlation: %.3f' % rho, p)
# Example: Zoom in on a specific date range (start_date and end_date)
start_date = pd.Timestamp('2019-01-01')
end_date = pd.Timestamp('2023-06-01')
est_output(Table['Date'], Table['Experimental'], Table['Benchmark'], Table['Output'], start_date, end_date)