How to calculate value based on average of previous month and average of same month last year in SQL

426 views Asked by At

I would like to calculate targets for opened rates and clicked rates based on actuals of the last month and the same month last year. My table is aggregated at daily level and I have grouped it by month and year to get the monthly averages. I have then created a self-join to join my current dates on the results of the previous months. This works fine for all months except for January because SQL can't know that it's supposed to join 1 on 12. Is there a way to specify this in my join clause?

Essentially, the results for January 2021 shouldn't be null because I have December 2020 data.

This is my data and my query:

CREATE TABLE exasol_last_year_avg(
    date_col date,
    country text,
    brand text,
    category text,
    delivered integer,
    opened integer,
    clicked integer
)

INSERT INTO exasol_last_year_avg 
(date_col,country,brand,category,delivered,opened,clicked) VALUES
(2021-01-01,'AT','brand1','cat1',100,60,23),
(2021-01-01,'AT','brand1','cat2',200,50,45),
(2021-01-01,'AT','brand2','cat1',300,49,35),
(2021-01-01,'AT','brand2','cat2',400,79,57),
(2021-02-02,'AT','brand1','cat1',130,78,30),
(2021-02-02,'AT','brand1','cat2',260,65,59),
(2021-02-02,'AT','brand2','cat1',390,64,46),
(2021-02-02,'AT','brand2','cat2',520,103,74),
(2020-12-02,'AT','brand1','cat1',130,78,30),
(2020-12-02,'AT','brand1','cat2',260,65,59),
(2020-12-02,'AT','brand2','cat1',390,64,46),
(2020-12-02,'AT','brand2','cat2',520,103,74),
(2020-02-02,'AT','brand1','cat2',236,59,53),
(2020-02-02,'AT','brand2','cat1',355,58,41),
(2020-02-02,'AT','brand2','cat2',473,93,67),
(2020-02-02,'AT','brand1','cat1',118,71,27)

This is written in PostgresSQL because I think it's more accessible to most people, but my production database is Exasol!

select *
from
(Select month_col,
           year_col,
           t_campaign_cmcategory,
           t_country,
           t_brand,
           (t2_clicktoopenrate + t3_clicktoopenrate)/2 as target_clicktoopenrate,
           (t2_openrate + t3_openrate)/2 as target_openrate
    from (
with CTE as (
select extract(month from date_col) as month_col,
       extract(year from date_col) as year_col, 
        category as t_campaign_cmcategory,
        country as t_country,
        brand as t_brand,
        round(sum(opened)/nullif(sum(delivered),0),3) as OpenRate,
        round(sum(clicked)/nullif(sum(opened),0),3) as ClickToOpenRate
from public.exasol_last_year_avg
group by 1, 2, 3, 4, 5)
select t1.month_col,
           t1.year_col,
           t2.month_col as t2_month_col,
           t2.year_col as t2_year_col,
           t3.month_col as t3_month_col,
           t3.year_col as t3_year_col,
           t1.t_campaign_cmcategory,
           t1.t_country,
           t1.t_brand,
           t1.OpenRate,
           t1.ClickToOpenRate,
           t2.OpenRate as t2_OpenRate,
           t2.ClickToOpenRate as t2_ClickToOpenRate,
           t3.OpenRate as t3_OpenRate,
           t3.ClickToOpenRate as t3_ClickToOpenRate
    from CTE t1
    left join CTE t2
    on t1.month_col = t2.month_col + 1
    and t1.year_col = t2.year_col
    and t1.t_campaign_cmcategory = t2.t_campaign_cmcategory
    and t1.t_country = t2.t_country
    and t1.t_brand = t2.t_brand
    left join CTE t3
    on t1.month_col = t3.month_col
    and t1.year_col = t3.year_col + 1
    and t1.t_campaign_cmcategory = t3.t_campaign_cmcategory
    and t1.t_country = t3.t_country
    and t1.t_brand = t3.t_brand) as target_base) as final_tbl
2

There are 2 answers

2
Gordon Linoff On

Start with an aggregation query:

select date_trunc('month', date_col), country, brand, 
       sum(opened) * 1.0 / nullif(sum(delivered), 0) as OpenRate,
       sum(clicked) * 1.0 / nullif(sum(opened), 0) as ClickToOpenRate
from exasol_last_year_avg
group by 1, 2, 3;

Then, use window functions. Assuming you have a value for every month (with no gaps). you can just use lag(). I'm not sure what your final calculation is, but this brings in the data:

with mcb as (
      select date_trunc('month', date_col) as yyyymm, country, brand, 
             sum(opened) * 1.0 / nullif(sum(delivered), 0) as OpenRate,
             sum(clicked) * 1.0 / nullif(sum(opened), 0) as ClickToOpenRate
      from exasol_last_year_avg
      group by 1, 2, 3
     )
select mcb.*,
       lag(openrate, 1) over (partition by country, brand order by yyyymm) as prev_month_openrate,
       lag(ClickToOpenRate, 1) over (partition by country, brand order by yyyymm) as prev_month_ClickToOpenRate,
       lag(openrate, 12) over (partition by country, brand order by yyyymm) as prev_year_openrate,
       lag(ClickToOpenRate, 12) over (partition by country, brand order by yyyymm) as prev_year_ClickToOpenRate
from mcb;
0
Omega On

This works with a different join condition:

select *
from
(Select month_col,
           year_col,
           t_campaign_cmcategory,
           t_country,
           t_brand,
           (t2_clicktoopenrate + t3_clicktoopenrate)/2 as target_clicktoopenrate,
           (t2_openrate + t3_openrate)/2 as target_openrate
    from (
with CTE as (
select extract(month from date_col) as month_col,
       extract(year from date_col) as year_col, 
        category as t_campaign_cmcategory,
        country as t_country,
        brand as t_brand,
        round(sum(opened)/nullif(sum(delivered),0),3) as OpenRate,
        round(sum(clicked)/nullif(sum(opened),0),3) as ClickToOpenRate
from public.exasol_last_year_avg
group by 1, 2, 3, 4, 5)
select t1.month_col,
           t1.year_col,
           t2.month_col as t2_month_col,
           t2.year_col as t2_year_col,
           t3.month_col as t3_month_col,
           t3.year_col as t3_year_col,
           t1.t_campaign_cmcategory,
           t1.t_country,
           t1.t_brand,
           t1.OpenRate,
           t1.ClickToOpenRate,
           t2.OpenRate as t2_OpenRate,
           t2.ClickToOpenRate as t2_ClickToOpenRate,
           t3.OpenRate as t3_OpenRate,
           t3.ClickToOpenRate as t3_ClickToOpenRate
    from CTE t1
    left join CTE t2
-- adjusted join condition
    on ((t1.month_col = (CASE WHEN t1.month_col = 1 then t2.month_col - 11 END) and t1.year_col = t2.year_col + 1)
or (t1.month_col = (CASE WHEN t1.month_col != 1 then t2.month_col + 1 END) and t1.year_col = t2.year_col))
    and t1.t_campaign_cmcategory = t2.t_campaign_cmcategory
    and t1.t_country = t2.t_country
    and t1.t_brand = t2.t_brand
    left join CTE t3
    on t1.month_col = t3.month_col
    and t1.year_col = t3.year_col + 1
    and t1.t_campaign_cmcategory = t3.t_campaign_cmcategory
    and t1.t_country = t3.t_country
    and t1.t_brand = t3.t_brand) as target_base) as final_tbl