Sum in range defined by column

133 views Asked by At

I have a table that is something like this:

amount status timestamp
10 A 0
10 B 1
15 B 2
10 C 3
12 D 4
20 A 5
25 B 6
17 C 7
19 D 8

The amounts have no restriction (other than being a number). And status lines can have duplicates (the 'B' in the example).

What I want is to sum over everything between 'A' status. So the result should be

sum timestamp
57 1
81 5

I need this in ANSI SQL (Spark).

3

There are 3 answers

3
p3consulting On BEST ANSWER

Once you have decided which column defines the row order (called "ord" below), here is one possible solution:

-- Sum `amount` per group, where a group starts at an 'A' row (inclusive) and
-- runs up to, but not including, the next 'A' row in `ord` order.
-- Rows that precede the first 'A' row get no group key and are excluded.
with data(ord, amount, status) as (
    -- Inline sample rows.
    select 1, 10, 'A' from dual union all
    select 2, 10, 'B' from dual union all
    select 3, 15, 'B' from dual union all
    select 4, 10, 'C' from dual union all
    select 5, 12, 'D' from dual union all
    select 6, 20, 'A' from dual union all
    select 7, 25, 'B' from dual union all
    select 8, 17, 'C' from dual union all
    select 9, 19, 'D' from dual
),
-- Tag every row with a group key `llv`.
pdata as (
    select
        d.*,
        -- An 'A' row uses its own key; any other row inherits the key of the
        -- closest 'A' row at or before it (nulls are skipped).
        case status when 'A' then lv else
            last_value(lv) ignore nulls over (order by ord)
        end as llv
    from (
        select
            d.*,
            -- Key of an 'A' row: the `ord` of the previous 'A' row, or
            -- `ord - 1` for the very first 'A'. Non-'A' rows get null here.
            coalesce(
                last_value(case status when 'A' then ord end) over (
                    partition by status
                    order by ord
                    range between unbounded preceding and 1 preceding
                ),
                case status when 'A' then ord - 1 end
            ) as lv
        from data d
    ) d
)
select sum(amount)
from pdata
where llv is not null
group by llv
-- Keys increase with `ord`, so this returns the groups in row order.
order by llv
;

sum(amount)
57
81

Note that replacing "when 'A' then lv else" with "when 'A' then null else" gives you the sums without the 'A' rows themselves, i.e. only the rows strictly after each 'A' and before the next one.

0
Rodrigue On

This is another possibility, assuming the table is named Tmp:


DROP PROCEDURE IF EXISTS summing;
DELIMITER |
-- Emits one single-row result set per group of rows in Tmp, in `timestamp`
-- order. A group starts at a row with status 'A' (inclusive) and ends just
-- before the next 'A' row; the last group runs to the end of the table.
-- Rows that precede the first 'A' row are not summed.
CREATE PROCEDURE summing()
    BEGIN
        DECLARE _begin INT DEFAULT NULL;
        DECLARE _end INT DEFAULT NULL;

        -- MIN() always returns exactly one row (NULL when nothing matches),
        -- so the variable is assigned NULL instead of being left untouched.
        SELECT MIN(`timestamp`) INTO _begin FROM Tmp WHERE status = 'A';

        WHILE _begin IS NOT NULL DO
            -- Start of the next group, or NULL when this is the last one.
            SELECT MIN(`timestamp`) INTO _end FROM Tmp WHERE status = 'A' AND `timestamp` > _begin;

            -- Half-open range [_begin, _end): includes the opening 'A' row,
            -- excludes the next one. A NULL _end leaves the range open-ended.
            SELECT SUM(amount) FROM Tmp WHERE `timestamp` >= _begin AND (_end IS NULL OR `timestamp` < _end);

            SET _begin = _end;
        END WHILE;
    END|
DELIMITER ;

CALL summing ();

I have tested it on a MySQL server.

0
p3consulting On

Another solution:

-- For every 'A' row, return the total `amount` of the group it opens: the
-- 'A' row itself plus all following rows up to the next 'A' row (by `ts`).
with data(ts, amount, status) as (
    select to_timestamp('27-11-2022 12:00:00.00', 'DD-MM-YYYY HH24:MI:SS.FF'), 10, 'A' from dual union all
    select to_timestamp('27-11-2022 12:00:00.00', 'DD-MM-YYYY HH24:MI:SS.FF')+1/24, 10, 'B' from dual union all
    select to_timestamp('27-11-2022 12:00:00.00', 'DD-MM-YYYY HH24:MI:SS.FF')+2/24, 15, 'B' from dual union all
    select to_timestamp('27-11-2022 12:00:00.00', 'DD-MM-YYYY HH24:MI:SS.FF')+3/24, 10, 'C' from dual union all
    select to_timestamp('27-11-2022 12:00:00.00', 'DD-MM-YYYY HH24:MI:SS.FF')+4/24, 12, 'D' from dual union all
    select to_timestamp('27-11-2022 12:00:00.00', 'DD-MM-YYYY HH24:MI:SS.FF')+5/24, 20, 'A' from dual union all
    select to_timestamp('27-11-2022 12:00:00.00', 'DD-MM-YYYY HH24:MI:SS.FF')+6/24, 25, 'B' from dual union all
    select to_timestamp('27-11-2022 12:00:00.00', 'DD-MM-YYYY HH24:MI:SS.FF')+7/24, 17, 'C' from dual union all
    select to_timestamp('27-11-2022 12:00:00.00', 'DD-MM-YYYY HH24:MI:SS.FF')+8/24, 19, 'D' from dual 
),
-- 1 on each 'A' row, 0 everywhere else.
flagged as (
    select
        src.*,
        case when status = 'A' then 1 else 0 end as flag
    from data src
),
-- Running total of the flag in `ts` order: every row of a group shares the
-- same value `s`, which steps up by one at each 'A' row.
numbered as (
    select
        f.*,
        sum(flag) over (order by ts) as s
    from flagged f
),
-- Attach the group total to every row of the group.
totals as (
    select
        status,
        ts,
        sum(amount) over (partition by s) as res
    from numbered
)
-- Keep one row per group: the 'A' row that opens it.
select res
from totals
where status = 'A'
order by ts
;