SQL Delete specific rows based on date and criteria

83 views Asked by At

I've got a table that has duplicate data that needs to be cleaned up. Consider the following example:

CREATE TABLE #StackOverFlow
(
    [ctrc_num] int, 
    [Ctrc_name] varchar(6),
    [docu] bit, 
    [adj] bit, 
    new bit, 
    [some_date] datetime
);
    
INSERT INTO #StackOverFlow
    ([ctrc_num], [Ctrc_name], [docu], [adj], [new], [some_date])
VALUES
    (12345, 'John R', null, null, 1, '2023-12-11 09:05:13.003'),
    (12345, 'John R', 1, null, 0, '2023-12-11 09:05:12.987'),
    (12345, 'John R', null, null, 1, '2023-12-11 09:05:12.947'),
    (56789, 'Sam S', null, null, 1, '2023-12-11 09:05:13.003'),
    (56789, 'Sam S', null, null, 1, '2023-12-11 09:05:12.987'),
    (56789, 'Sam S', 1, null, 0, '2023-12-11 09:05:12.947'),
    (78945, 'Pat P', null, null, 1, '2023-12-11 09:05:13.003'),
    (78945, 'Pat P', null, null, 1, '2023-12-11 09:05:12.987'),
    (78945, 'Pat P', null, null, 1, '2023-12-11 09:05:12.947');

This gives me:

[ctrc_num]  [Ctrc_name] [docu]  [adj]   [new]   [some_date]
-----------------------------------------------------------------------
12345        John R     NULL    NULL    1       2023-12-11 09:05:13.003
12345        John R     1       NULL    0       2023-12-11 09:05:12.987
12345        John R     NULL    NULL    1       2023-12-11 09:05:12.947
56789        Sam S      NULL    NULL    1       2023-12-11 09:05:13.003
56789        Sam S      NULL    NULL    1       2023-12-11 09:05:12.987
56789        Sam S      1       NULL    0       2023-12-11 09:05:12.947
78945        Pat P      NULL    NULL    1       2023-12-11 09:05:13.003
78945        Pat P      NULL    NULL    1       2023-12-11 09:05:12.987
78945        Pat P      NULL    NULL    1       2023-12-11 09:05:12.947

What I need to do is delete from the table duplicates. If new is 0, delete the records where new is 1. If all records have new = 1 keep the newest record and delete the older ones.

The result should look like this:

[ctrc_num]  [Ctrc_name] [docu]  [adj]  [new]    [some_date]
-----------------------------------------------------------------------
12345        John R     1       NULL    0       2023-12-11 09:05:12.987
56789        Sam S      1       NULL    0       2023-12-11 09:05:12.947
78945        Pat P      NULL    NULL    1       2023-12-11 09:05:13.003

I've tried ROW_NUMBER:

;WITH RankedByDate AS
(
    SELECT 
        ctrc_num, Ctrc_name,
        docu, adj, new, some_date,
        ROW_NUMBER() OVER (PARTITION BY Ctrc_num, Ctrc_name, [docu],[adj], [new] 
                           ORDER BY some_date DESC) AS rNum
    FROM 
        #StackOverFlow
)
SELECT * 
FROM RankedByDate

This separates the ones with new = 0, but I still have the ones with new = 1 that are ordered.

Grouping gives me the records that are duplicated but no way to delete the ones needed to be deleted:

SELECT [ctrc_num]
    ,[Ctrc_name]
    ,[docu]
    ,[adj]
    ,[new]
FROM 
    #StackOverFlow
GROUP BY 
    [ctrc_num]
    ,[Ctrc_name]
    ,[docu]
    ,[adj]
    ,[new]
HAVING 
    COUNT(*) > 1
2

There are 2 answers

6
CHill60 On BEST ANSWER

Break the problem down into it's parts

  1. "If new is 0, delete the records where new is 1"

    delete from #StackOverFlow
    where [new] = 1
    and [ctrc_num] in (select [ctrc_num]
                       from #StackOverFlow
                       where [new] = 0);
    
  2. "If all records have new = 1 keep the newest record and delete the older ones" Use a CTE to add a row number based on the date and partitioned by the [ctrc_num] such that the "first" record in each group is the one you want to keep - if there is only 1 row in a group that's the one you want to keep anyway. Then delete everything else

    ;with cte as
    (
        select 
             [ctrc_num]  
             ,ROW_NUMBER() OVER (PARTITION BY [ctrc_num] ORDER BY [ctrc_num], [some_date] DESC) as rw
        from #StackOverFlow
    )
    DELETE FROM cte where rw <> 1;
    
0
Alex Kudryashev On

It is possible to do what you want is a single query.

;with cte as(
    select [ctrc_num], [Ctrc_name], [docu],[adj], [new], [some_date]
    ,ROW_NUMBER() over(partition by [ctrc_num] -- group by [ctrc_num]
        order by [new], --0 then 1
        [some_date] desc --newest first
        ) rn
    from #StackOverFlow
)
delete cte
where rn>1
;

select * from #StackOverFlow