SCD2 one-pass merge violates primary key in Oracle 12

591 views Asked by At

After some time I figured imho quite fast one-pass merge statement to historize slowly changing dimension type 2.

It works perfectly on tables without unique constraint.

Mostly it works also on tables with unique constraint. But sometimes (usually when the change in history is bigger) I get error ORA-00001, unique constraint violation. Of course, I know there are two-pass methods which work, but they are slower.

My only guess is that Oracle does sometimes the INSERT before UPDATE which duplicates the TH_Valid_To_Date for a moment.

Any ideas how to avoid it (while keeping primary key)?

Source table:

CREATE TABLE TESTS 
  (
     T_Key_1                        NUMBER(38,0)    DEFAULT -1 NOT NULL ENABLE /*  */
    ,T_Key_2                        NUMBER(38,0)    DEFAULT -1 NOT NULL ENABLE /*  */
    ,Text_Value                     VARCHAR2(100)    /*  */
    ,Number_Value                   NUMBER(38,0)    DEFAULT -1 NOT NULL ENABLE /*  */
    ,Amount                         NUMBER           /*  */

    ,CONSTRAINT T_PK PRIMARY KEY (T_Key_1, T_Key_2)  /* Primární klíč */    
  )
;

History table:

CREATE TABLE TEST_HISTORY 
  (
     T_Key_1                        NUMBER(38,0)    DEFAULT -1 NOT NULL
    ,T_Key_2                        NUMBER(38,0)    DEFAULT -1 NOT NULL
    ,Text_Value                     VARCHAR2(100)    
    ,Number_Value                   NUMBER(38,0)    DEFAULT -1 NOT NULL
    ,Amount                         NUMBER          
    ,TH_Valid_From_Date             DATE            DEFAULT to_date('1000-01-01','yyyy-mm-dd') NOT NULL /* SCD2 - Start of validity of record. */
    ,TH_Valid_To_Date               DATE            DEFAULT to_date('3000-01-01','yyyy-mm-dd') NOT NULL /* SCD2 - End of validity of record. */

    ,CONSTRAINT TH_PK PRIMARY KEY (T_Key_1, T_Key_2, TH_Valid_To_Date) using index local 
  )
/** Physical Options **************************************************************************************************/
partition by range (TH_Valid_To_Date) interval (NUMTOYMINTERVAL (1, 'MONTH'))
(partition P_10000000 values less than (TO_DATE ('01-01-1000', 'DD-MM-YYYY')))
ENABLE ROW MOVEMENT 
;

Merge:

MERGE INTO (SELECT * FROM TEST_HISTORY WHERE TH_Valid_To_Date = to_date('3000-01-01','yyyy-mm-dd'))             Hst /*change only current records which are identified by TH_Valid_To_Date = to_date('3000-01-01','yyyy-mm-dd') */
   USING (
      SELECT * FROM (
         SELECT NVL(Src.T_Key_1, Dst.T_Key_1)                                AS T_Key_1
            ,NVL(Src.T_Key_2, Dst.T_Key_2)                                   AS T_Key_2
            ,Src.Text_Value
            ,Src.Number_Value
            ,Src.Amount
            ,CASE WHEN Src.T_Key_1 is null                                   THEN 'D' /*delete*/
                  WHEN Dst.T_Key_1 is null                                   THEN 'I' /*insert*/
                  WHEN (Src.Text_Value=Dst.Text_Value OR (Src.Text_Value is null AND Dst.Text_Value is null))
                       AND (Src.Number_Value=Dst.Number_Value OR (Src.Number_Value is null AND Dst.Number_Value is null))
                       AND (Src.Amount=Dst.Amount OR (Src.Amount is null AND Dst.Amount is null))
                                                                             THEN 'X' /*no change*/
                                                                             ELSE 'U' /*update*/ END  AS Operation  
         FROM TESTS                                         Src
         FULL JOIN (SELECT * FROM TEST_HISTORY WHERE TH_Valid_To_Date = to_date('3000-01-01','yyyy-mm-dd'))     Dst
            ON (Src.T_Key_1 = Dst.T_Key_1 AND Src.T_Key_2 = Dst.T_Key_2)
      )
      INNER JOIN (SELECT LEVEL AS duplication FROM DUAL CONNECT BY LEVEL BETWEEN 1 AND 2) ON (duplication=1 OR Operation='U') /*need to duplicate update records so that they can go to both matched and not matched parts*/
      WHERE Operation<>'X'
   )                                                                          Act
   ON (Act.T_Key_1 = Hst.T_Key_1 AND Act.T_Key_2 = Hst.T_Key_2 AND Act.duplication=1 AND Act.operation<>'I')

WHEN MATCHED THEN UPDATE
   SET
      TH_Valid_To_Date                              = p_Load_Date - 1,
   WHERE Hst.TH_Valid_To_Date                       = to_date('3000-01-01','yyyy-mm-dd')

WHEN NOT MATCHED THEN INSERT /*+ append */
   (
       T_Key_1                       
      ,T_Key_2                       
      ,Text_Value                    
      ,Number_Value                  
      ,Amount                        
      ,TH_Valid_From_Date             /*Auditní sloupec*/
      ,TH_Valid_To_Date               /*Auditní sloupec*/
   ) VALUES (
       Act.T_Key_1                       
      ,Act.T_Key_2                       
      ,Act.Text_Value                    
      ,Act.Number_Value                  
      ,Act.Amount                        
      ,p_Load_Date                    /*Auditní sloupec*/
      ,to_date('3000-01-01','yyyy-mm-dd') /*Auditní sloupec*/
   )
;
1

There are 1 answers

5
Brian Leach On

I ran into this same issue myself: you cannot both insert and update a record. Oracle determines whether the record is to be inserted or updated when it first performs the join. Here is a simplified test case that illustrates this:

-- This is the table into which we will be merging data

CREATE TABLE merge_table
(
    mt_col1   INTEGER NOT NULL
  , mt_col2   INTEGER
);

-- Make mt_col1 unique
ALTER TABLE merge_table ADD (
  UNIQUE (mt_col1)
  USING INDEX);

-- This is the table from which we will be drawing the data

CREATE TABLE datasource_table
(
    ds_col1   INTEGER
  , ds_col2   INTEGER
);

-- Load up the data source with 10 rows (1-10)

INSERT INTO datasource_table (
           ds_col1, ds_col2
            )
    SELECT ROWNUM r, ROWNUM r
      FROM all_objects
     WHERE ROWNUM < 11;

-- Create a duplicate record

INSERT INTO datasource_table (
           ds_col1, ds_col2
            )
     VALUES (1, 1);

-- Create a record to be updated

INSERT INTO merge_table (
           mt_col1, mt_col2
            )
     VALUES (2, 2);

COMMIT;

-- This merge will fail with a unique constraint violation because
-- an mt_col1 value of 1 does not exist. The datasource table contains
-- two entries for 1, so the merge will try to insert two mt_col1=1 records,
-- violating the unique constraint.

MERGE INTO merge_table dt
     USING (SELECT ds_col1, ds_col2
              FROM datasource_table) a
        ON (a.ds_col1 = dt.mt_col1)
WHEN MATCHED
THEN
    UPDATE SET mt_col2   = a.ds_col2 + 10
WHEN NOT MATCHED
THEN
    INSERT     (
               mt_col1, mt_col2
               )
        VALUES (ds_col1, ds_col2);

-- ORA-00001: unique constraint (SYS_C0013990) violated

-- Delete one of the duplicate records and the merge succeeds

DELETE FROM datasource_table
      WHERE ds_col1 = 1
        AND ROWNUM < 2;

-- Merge is now successful

MERGE INTO merge_table dt
     USING (SELECT ds_col1, ds_col2
              FROM datasource_table) a
        ON (a.ds_col1 = dt.mt_col1)
WHEN MATCHED
THEN
    UPDATE SET mt_col2   = a.ds_col2 + 10
WHEN NOT MATCHED
THEN
    INSERT     (
               mt_col1, mt_col2
               )
        VALUES (ds_col1, ds_col2);

-- 10 rows updated