Generate columns based on hierarchy

204 views Asked by At

I have an Input table with two columns as below. The Parent ID and Child ID columns will have a hierarchical structure.

##Input Table

ParentID ChildID
PR001 CR001
PR001 CR002
PR001 CR003
CR001 DR001
CR001 DR002
CR002 DR003
DR001 ER001
DR002 ER002
....... ......
XXXXX YYYYY

I've applied recursive CTE method to generate the intermediate output to include level column based on the hierarchical structure

##Intermediate Output Table:

ParentID ChildID Level
PR001 CR001 1
PR001 CR002 1
PR001 CR003 1
CR001 DR001 2
CR001 DR002 2
CR002 DR003 3
DR001 ER001 3
DR002 ER002 3
....... ...... ...
XXXXX YYYYY N

I'm trying to generate the final output by pivoting the values based on the hierarchical structure of the intermediate output. The number of columns with level prefixed by its value will be based on the level column.

##Desired Result:

ID Level 1 Level 2 Level 3 Level....N
PR001 CR001 DR001 ER001
PR001 CR001 DR002 ER002
PR001 CR002 DR003
PR001 CR003
XXXX XXXX YYYY

Tried to apply pivot as below but it's not helping

SELECT ParentID, ChildID 
FROM 
(SELECT ParentID, ChildID, Level from table_1>) 
AS src 
PIVOT 
( 
count (ParentID) 
FOR [Level] 
IN ( '1' as 'Level_1', '2' as 'Level_2', '3' as 'Level_3' ]) 
) AS pvt ; 

Is there any functions to apply in SQL Server for this case?

3

There are 3 answers

0
ValNik On BEST ANSWER

I will propose a solution with recursion and a fixed number of columns (levels). Increasing the number of levels is not difficult.
A solution with an unlimited number of levels can only be performed through a dynamic query.

with r as(
  select ParentId HeadId,1 lvl,ParentId,ChildId
         ,cast(ChildId as varchar) Level_1
         ,cast('' as varchar) Level_2    ,cast('' as varchar) Level_3
         ,cast('' as varchar) Level_4    ,cast('' as varchar) Level_5
         ,cast('' as varchar) Level_6    ,cast('' as varchar) Level_7
         ,cast('' as varchar) Level_8
         ,cast(concat(ParentId,'-',ChildId) as varchar(1000)) path
  from test t1
  where not exists(select 1 from test t2 where t2.ChildId=t1.ParentId)
  union all
  select r.HEadId,lvl+1 lvl,t.ParentId,t.ChildId
     ,Level_1
     ,case when lvl=1 then cast(t.ChildId as varchar) else Level_2 end 
     ,case when lvl=2 then cast(t.ChildId as varchar) else Level_3 end 
     ,case when lvl=3 then cast(t.ChildId as varchar) else Level_4 end
     ,case when lvl=4 then cast(t.ChildId as varchar) else Level_5 end
     ,case when lvl=5 then cast(t.ChildId as varchar) else Level_6 end
     ,case when lvl=6 then cast(t.ChildId as varchar) else Level_7 end
     ,case when lvl=7 then cast(t.ChildId as varchar) else Level_8 end
     ,cast(concat(path,'-',t.ChildId) as varchar(1000)) path
  from r inner join test t on r.ChildId=t.ParentId
)
, tp as(
select *
  ,case when path=left(lag(path)over(order by path desc),len(path)) then 0
   else 1 
   end fp
from r
)
select HeadId,lvl,Level_1 ,Level_2,Level_3,Level_4,Level_5,Level_6,Level_7,Level_8
  ,path
from tp
where fp=1
order by path;

Output

HeadId lvl Level_1 Level_2 Level_3 Level_4 Level_5 Level_6 Level_7 Level_8 path
PR001 8 CR001 DR001 ER001 FR001 GR001 HR001 IR001 JR001 PR001-CR001-DR001-ER001-FR001-GR001-HR001-IR001-JR001
PR001 2 CR001 DR002 PR001-CR001-DR002
PR001 3 CR002 DR003 ER003 PR001-CR002-DR003-ER003
PR001 1 CR003 PR001-CR003

with test data

Create Table test(ParentID NVARCHAR(10), ChildID  NVARCHAR(10));
INSERT INTO test VALUES
 ('PR001', 'CR001')
,('PR001', 'CR002')
,('PR001', 'CR003')
,('CR001', 'DR001')
,('CR001', 'DR002')
,('CR002', 'DR003')
,('DR001', 'ER001')
,('DR003', 'ER003')
,('ER001', 'FR001')
,('FR001', 'GR001')
,('GR001', 'HR001')
,('HR001', 'IR001')
,('IR001', 'JR001')
;

Test fiddle here

0
Sterner On

DATA

IF(OBJECT_ID('tempdb..#tmp_Intermediate') is not null)
    DROP TABLE #tmp_Intermediate

SELECT
    *
INTO #tmp_Intermediate
FROM(
VALUES
    ('PR001', 'CR001', 1),
    ('PR001', 'CR002', 1),
    ('PR001', 'CR003', 1),
    ('CR001', 'DR001', 2),
    ('CR001', 'DR002', 2),
    ('CR002', 'DR003', 3), --LEVEL 3 or 2??
    ('DR001', 'ER001', 3),
    ('DR002', 'ER002', 3),
    ('ER002', 'DEMO4', 4) --added for demo
) x (ParentID, ChildID, Level)

Option 1: Dynamic with a WHILE Based on your intermediate table.

SET @selectTemplate = char(13)+char(10)+char(9)+',l{{iteration}}.ChildID AS [LEVEL{{iteration}}]';
SET @joinTemplate = char(13)+char(10)+'LEFT JOIN #tmp_Intermediate l{{iteration}} on l{{iteration}}.ParentID = l{{parent_iteration}}.ChildID';
SET @sqlTemplate = N'SELECT 
    l1.ParentID
    ,l1.ChildID [Level1]{{selects}}
FROM #tmp_Intermediate l1{{leftjoins}}
WHERE l1.ParentID not in (SELECT ChildId FROM #tmp_Intermediate)';


DECLARE @leftJoins nvarchar(max) = '';
DECLARE @selects nvarchar(max) = '';
DECLARE @interation int = 2;
DECLARE @maxLevel int = (SELECT MAX(LEVEL) FROM #tmp_Intermediate);
WHILE (@interation <= @maxLevel)
BEGIN   
    SET @selects += REPLACE(@selectTemplate, '{{iteration}}', CAST(@interation AS NVARCHAR))
    SET @leftJoins += REPLACE(REPLACE(
                        @joinTemplate, 
                        '{{iteration}}', CAST(@interation AS NVARCHAR)), 
                        '{{parent_iteration}}', CAST(@interation - 1 AS NVARCHAR))
    SET @interation += 1;
END

SET @sqlTemplate = REPLACE(REPLACE(
                    @sqlTemplate,
                    '{{selects}}', isnull(@selects, '')),
                    '{{leftjoins}}', isnull(@leftJoins, ''))

EXEC(@sqlTemplate);

Option 2: Dynamic with a CTE You can replace the ChainedIds with your own CTE.

DECLARE @selectTemplate nvarchar(max) = char(13)+char(10)+char(9)+',l{{iteration}}.ChildID AS [LEVEL{{iteration}}]';
DECLARE @joinTemplate nvarchar(max) = char(13)+char(10)+'LEFT JOIN #temp l{{iteration}} on l{{iteration}}.ParentID = l{{parent_iteration}}.ChildID';
DECLARE @sqlTemplate nvarchar(max) = N'SELECT 
    l1.ParentID
    ,l1.ChildID [Level1]{{selects}}
FROM #temp l1{{leftjoins}}
WHERE l1.ParentID not in (SELECT ChildId FROM #temp)';
;WITH ChainedIds AS (
        SELECT 
            ParentID,
            ChildID,
            1 [Level]
        FROM #tmp_Intermediate
        WHERE ParentID not in(SELECT ChildID FROM #tmp_Intermediate)

        UNION ALL

        SELECT 
            i.ParentID,
            i.ChildID,
            c.[Level] + 1
        FROM ChainedIds c
        JOIN #tmp_Intermediate i on i.ParentID = c.ChildID
),SqlBuilder AS(
    SELECT
        STRING_AGG(REPLACE(@selectTemplate, '{{iteration}}', CAST([Level] AS NVARCHAR)), '')    Selects,
        STRING_AGG(REPLACE(REPLACE(@joinTemplate, 
                        '{{iteration}}', CAST([Level] AS NVARCHAR)), 
                        '{{parent_iteration}}', CAST([Level] - 1 AS NVARCHAR)), '')     LeftJoins
    FROM (
        SELECT
            [Level]
        FROM ChainedIds
        WHERE Level > 1
        GROUP BY Level 
    )x
)

SELECT
    @sqlTemplate = REPLACE(REPLACE(@sqlTemplate, 
                        '{{selects}}', isnull(Selects, '')),
                        '{{leftjoins}}', isnull(LeftJoins, ''))
FROM SqlBuilder

EXEC(@sqlTemplate);

Generated Query The same query is produced by either method.

SELECT 
    l1.ParentID
    ,l1.ChildID [Level1]
    ,l2.ChildID AS [LEVEL2]
    ,l3.ChildID AS [LEVEL3]
    ,l4.ChildID AS [LEVEL4]
FROM #tmp_Intermediate l1
LEFT JOIN #tmp_Intermediate l2 on l2.ParentID = l1.ChildID
LEFT JOIN #tmp_Intermediate l3 on l3.ParentID = l2.ChildID
LEFT JOIN #tmp_Intermediate l4 on l4.ParentID = l3.ChildID
WHERE l1.ParentID not in (SELECT ChildId FROM #tmp_Intermediate)

Results

ParentID ChildID LEVEL2 LEVEL3 LEVEL4
PR001 CR001 DR001 ER001 NULL
PR001 CR001 DR002 ER002 DEMO4
PR001 CR002 DR003 NULL NULL
PR001 CR003 NULL NULL NULL

Both approaches build the query that left joins children to the parent and maintains the chained ids.

If you do something like this, I would put the code in a stored procedure to keep it more orderly.

1
Sohail Aslam On
Create Table temp(ParentID NVARCHAR(10), ChildID  NVARCHAR(10))


INSERT INTO temp VALUES('PR001', 'CR001');
INSERT INTO temp VALUES('PR001', 'CR002');
INSERT INTO temp VALUES('PR001', 'CR003');
INSERT INTO temp VALUES('CR001', 'DR001');
INSERT INTO temp VALUES('CR001', 'DR002');
INSERT INTO temp VALUES('CR002', 'DR003');
INSERT INTO temp VALUES('DR001', 'ER001');
INSERT INTO temp VALUES('ER001', 'ER003');
INSERT INTO temp VALUES('ER001', 'ER004');
INSERT INTO temp VALUES('ER001', 'ER005');
INSERT INTO temp VALUES('ER002', 'ER006');
INSERT INTO temp VALUES('PR001', 'ER007');
INSERT INTO temp VALUES('PR001', 'ER007');
INSERT INTO temp VALUES('PR001', 'ER007');

DECLARE @DynamicPivotQuery AS NVARCHAR(MAX);
DECLARE @ColumnList AS NVARCHAR(MAX);

SET @ColumnList = (
    SELECT DISTINCT 
        ', MAX(CASE WHEN Level = ' + CAST(Level AS NVARCHAR(MAX)) + ' THEN ChildID END) AS Level_' + CAST(Level AS NVARCHAR(MAX))
    FROM (
        SELECT ROW_NUMBER() OVER (PARTITION BY ParentID ORDER BY ChildID) AS Level
        FROM temp
    ) AS Levels
    FOR XML PATH('')
);

SET @DynamicPivotQuery = '
WITH NumberedHierarchy AS (
    SELECT 
        ParentID,
        ChildID,
        ROW_NUMBER() OVER (PARTITION BY ParentID ORDER BY ChildID) AS Level
    FROM 
        temp
        
),
PivotedData AS (
    SELECT 
        ParentID' + @ColumnList + '
    FROM 
        NumberedHierarchy
    GROUP BY 
        ParentID
        
        
)
SELECT * FROM PivotedData';

EXEC sp_executesql @DynamicPivotQuery;

Demo: https://dbfiddle.uk/7a1N_jxW

Hope it solves the issue.