Get missing columns from a list of expected columns in a table

943 views Asked by At

The test has a list of expected columns which verifies if the actual table has them. The assertion is if the counts are equal in expected and actual, then all columns are present.

How to extract which columns are missing to put into a more meaningful error message?

Thanks for your help.

-- Verify added columns exist
CREATE PROC [testComplianceMaintenanceReporting].[test FactPropertyLatestRepairJobAgg_ColumnsExist]
AS
BEGIN
    SET NOCOUNT ON

    -- Assemble
    IF OBJECT_ID('tempdb..#TempExpected') IS NOT NULL
    BEGIN
        DROP TABLE #TempExpected
    END
    CREATE TABLE #TempExpected
    (
        Id INT,
        ColumnName NVARCHAR(50)
    )
    INSERT INTO #TempExpected
        VALUES
            (1, 'TenancyKey'),
            (2, 'TenancyHouseholdTypeKey'),
            (3, 'HomeVisitKeyLatest'),
            (4, 'HomeVisitDueStatusKey'),
            (5, 'HousingApplicationKey'),
            (6, 'RepairAreaKeyPropertyDefault'),
            (7, 'ContractKeyPropertyDefaultPMSCService'),
            (8, 'ContractorKeyPropertyDefaultHeadContractor'),
            (9, 'NextPeriodicHomeVisitDate'),
            (10, 'CountOfActiveLeaseAgreements')

    -- Action
    IF OBJECT_ID('tempdb..#TempActual') IS NOT NULL
    BEGIN
        DROP TABLE #TempActual
    END
    SELECT * INTO #TempActual
    FROM
    (
        SELECT
            OBJECT_ID,
            NAME
        FROM SYS.COLUMNS
        WHERE
            OBJECT_ID = OBJECT_ID('dm.Fact_PropertyLatestRepairJobAgg')
            AND NAME IN (SELECT ColumnName FROM #TempExpected)
    ) [TempActual]

    -- Assert
    DECLARE @expectedCount INT = (
        SELECT COUNT(*) FROM #TempExpected
    )
    DECLARE @actualCount INT = (
        SELECT COUNT(*) FROM #TempActual
    )
    EXEC tSQLt.AssertEquals
    @Expected = @expectedCount,
    @Actual = @actualCount,
    @Message = 'There are missing columns'
END
GO
3

There are 3 answers

1
SteveC On BEST ANSWER

Where you assert the test condition for @actualCount I believe you mean to INNER JOIN to the #TempExpected table. That would confirm that the actual values are the correct values. Otherwise the actual rows might not contain the expected ColumnNames. To create the list of missing column names the query uses STRING_AGG and NOT EXISTS. Something like this

    -- Assert
    DECLARE @expectedCount INT = (
        SELECT COUNT(*) FROM #TempExpected
    );
    DECLARE @actualCount INT = (
        SELECT COUNT(*) 
        FROM #TempActual ta
             join #expectedCount ec on ta.ColumnName=ec.ColumnName 
    );
    DECLARE @missing_ColumnName_message varchar(200);
    select @missing_ColumnName_message = (
        select concat('There are missing columns: ', string_agg(ec.ColumnName, ','))
        from #expectedCount ec
        where not exists(select 1
                         from #TempActual ta
                         where ta.ColumnName=ec.ColumnName)
    );
    EXEC tSQLt.AssertEquals
    @Expected = @expectedCount,
    @Actual = @actualCount,
    @Message = @missing_ColumnName_message;

Older versions of SQL Server without STRING_AGG could use STUFF and FOR XML

-- Assert
    DECLARE @expectedCount INT = (
        SELECT COUNT(*) FROM #TempExpected
    );
    DECLARE @actualCount INT = (
        SELECT COUNT(*) 
        FROM #TempActual ta
             join #expectedCount ec on ta.ColumnName=ec.ColumnName 
    );
    DECLARE @missing_ColumnName_message varchar(200);
    select @missing_ColumnName_message = (
        select concat('There are missing columns: ',
                      (stuff((select ', ' + ec.ColumnName
                       from #expectedCount ec
                       where not exists(select 1
                                        from #TempActual ta
                                        where ta.ColumnName=ec.ColumnName)
                       order by 1
                       for xml path('')),1,1,'')))
    );
    EXEC tSQLt.AssertEquals
    @Expected = @expectedCount,
    @Actual = @actualCount,
    @Message = @missing_ColumnName_message;
2
datacentricity On

tSQLt already has a built in assert for this, which validates column names, position, data type and nullability. Also, in my opinion a more elegant solution:

create procedure [UserProfileTests].[test Attribute column structure]
as
begin
    create table [UserProfileTests].[expected]
    (
      AttributeId int not null
    , AttributeName varchar(50) not null
    , DotNetType varchar(100) null
    , Narrative varchar(500) null,
    );

    exec tSQLt.AssertEqualsTableSchema '[UserProfileTests].[expected]', 'UserProfile.Attribute';
end;
go

... and the results you get back will allow you to quickly identify what's missing/invalid

0
k_rollo On

This is eventually what I ended up with thanks to @SteveC:

    -- Assert
    DECLARE @expectedCount INT = (
        SELECT COUNT(*) FROM #TempExpected
    )
    DECLARE @actualCount INT = (
        SELECT COUNT(*) 
        FROM #TempActual [TA]
        JOIN #TempExpected [TE] ON TA.Name = TE.ColumnName 
    )
    DECLARE @missingColumns NVARCHAR(MAX)
    SELECT @missingColumns = COALESCE(@missingColumns + ', ' + MissingColumn, MissingColumn) 
        FROM
        (
            SELECT TE.ColumnName [MissingColumn]
            FROM #TempExpected [TE]
            WHERE NOT EXISTS
            (
                SELECT 1
                FROM #TempActual [TA]
                WHERE TA.Name = TE.ColumnName
            )           
        ) [MissingColumns]
    DECLARE @errMsg NVARCHAR(MAX) = (
        SELECT CONCAT('Missing columns: ', @missingColumns)
    )
    EXEC tSQLt.AssertEquals
        @Expected = @expectedCount,
        @Actual = @actualCount,
        @Message = @errMsg

Output:

enter image description here

Reference using COALESCE:

https://www.mytecbits.com/microsoft/sql-server/concatenate-multiple-rows-into-single-string