sql server graph query, find all paths to node

2.1k views Asked by At

I have a very common problem I am trying to solve using graph queries (sql server 2017).

enter image description here

  1. I want to build a query and find how anyone in the nodes is connected to C.
  2. I want to build a query and find how anyone in the nodes is connected to C (with 1 or 2 connections).

here is the full script to create this graph:

DROP TABLE IF EXISTS Person;
CREATE TABLE Person (userName VARCHAR(100)  PRIMARY KEY) AS NODE;

INSERT INTO Person (userName) VALUES ('A'),('B'),('C'),('D'),('E'),('F'); 

DROP TABLE IF EXISTS Follow; 
CREATE TABLE Follow AS EDGE;

INSERT INTO Follow ($from_id, $to_id) VALUES (
   (SELECT $node_id FROM dbo.Person WHERE userName = 'A'),
   (SELECT $node_id FROM dbo.Person WHERE userName = 'E')),

   ((SELECT $node_id FROM dbo.Person WHERE userName = 'E'),
   (SELECT $node_id FROM dbo.Person WHERE userName = 'C')),


   ((SELECT $node_id FROM dbo.Person WHERE userName = 'C'),
   (SELECT $node_id FROM dbo.Person WHERE userName = 'A')),


   ((SELECT $node_id FROM dbo.Person WHERE userName = 'A'),
   (SELECT $node_id FROM dbo.Person WHERE userName = 'F')),

   ((SELECT $node_id FROM dbo.Person WHERE userName = 'F'),
   (SELECT $node_id FROM dbo.Person WHERE userName = 'B')),

   ((SELECT $node_id FROM dbo.Person WHERE userName = 'B'),
   (SELECT $node_id FROM dbo.Person WHERE userName = 'F')),

   ((SELECT $node_id FROM dbo.Person WHERE userName = 'B'),
   (SELECT $node_id FROM dbo.Person WHERE userName = 'E')),

   ((SELECT $node_id FROM dbo.Person WHERE userName = 'E'),
   (SELECT $node_id FROM dbo.Person WHERE userName = 'B'));

this query is not working since it is giving me only the direct relation:

SELECT Person1.userName as userName1, Person2.userName as userName2   
FROM Person as Person1, Follow, Person as Person2 
WHERE MATCH(Person1-(Follow)->Person2)
AND Person2.userName = 'C'
1

There are 1 answers

3
sacse On

you can try something like below:

SELECT 
        p1.userName, 
        p1.userName as StartNode,
        LAST_VALUE(p2.userName) WITHIN GROUP (GRAPH PATH) AS FinalNode,
        STRING_AGG(p2.userName,'->') WITHIN GROUP (GRAPH PATH) AS [Edges Path],
        COUNT(p2.userName) WITHIN GROUP (GRAPH PATH) AS Levels
    FROM
        dbo.Person p1,
        dbo.Person FOR PATH p2,
        dbo.Follow FOR PATH Follow
    WHERE 
        MATCH(SHORTEST_PATH(p1(-(Follow)->p2)+))
        AND p1.userName = 'C';

to find all the incoming connection for a node, we need to wrap the query and filter for the final node like below:

SELECT
    username, StartNode, [Edges Path], FinalNode, Levels
FROM (
    SELECT 
        P1.username, 
        P1.username as StartNode, 
        STRING_AGG(P2.userName,'->') WITHIN GROUP (GRAPH PATH) AS [Edges Path],
        LAST_VALUE(P2.userName) WITHIN GROUP (GRAPH PATH) AS FinalNode,
        COUNT(P2.userName) WITHIN GROUP (GRAPH PATH) AS Levels
    FROM
        Person P1,
        Person FOR PATH P2,
        Follow FOR PATH Follow
    WHERE 
        MATCH(SHORTEST_PATH(P1(-(Follow)->P2)))
 ) AS Q
 WHERE Q.FinalNode = 'C'

to limit the levels or the number of hops, we can provide the recursion quantifiers in place of (+ --- one or more) like below:

SELECT
    username, StartNode, [Edges Path], FinalNode, Levels
FROM (
    SELECT 
        P1.username, 
        P1.username as StartNode, 
        STRING_AGG(P2.userName,'->') WITHIN GROUP (GRAPH PATH) AS [Edges Path],
        LAST_VALUE(P2.userName) WITHIN GROUP (GRAPH PATH) AS FinalNode,
        COUNT(P2.userName) WITHIN GROUP (GRAPH PATH) AS Levels
    FROM
        Person P1,
        Person FOR PATH P2,
        Follow FOR PATH Follow
    WHERE 
        MATCH(SHORTEST_PATH(P1(-(Follow)->P2){1,3}))
 ) AS Q
 WHERE Q.FinalNode = 'C'