I use rdflib python library to model a graph of contacts, and perform sparql queries to retrieve who knows who. This works fine when people as added as URIRef, but not when using BNode.
The example graph can be represented as follow:
bob - knows -> linda
alice - knows -> linda
tom - knows -> linda
knows -> bob
Only Tom knows Bob, and no one knows Tom.
I perform the following 2 queries:
- The first one to retrieves Tom; it works as expected.
- In the second query, I use Tom node id to retrieve who knows him. I expect an empty list. When Tom is added as a
URIRef, it works as expected. However, when Tom is added as aBNode, this query returns 3 names!
use_blank_node = True # switch to see the undesired behavior happens only with blank node
pred_knows = URIRef("http://example.org/knows")
pred_named = URIRef("http://example.org/named")
def create_graph() -> Graph:
graph = Graph()
bob = URIRef("http://example.org/people/Bob")
linda = BNode() # a GUID is generated
alice = BNode()
tom = BNode() if use_blank_node else URIRef("http://example.org/people/Tom")
print(f"{str(tom)=}")
remy = BNode()
graph.add((bob, pred_named, Literal("Bob")))
graph.add((alice, pred_named, Literal("Alice")))
graph.add((tom, pred_named, Literal("Tom")))
graph.add((linda, pred_named, Literal("Linda")))
graph.add((remy, pred_named, Literal("Remy")))
graph.add((bob, pred_knows, linda))
graph.add((alice, pred_knows, linda))
graph.add((tom, pred_knows, linda))
graph.add((tom, pred_knows, bob))
return graph
find_tom_who_knows_bob_query = f"""SELECT DISTINCT ?knowsbob ?nameofwhoknowsbob
WHERE
{{ ?knowsbob <{pred_knows}> <http://example.org/people/Bob> ;
<{pred_named}> ?nameofwhoknowsbob .
}}"""
def find_who_know_tom(tom_id) -> str:
tom_query = f"_:{tom_id}" if type(tom_id) is BNode else f"<{tom_id}>"
return f"""SELECT DISTINCT ?nameOfWhoKnowsTom
WHERE
{{ ?iriOfWhoKnowsTom <{pred_knows}> {tom_query} ;
<{pred_named}> ?nameOfWhoKnowsTom}}"""
def main():
graph = create_graph()
print("=" * 60, "\n", graph.serialize(), "\n", "=" * 60)
result = list(graph.query(find_tom_who_knows_bob_query))
assert len(result) == 1 and len(result[0]) == 2
tom_id = result[0][0]
print(f"{str(tom_id)=}")
assert (type(tom_id) == BNode and use_blank_node) or (type(tom_id) == URIRef and use_blank_node is False)
assert str(result[0][1]) == "Tom"
query = find_who_know_tom(tom_id)
print(query)
result = list(graph.query(query))
print(
"They know Tom:", ", ".join([str(r[0]) for r in result])
) # why is it not empty when use_blank_node = True
# prints: "They know Tom: Bob, Alice, Tom"
if __name__ == "__main__":
main()
My question: how to correctly use sparql so that the query also works with blank node ?
Blank nodes are similar to free variables. From SPARQL 1.1 Query Language:
Your second query, instead of
should be like this one:
See also the blank-nodes tag info.