Skip to content

Commit 051c03b

Browse files
Blank node replace fix (#71)
2 parents 160e371 + f7093ac commit 051c03b

File tree

1 file changed

+60
-21
lines changed

1 file changed

+60
-21
lines changed

dkg/utils/knowledge_collection_tools.py

Lines changed: 60 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from pyld import jsonld
77
from dkg.constants import DEFAULT_RDF_FORMAT, DEFAULT_CANON_ALGORITHM, ESCAPE_MAP
88
from rdflib import Graph, BNode, URIRef, Literal as RDFLiteral
9+
from rdflib.exceptions import ParserError as RDFParserError
910
from uuid import uuid4
1011
from web3 import Web3
1112
import math
@@ -154,29 +155,26 @@ def replace_blank_node(term):
154155

155156
return term # Return IRIs or Literals unchanged
156157

157-
# Create a temporary graph for parsing individual quads
158-
result = []
159-
160-
# Process each N-Quad string individually to maintain order
161-
for nquad in nquads_list:
162-
if not nquad.strip():
163-
continue
158+
all_nquads = "\n".join(nquad for nquad in nquads_list if nquad.strip())
164159

165-
# Parse single N-Quad
166-
g = Graph()
167-
g.parse(data=nquad, format="nquads")
160+
# Create a single Dataset
161+
g = Graph()
162+
try:
163+
g.parse(data=all_nquads, format="nt")
164+
except RDFParserError:
165+
raise UnsupportedJSONLD(nquads_list)
168166

169-
# Get the triple and replace blank nodes
170-
for s, p, o in g:
171-
updated_quad = (
172-
replace_blank_node(s),
173-
replace_blank_node(p),
174-
replace_blank_node(o),
175-
)
176-
# Format as N-Quad string
177-
result.append(
178-
f"{updated_quad[0].n3()} {updated_quad[1].n3()} {updated_quad[2].n3()} ."
179-
)
167+
# Process all quads
168+
result = []
169+
for s, p, o in g:
170+
updated_quad = (
171+
replace_blank_node(s),
172+
replace_blank_node(p),
173+
replace_blank_node(o),
174+
)
175+
result.append(
176+
f"{updated_quad[0].n3()} {updated_quad[1].n3()} {updated_quad[2].n3()} ."
177+
)
180178

181179
return result
182180

@@ -266,3 +264,44 @@ def escape_literal_dict(obj):
266264
return escape_literal_string(s=obj)
267265
else:
268266
return obj
267+
268+
269+
# Used when JSON-LD parsing fails due to quads being passed instead of triples
270+
class UnsupportedJSONLD(Exception):
    """Raised when JSON-LD parsing fails due to quads being passed instead of triples.

    The exception message lists the individual statements that fail to parse
    as N-Triples, followed by the full list of statements that was received.

    Attributes:
        nquads_list: The statements (strings) handed to the constructor.
        message: The fully rendered, human-readable error report.
    """

    def __init__(self, nquads_list):
        """Build the error report for ``nquads_list``.

        Args:
            nquads_list: Re-iterable collection of statement strings. It is
                iterated twice: once to find the failing statements and once
                to render the full listing.
        """
        self.nquads_list = nquads_list
        # The report starts with a blank line and ends with two newlines;
        # the layout is kept stable because callers may log or compare it.
        self.message = "\n".join(
            [
                "",
                "Unsupported JSON-LD input detected",
                "",
                "After parsing the JSON-LD input, the parser detected creation of new named graphs.",
                "The DKG does not support custom named graphs.",
                "",
                "Problematic Quads:",
                "",
                self.find_problematic_quads(),
                "",
                "Full Parsed N-Quads Array:",
                "",
                self.format_nquads_list(),
                "",
                "",
            ]
        )
        super().__init__(self.message)

    def __str__(self):
        """Return the report prefixed with the exception class name."""
        return f"{self.__class__.__name__}: {self.message}"

    def format_nquads_list(self) -> str:
        """Return every statement, stripped of surrounding whitespace, one per line."""
        return "\n".join(nquad.strip() for nquad in self.nquads_list)

    def find_problematic_quads(self) -> str:
        """Return a numbered listing of the statements that fail N-Triples parsing.

        Blank or whitespace-only entries are ignored. Each remaining statement
        is parsed on its own so a failure can be attributed to exactly one
        entry; statements are reported unmodified, numbered from 1.

        Returns:
            One ``"<n>. <statement>"`` line per failing statement, joined with
            newlines; an empty string when nothing fails.
        """
        problematic = []
        for quad in self.nquads_list:
            if not quad.strip():
                continue
            try:
                # A fresh graph per statement: only the parse outcome matters,
                # so nothing is accumulated in a graph that is never read.
                Graph().parse(data=quad, format="nt")
            except RDFParserError:
                problematic.append(quad)

        return "\n".join(
            f"{i}. {quad}" for i, quad in enumerate(problematic, start=1)
        )

0 commit comments

Comments
 (0)