|
6 | 6 | from pyld import jsonld |
7 | 7 | from dkg.constants import DEFAULT_RDF_FORMAT, DEFAULT_CANON_ALGORITHM, ESCAPE_MAP |
8 | 8 | from rdflib import Graph, BNode, URIRef, Literal as RDFLiteral |
| 9 | +from rdflib.exceptions import ParserError as RDFParserError |
9 | 10 | from uuid import uuid4 |
10 | 11 | from web3 import Web3 |
11 | 12 | import math |
@@ -154,29 +155,26 @@ def replace_blank_node(term): |
154 | 155 |
|
155 | 156 | return term # Return IRIs or Literals unchanged |
156 | 157 |
|
157 | | - # Create a temporary graph for parsing individual quads |
158 | | - result = [] |
159 | | - |
160 | | - # Process each N-Quad string individually to maintain order |
161 | | - for nquad in nquads_list: |
162 | | - if not nquad.strip(): |
163 | | - continue |
| 158 | + all_nquads = "\n".join(nquad for nquad in nquads_list if nquad.strip()) |
164 | 159 |
|
165 | | - # Parse single N-Quad |
166 | | - g = Graph() |
167 | | - g.parse(data=nquad, format="nquads") |
| 160 | + # Parse every statement into one Graph as N-Triples |
| 161 | + g = Graph() |
| 162 | + try: |
| 163 | + g.parse(data=all_nquads, format="nt") |
| 164 | + except RDFParserError: |
| 165 | + raise UnsupportedJSONLD(nquads_list) |
168 | 166 |
|
169 | | - # Get the triple and replace blank nodes |
170 | | - for s, p, o in g: |
171 | | - updated_quad = ( |
172 | | - replace_blank_node(s), |
173 | | - replace_blank_node(p), |
174 | | - replace_blank_node(o), |
175 | | - ) |
176 | | - # Format as N-Quad string |
177 | | - result.append( |
178 | | - f"{updated_quad[0].n3()} {updated_quad[1].n3()} {updated_quad[2].n3()} ." |
179 | | - ) |
| 167 | + # Process all quads |
| 168 | + result = [] |
| 169 | + for s, p, o in g: |
| 170 | + updated_quad = ( |
| 171 | + replace_blank_node(s), |
| 172 | + replace_blank_node(p), |
| 173 | + replace_blank_node(o), |
| 174 | + ) |
| 175 | + result.append( |
| 176 | + f"{updated_quad[0].n3()} {updated_quad[1].n3()} {updated_quad[2].n3()} ." |
| 177 | + ) |
180 | 178 |
|
181 | 179 | return result |
182 | 180 |
|
@@ -266,3 +264,44 @@ def escape_literal_dict(obj): |
266 | 264 | return escape_literal_string(s=obj) |
267 | 265 | else: |
268 | 266 | return obj |
| 267 | + |
| 268 | + |
| 269 | +# Used when JSON-LD parsing fails due to quads being passed instead of triples |
class UnsupportedJSONLD(Exception):
    """Report N-Quads derived from JSON-LD that cannot be read as triples.

    The exception text lists every statement that individually fails to
    parse as N-Triples, followed by all statements that were supplied.

    Attributes:
        nquads_list: The supplied statement strings, stored as a list.
        message: The complete multi-line diagnostic text.
    """

    def __init__(self, nquads_list):
        """Build the diagnostic message from the supplied statements.

        Args:
            nquads_list: Iterable of N-Quad / N-Triple statement strings.
        """
        # Snapshot the input: it is walked twice below (once to find the
        # failing statements, once to print everything), so a one-shot
        # iterable would otherwise leave the second section empty.
        self.nquads_list = list(nquads_list)
        self.message = f"""
Unsupported JSON-LD input detected

After parsing the JSON-LD input, the parser detected creation of new named graphs.
The DKG does not support custom named graphs.

Problematic Quads:

{self.find_problematic_quads()}

Full Parsed N-Quads Array:

{self.format_nquads_list()}

"""
        super().__init__(self.message)

    def __str__(self):
        """Return the message prefixed with the exception class name."""
        return f"{self.__class__.__name__}: {self.message}"

    def format_nquads_list(self):
        """Return every supplied statement, stripped, one per line."""
        return "\n".join(nquad.strip() for nquad in self.nquads_list)

    def find_problematic_quads(self):
        """Return a numbered listing of statements that fail N-Triples parsing.

        Blank entries are ignored. Numbering starts at 1 and counts only the
        failing statements, not their position in the full list.
        """
        problematic = [
            quad
            for quad in self.nquads_list
            if quad.strip() and not self._parses_as_ntriples(quad)
        ]
        return "\n".join(
            f"{number}. {quad}" for number, quad in enumerate(problematic, start=1)
        )

    @staticmethod
    def _parses_as_ntriples(statement):
        """Return True when *statement* parses with the "nt" format."""
        try:
            # A throwaway graph per probe: only success/failure matters, so
            # nothing needs to accumulate between statements.
            Graph().parse(data=statement, format="nt")
        except RDFParserError:
            return False
        return True
0 commit comments