Merge branch 'main' into n3-no-norm-float

nicholascar · web-flow · commit fbe7881a3957 · 2025-01-06T18:16:19.000+10:00
diff --git a/poetry.lock b/poetry.lock
diff --git a/rdflib/plugins/parsers/jsonld.py b/rdflib/plugins/parsers/jsonld.py
@@ -34,6 +34,7 @@
 # we should consider streaming the input to deal with arbitrarily large graphs.
 from __future__ import annotations
 
+import secrets
 import warnings
 from collections.abc import Iterable
 from typing import TYPE_CHECKING, Any, Union
@@ -215,6 +216,7 @@ def __init__(
             if allow_lists_of_lists is not None
             else ALLOW_LISTS_OF_LISTS
         )
+        self.invalid_uri_to_bnode: dict[str, BNode] = {}
 
     def parse(self, data: Any, context: Context, dataset: Graph) -> Graph:
         topcontext = False
@@ -623,7 +625,12 @@ def _to_rdf_id(self, context: Context, id_val: str) -> IdentifiedNode | None:
             uri = context.resolve(id_val)
             if not self.generalized_rdf and ":" not in uri:
                 return None
-            return URIRef(uri)
+            node: IdentifiedNode = URIRef(uri)
+            if not str(node):
+                if id_val not in self.invalid_uri_to_bnode:
+                    self.invalid_uri_to_bnode[id_val] = BNode(secrets.token_urlsafe(20))
+                node = self.invalid_uri_to_bnode[id_val]
+            return node
 
     def _get_bnodeid(self, ref: str) -> str | None:
         if not ref.startswith("_:"):
diff --git a/rdflib/plugins/serializers/longturtle.py b/rdflib/plugins/serializers/longturtle.py
@@ -20,7 +20,9 @@
 
 from typing import IO, Any, Optional
 
+from rdflib.compare import to_canonical_graph
 from rdflib.exceptions import Error
+from rdflib.graph import Graph
 from rdflib.namespace import RDF
 from rdflib.term import BNode, Literal, URIRef
 
@@ -42,7 +44,16 @@ class LongTurtleSerializer(RecursiveSerializer):
 
     def __init__(self, store):
         self._ns_rewrite = {}
-        super(LongTurtleSerializer, self).__init__(store)
+        store = to_canonical_graph(store)
+        content = store.serialize(format="application/n-triples")
+        lines = content.split("\n")
+        lines.sort()
+        graph = Graph()
+        graph.parse(
+            data="\n".join(lines), format="application/n-triples", skolemize=True
+        )
+        graph = graph.de_skolemize()
+        super(LongTurtleSerializer, self).__init__(graph)
         self.keywords = {RDF.type: "a"}
         self.reset()
         self.stream = None
@@ -186,7 +197,7 @@ def s_squared(self, subject):
             return False
         self.write("\n" + self.indent() + "[]")
         self.predicateList(subject, newline=False)
-        self.write(" ;\n.")
+        self.write("\n.")
         return True
 
     def path(self, node, position, newline=False):
@@ -293,34 +304,7 @@ def predicateList(self, subject, newline=False):
     def verb(self, node, newline=False):
         self.path(node, VERB, newline)
 
-    def sortObjects(
-        self, values: list[URIRef | BNode | Literal]
-    ) -> list[URIRef | BNode | Literal]:
-        """
-        Perform a sort on the values where each value is a blank node. Grab the CBD of the
-        blank node and sort it by its longturtle serialization value.
-
-        Identified nodes come first and the sorted blank nodes are tacked on after.
-        """
-        bnode_map: dict[BNode, list[str]] = {}
-        objects = []
-        for value in values:
-            if isinstance(value, BNode):
-                bnode_map[value] = []
-            else:
-                objects.append(value)
-
-        for bnode in bnode_map:
-            cbd = self.store.cbd(bnode).serialize(format="longturtle")
-            bnode_map[bnode].append(cbd)
-
-        sorted_bnodes = sorted(
-            [(k, v) for k, v in bnode_map.items()], key=lambda x: x[1]
-        )
-        return objects + [x[0] for x in sorted_bnodes]
-
     def objectList(self, objects):
-        objects = self.sortObjects(objects)
         count = len(objects)
         if count == 0:
             return
diff --git a/test/data/longturtle/longturtle-target.ttl b/test/data/longturtle/longturtle-target.ttl
@@ -1,74 +1,72 @@
-PREFIX cn: <https://linked.data.gov.au/def/cn/>
-PREFIX ex: <http://example.com/>
 PREFIX geo: <http://www.opengis.net/ont/geosparql#>
 PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-PREFIX sdo: <https://schema.org/>
+PREFIX schema: <https://schema.org/>
 PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
 
-ex:nicholas
-    a sdo:Person ;
-    sdo:age 41 ;
-    sdo:alternateName
-        "N.J. Car" ,
-        "Nick Car" ,
+<http://example.com/nicholas>
+    a schema:Person ;
+    schema:age 41 ;
+    schema:alternateName 
         [
-            sdo:name "Dr N.J. Car" ;
-        ] ;
-    sdo:name
+            schema:name "Dr N.J. Car" ;
+        ] ,
+        "N.J. Car" ,
+        "Nick Car" ;
+    schema:name
         [
-            a cn:CompoundName ;
-            sdo:hasPart 
-                [
-                    a cn:CompoundName ;
-                    rdf:value "John" ;
-                ] ,
+            a <https://linked.data.gov.au/def/cn/CompoundName> ;
+            schema:hasPart 
                 [
-                    a cn:CompoundName ;
-                    rdf:value "Nicholas" ;
-                ] ,
-                [
-                    a cn:CompoundName ;
-                    sdo:hasPart 
+                    a <https://linked.data.gov.au/def/cn/CompoundName> ;
+                    schema:hasPart 
                         [
-                            a cn:CompoundName ;
+                            a <https://linked.data.gov.au/def/cn/CompoundName> ;
                             rdf:value "Car" ;
                         ] ,
                         [
-                            a cn:CompoundName ;
+                            a <https://linked.data.gov.au/def/cn/CompoundName> ;
                             rdf:value "Maxov" ;
                         ] ;
+                ] ,
+                [
+                    a <https://linked.data.gov.au/def/cn/CompoundName> ;
+                    rdf:value "Nicholas" ;
+                ] ,
+                [
+                    a <https://linked.data.gov.au/def/cn/CompoundName> ;
+                    rdf:value "John" ;
                 ] ;
         ] ;
-    sdo:worksFor <https://kurrawong.ai> ;
+    schema:worksFor <https://kurrawong.ai> ;
 .
 
 <https://kurrawong.ai>
-    a sdo:Organization ;
-    sdo:location <https://kurrawong.ai/hq> ;
+    a schema:Organization ;
+    schema:location <https://kurrawong.ai/hq> ;
 .
 
 <https://kurrawong.ai/hq>
-    a sdo:Place ;
-    sdo:address
+    a schema:Place ;
+    schema:address
         [
-            a sdo:PostalAddress ;
-            sdo:addressCountry
+            a schema:PostalAddress ;
+            schema:addressCountry
                 [
-                    sdo:identifier "au" ;
-                    sdo:name "Australia" ;
+                    schema:identifier "au" ;
+                    schema:name "Australia" ;
                 ] ;
-            sdo:addressLocality "Shorncliffe" ;
-            sdo:addressRegion "QLD" ;
-            sdo:postalCode 4017 ;
-            sdo:streetAddress (
+            schema:addressLocality "Shorncliffe" ;
+            schema:addressRegion "QLD" ;
+            schema:postalCode 4017 ;
+            schema:streetAddress (
                 72
                 "Yundah"
                 "Street"
             ) ;
         ] ;
-    sdo:geo
+    schema:geo
         [
-            sdo:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ;
+            schema:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ;
         ] ;
-    sdo:name "KurrawongAI HQ" ;
+    schema:name "KurrawongAI HQ" ;
 .
diff --git a/test/jsonld/local-suite/manifest.jsonld b/test/jsonld/local-suite/manifest.jsonld
@@ -27,6 +27,17 @@
       "purpose": "Multiple @id aliases.  Issue #2164",
       "input": "toRdf-twoimports-in.jsonld",
       "expect": "toRdf-twoimports-out.nq"
+    },
+    {
+      "@id": "#toRdf-two-invalid-ids",
+      "@type": ["jld:PositiveEvaluationTest", "jld:ToRDFTest"],
+      "name": "Two invalid identifiers",
+      "purpose": "Multiple nodes with invalid @ids are not merged together.",
+      "option": {
+        "produceGeneralizedRdf": true
+      },
+      "input": "toRdf-twoinvalidids-in.jsonld",
+      "expect": "toRdf-twoinvalidids-out.nq"
     }
   ]
 }
diff --git a/test/jsonld/local-suite/toRdf-twoinvalidids-in.jsonld b/test/jsonld/local-suite/toRdf-twoinvalidids-in.jsonld
@@ -0,0 +1,20 @@
+{
+    "@id": "https://example.org/root-object",
+    "https://schema.org/author": [
+        {
+            "@id": "https://example.org/ invalid url 1",
+            "https://schema.org/name": "Jane Doe"
+        },
+        {
+            "@id": "https://example.org/ invalid url 1",
+            "https://schema.org/givenName": "Jane",
+            "https://schema.org/familyName": "Doe"
+        },
+        {
+            "@id": "https://example.org/ invalid url 2",
+            "https://schema.org/name": "John Doe",
+            "https://schema.org/givenName": "John",
+            "https://schema.org/familyName": "Doe"
+        }
+    ]
+}
diff --git a/test/jsonld/local-suite/toRdf-twoinvalidids-out.nq b/test/jsonld/local-suite/toRdf-twoinvalidids-out.nq
@@ -0,0 +1,10 @@
+
+<https://example.org/root-object> <https://schema.org/author> _:b1.
+<https://example.org/root-object> <https://schema.org/author> _:b2.
+
+_:b1 <https://schema.org/name> "Jane Doe".
+_:b1 <https://schema.org/givenName> "Jane".
+_:b1 <https://schema.org/familyName> "Doe".
+_:b2 <https://schema.org/name> "John Doe".
+_:b2 <https://schema.org/givenName> "John".
+_:b2 <https://schema.org/familyName> "Doe".
diff --git a/test/test_serializers/test_serializer_longturtle_sort.py b/test/test_serializers/test_serializer_longturtle_sort.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+
+# Portions of this file contributed by NIST are governed by the
+# following statement:
+#
+# This software was developed at the National Institute of Standards
+# and Technology by employees of the Federal Government in the course
+# of their official duties. Pursuant to Title 17 Section 105 of the
+# United States Code, this software is not subject to copyright
+# protection within the United States. NIST assumes no responsibility
+# whatsoever for its use by other parties, and makes no guarantees,
+# expressed or implied, about its quality, reliability, or any other
+# characteristic.
+#
+# We would appreciate acknowledgement if the software is used.
+
+import random
+from collections import defaultdict
+
+from rdflib import RDFS, BNode, Graph, Literal, Namespace, URIRef
+
+EX = Namespace("http://example.org/ex/")
+
+
+def test_sort_semiblank_graph() -> None:
+    """
+    This test checks whether the longturtle serialization stays
+    consistent when a graph with blank nodes is generated repeatedly.
+    """
+
+    serialization_counter: defaultdict[str, int] = defaultdict(int)
+
+    first_graph_text: str = ""
+
+    # Use a seeded random generator so the shuffle order is reproducible,
+    # which keeps the test results consistent between runs.
+    nonrandom_shuffler = random.Random(1234)
+    for x in range(1, 10):
+        graph = Graph()
+        graph.bind("ex", EX)
+        graph.bind("rdfs", RDFS)
+
+        graph.add((EX.A, RDFS.comment, Literal("Thing A")))
+        graph.add((EX.B, RDFS.comment, Literal("Thing B")))
+        graph.add((EX.C, RDFS.comment, Literal("Thing C")))
+
+        nodes: list[URIRef] = [EX.A, EX.B, EX.C, EX.B]
+        nonrandom_shuffler.shuffle(nodes)
+        for node in nodes:
+            # Instantiate one bnode per URIRef node.
+            graph.add((BNode(), RDFS.seeAlso, node))
+
+        nesteds: list[URIRef] = [EX.A, EX.B, EX.C]
+        nonrandom_shuffler.shuffle(nesteds)
+        for nested in nesteds:
+            # Instantiate a nested node reference.
+            outer_node = BNode()
+            inner_node = BNode()
+            graph.add((outer_node, EX.has, inner_node))
+            graph.add((inner_node, RDFS.seeAlso, nested))
+
+        graph_text = graph.serialize(format="longturtle", sort=True)
+        if first_graph_text == "":
+            first_graph_text = graph_text
+
+        serialization_counter[graph_text] += 1
+
+    expected_serialization = """\
+PREFIX ns1: <http://example.org/ex/>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+
+ns1:A
+    rdfs:comment "Thing A" ;
+.
+
+ns1:C
+    rdfs:comment "Thing C" ;
+.
+
+ns1:B
+    rdfs:comment "Thing B" ;
+.
+
+[]    ns1:has
+        [
+            rdfs:seeAlso ns1:A ;
+        ] ;
+.
+
+[]    rdfs:seeAlso ns1:B ;
+.
+
+[]    ns1:has
+        [
+            rdfs:seeAlso ns1:C ;
+        ] ;
+.
+
+[]    rdfs:seeAlso ns1:A ;
+.
+
+[]    rdfs:seeAlso ns1:C ;
+.
+
+[]    rdfs:seeAlso ns1:B ;
+.
+
+[]    ns1:has
+        [
+            rdfs:seeAlso ns1:B ;
+        ] ;
+.
+
+"""
+
+    assert expected_serialization.strip() == first_graph_text.strip()
+    assert 1 == len(serialization_counter)

Original file line number	Diff line number	Diff line change
`@@ -27,6 +27,17 @@`
`27`	`27`	`"purpose": "Multiple @id aliases. Issue #2164",`
`28`	`28`	`"input": "toRdf-twoimports-in.jsonld",`
`29`	`29`	`"expect": "toRdf-twoimports-out.nq"`
	`30`	`+ },`
	`31`	`+ {`
	`32`	`+ "@id": "#toRdf-two-invalid-ids",`
	`33`	`+ "@type": ["jld:PositiveEvaluationTest", "jld:ToRDFTest"],`
	`34`	`+ "name": "Two invalid identifiers",`
	`35`	`+ "purpose": "Multiple nodes with invalid @ids are not merged together.",`
	`36`	`+ "option": {`
	`37`	`+ "produceGeneralizedRdf": true`
	`38`	`+ },`
	`39`	`+ "input": "toRdf-twoinvalidids-in.jsonld",`
	`40`	`+ "expect": "toRdf-twoinvalidids-out.nq"`
`30`	`41`	`}`
`31`	`42`	`]`
`32`	`43`	`}`