Skip to content

Commit fbe7881

Browse files
authored
Merge branch 'main' into n3-no-norm-float
2 parents 79496fd + bc17fb7 commit fbe7881

File tree

8 files changed

+317
-170
lines changed

8 files changed

+317
-170
lines changed

poetry.lock

Lines changed: 98 additions & 98 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

rdflib/plugins/parsers/jsonld.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
# we should consider streaming the input to deal with arbitrarily large graphs.
3535
from __future__ import annotations
3636

37+
import secrets
3738
import warnings
3839
from collections.abc import Iterable
3940
from typing import TYPE_CHECKING, Any, Union
@@ -215,6 +216,7 @@ def __init__(
215216
if allow_lists_of_lists is not None
216217
else ALLOW_LISTS_OF_LISTS
217218
)
219+
self.invalid_uri_to_bnode: dict[str, BNode] = {}
218220

219221
def parse(self, data: Any, context: Context, dataset: Graph) -> Graph:
220222
topcontext = False
@@ -623,7 +625,12 @@ def _to_rdf_id(self, context: Context, id_val: str) -> IdentifiedNode | None:
623625
uri = context.resolve(id_val)
624626
if not self.generalized_rdf and ":" not in uri:
625627
return None
626-
return URIRef(uri)
628+
node: IdentifiedNode = URIRef(uri)
629+
if not str(node):
630+
if id_val not in self.invalid_uri_to_bnode:
631+
self.invalid_uri_to_bnode[id_val] = BNode(secrets.token_urlsafe(20))
632+
node = self.invalid_uri_to_bnode[id_val]
633+
return node
627634

628635
def _get_bnodeid(self, ref: str) -> str | None:
629636
if not ref.startswith("_:"):

rdflib/plugins/serializers/longturtle.py

Lines changed: 13 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@
2020

2121
from typing import IO, Any, Optional
2222

23+
from rdflib.compare import to_canonical_graph
2324
from rdflib.exceptions import Error
25+
from rdflib.graph import Graph
2426
from rdflib.namespace import RDF
2527
from rdflib.term import BNode, Literal, URIRef
2628

@@ -42,7 +44,16 @@ class LongTurtleSerializer(RecursiveSerializer):
4244

4345
def __init__(self, store):
4446
self._ns_rewrite = {}
45-
super(LongTurtleSerializer, self).__init__(store)
47+
store = to_canonical_graph(store)
48+
content = store.serialize(format="application/n-triples")
49+
lines = content.split("\n")
50+
lines.sort()
51+
graph = Graph()
52+
graph.parse(
53+
data="\n".join(lines), format="application/n-triples", skolemize=True
54+
)
55+
graph = graph.de_skolemize()
56+
super(LongTurtleSerializer, self).__init__(graph)
4657
self.keywords = {RDF.type: "a"}
4758
self.reset()
4859
self.stream = None
@@ -186,7 +197,7 @@ def s_squared(self, subject):
186197
return False
187198
self.write("\n" + self.indent() + "[]")
188199
self.predicateList(subject, newline=False)
189-
self.write(" ;\n.")
200+
self.write("\n.")
190201
return True
191202

192203
def path(self, node, position, newline=False):
@@ -293,34 +304,7 @@ def predicateList(self, subject, newline=False):
293304
def verb(self, node, newline=False):
294305
self.path(node, VERB, newline)
295306

296-
def sortObjects(
297-
self, values: list[URIRef | BNode | Literal]
298-
) -> list[URIRef | BNode | Literal]:
299-
"""
300-
Perform a sort on the values where each value is a blank node. Grab the CBD of the
301-
blank node and sort it by its longturtle serialization value.
302-
303-
Identified nodes come first and the sorted blank nodes are tacked on after.
304-
"""
305-
bnode_map: dict[BNode, list[str]] = {}
306-
objects = []
307-
for value in values:
308-
if isinstance(value, BNode):
309-
bnode_map[value] = []
310-
else:
311-
objects.append(value)
312-
313-
for bnode in bnode_map:
314-
cbd = self.store.cbd(bnode).serialize(format="longturtle")
315-
bnode_map[bnode].append(cbd)
316-
317-
sorted_bnodes = sorted(
318-
[(k, v) for k, v in bnode_map.items()], key=lambda x: x[1]
319-
)
320-
return objects + [x[0] for x in sorted_bnodes]
321-
322307
def objectList(self, objects):
323-
objects = self.sortObjects(objects)
324308
count = len(objects)
325309
if count == 0:
326310
return
Lines changed: 40 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,74 +1,72 @@
1-
PREFIX cn: <https://linked.data.gov.au/def/cn/>
2-
PREFIX ex: <http://example.com/>
31
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
42
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
5-
PREFIX sdo: <https://schema.org/>
3+
PREFIX schema: <https://schema.org/>
64
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
75

8-
ex:nicholas
9-
a sdo:Person ;
10-
sdo:age 41 ;
11-
sdo:alternateName
12-
"N.J. Car" ,
13-
"Nick Car" ,
6+
<http://example.com/nicholas>
7+
a schema:Person ;
8+
schema:age 41 ;
9+
schema:alternateName
1410
[
15-
sdo:name "Dr N.J. Car" ;
16-
] ;
17-
sdo:name
11+
schema:name "Dr N.J. Car" ;
12+
] ,
13+
"N.J. Car" ,
14+
"Nick Car" ;
15+
schema:name
1816
[
19-
a cn:CompoundName ;
20-
sdo:hasPart
21-
[
22-
a cn:CompoundName ;
23-
rdf:value "John" ;
24-
] ,
17+
a <https://linked.data.gov.au/def/cn/CompoundName> ;
18+
schema:hasPart
2519
[
26-
a cn:CompoundName ;
27-
rdf:value "Nicholas" ;
28-
] ,
29-
[
30-
a cn:CompoundName ;
31-
sdo:hasPart
20+
a <https://linked.data.gov.au/def/cn/CompoundName> ;
21+
schema:hasPart
3222
[
33-
a cn:CompoundName ;
23+
a <https://linked.data.gov.au/def/cn/CompoundName> ;
3424
rdf:value "Car" ;
3525
] ,
3626
[
37-
a cn:CompoundName ;
27+
a <https://linked.data.gov.au/def/cn/CompoundName> ;
3828
rdf:value "Maxov" ;
3929
] ;
30+
] ,
31+
[
32+
a <https://linked.data.gov.au/def/cn/CompoundName> ;
33+
rdf:value "Nicholas" ;
34+
] ,
35+
[
36+
a <https://linked.data.gov.au/def/cn/CompoundName> ;
37+
rdf:value "John" ;
4038
] ;
4139
] ;
42-
sdo:worksFor <https://kurrawong.ai> ;
40+
schema:worksFor <https://kurrawong.ai> ;
4341
.
4442

4543
<https://kurrawong.ai>
46-
a sdo:Organization ;
47-
sdo:location <https://kurrawong.ai/hq> ;
44+
a schema:Organization ;
45+
schema:location <https://kurrawong.ai/hq> ;
4846
.
4947

5048
<https://kurrawong.ai/hq>
51-
a sdo:Place ;
52-
sdo:address
49+
a schema:Place ;
50+
schema:address
5351
[
54-
a sdo:PostalAddress ;
55-
sdo:addressCountry
52+
a schema:PostalAddress ;
53+
schema:addressCountry
5654
[
57-
sdo:identifier "au" ;
58-
sdo:name "Australia" ;
55+
schema:identifier "au" ;
56+
schema:name "Australia" ;
5957
] ;
60-
sdo:addressLocality "Shorncliffe" ;
61-
sdo:addressRegion "QLD" ;
62-
sdo:postalCode 4017 ;
63-
sdo:streetAddress (
58+
schema:addressLocality "Shorncliffe" ;
59+
schema:addressRegion "QLD" ;
60+
schema:postalCode 4017 ;
61+
schema:streetAddress (
6462
72
6563
"Yundah"
6664
"Street"
6765
) ;
6866
] ;
69-
sdo:geo
67+
schema:geo
7068
[
71-
sdo:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ;
69+
schema:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ;
7270
] ;
73-
sdo:name "KurrawongAI HQ" ;
71+
schema:name "KurrawongAI HQ" ;
7472
.

test/jsonld/local-suite/manifest.jsonld

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,17 @@
2727
"purpose": "Multiple @id aliases. Issue #2164",
2828
"input": "toRdf-twoimports-in.jsonld",
2929
"expect": "toRdf-twoimports-out.nq"
30+
},
31+
{
32+
"@id": "#toRdf-two-invalid-ids",
33+
"@type": ["jld:PositiveEvaluationTest", "jld:ToRDFTest"],
34+
"name": "Two invalid identifiers",
35+
"purpose": "Multiple nodes with invalid @ids are not merged together.",
36+
"option": {
37+
"produceGeneralizedRdf": true
38+
},
39+
"input": "toRdf-twoinvalidids-in.jsonld",
40+
"expect": "toRdf-twoinvalidids-out.nq"
3041
}
3142
]
3243
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"@id": "https://example.org/root-object",
3+
"https://schema.org/author": [
4+
{
5+
"@id": "https://example.org/ invalid url 1",
6+
"https://schema.org/name": "Jane Doe"
7+
},
8+
{
9+
"@id": "https://example.org/ invalid url 1",
10+
"https://schema.org/givenName": "Jane",
11+
"https://schema.org/familyName": "Doe"
12+
},
13+
{
14+
"@id": "https://example.org/ invalid url 2",
15+
"https://schema.org/name": "John Doe",
16+
"https://schema.org/givenName": "John",
17+
"https://schema.org/familyName": "Doe"
18+
}
19+
]
20+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
2+
<https://example.org/root-object> <https://schema.org/author> _:b1.
3+
<https://example.org/root-object> <https://schema.org/author> _:b2.
4+
5+
_:b1 <https://schema.org/name> "Jane Doe".
6+
_:b1 <https://schema.org/givenName> "Jane".
7+
_:b1 <https://schema.org/familyName> "Doe".
8+
_:b2 <https://schema.org/name> "John Doe".
9+
_:b2 <https://schema.org/givenName> "John".
10+
_:b2 <https://schema.org/familyName> "Doe".
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
#!/usr/bin/env python3
2+
3+
# Portions of this file contributed by NIST are governed by the
4+
# following statement:
5+
#
6+
# This software was developed at the National Institute of Standards
7+
# and Technology by employees of the Federal Government in the course
8+
# of their official duties. Pursuant to Title 17 Section 105 of the
9+
# United States Code, this software is not subject to copyright
10+
# protection within the United States. NIST assumes no responsibility
11+
# whatsoever for its use by other parties, and makes no guarantees,
12+
# expressed or implied, about its quality, reliability, or any other
13+
# characteristic.
14+
#
15+
# We would appreciate acknowledgement if the software is used.
16+
17+
import random
18+
from collections import defaultdict
19+
20+
from rdflib import RDFS, BNode, Graph, Literal, Namespace, URIRef
21+
22+
EX = Namespace("http://example.org/ex/")
23+
24+
25+
def test_sort_semiblank_graph() -> None:
26+
"""
27+
Check that the longturtle serialization of a graph containing blank
28+
nodes is identical each time the graph is regenerated.
29+
"""
30+
31+
serialization_counter: defaultdict[str, int] = defaultdict(int)
32+
33+
first_graph_text: str = ""
34+
35+
# Seed the shuffler with a fixed value so the shuffle order is the same
36+
# on every run, which keeps the test results reproducible.
37+
nonrandom_shuffler = random.Random(1234)
38+
for x in range(1, 10):
39+
graph = Graph()
40+
graph.bind("ex", EX)
41+
graph.bind("rdfs", RDFS)
42+
43+
graph.add((EX.A, RDFS.comment, Literal("Thing A")))
44+
graph.add((EX.B, RDFS.comment, Literal("Thing B")))
45+
graph.add((EX.C, RDFS.comment, Literal("Thing C")))
46+
47+
nodes: list[URIRef] = [EX.A, EX.B, EX.C, EX.B]
48+
nonrandom_shuffler.shuffle(nodes)
49+
for node in nodes:
50+
# Instantiate one bnode per URIRef node.
51+
graph.add((BNode(), RDFS.seeAlso, node))
52+
53+
nesteds: list[URIRef] = [EX.A, EX.B, EX.C]
54+
nonrandom_shuffler.shuffle(nesteds)
55+
for nested in nesteds:
56+
# Instantiate a nested node reference.
57+
outer_node = BNode()
58+
inner_node = BNode()
59+
graph.add((outer_node, EX.has, inner_node))
60+
graph.add((inner_node, RDFS.seeAlso, nested))
61+
62+
graph_text = graph.serialize(format="longturtle", sort=True)
63+
if first_graph_text == "":
64+
first_graph_text = graph_text
65+
66+
serialization_counter[graph_text] += 1
67+
68+
expected_serialization = """\
69+
PREFIX ns1: <http://example.org/ex/>
70+
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
71+
72+
ns1:A
73+
rdfs:comment "Thing A" ;
74+
.
75+
76+
ns1:C
77+
rdfs:comment "Thing C" ;
78+
.
79+
80+
ns1:B
81+
rdfs:comment "Thing B" ;
82+
.
83+
84+
[] ns1:has
85+
[
86+
rdfs:seeAlso ns1:A ;
87+
] ;
88+
.
89+
90+
[] rdfs:seeAlso ns1:B ;
91+
.
92+
93+
[] ns1:has
94+
[
95+
rdfs:seeAlso ns1:C ;
96+
] ;
97+
.
98+
99+
[] rdfs:seeAlso ns1:A ;
100+
.
101+
102+
[] rdfs:seeAlso ns1:C ;
103+
.
104+
105+
[] rdfs:seeAlso ns1:B ;
106+
.
107+
108+
[] ns1:has
109+
[
110+
rdfs:seeAlso ns1:B ;
111+
] ;
112+
.
113+
114+
"""
115+
116+
assert expected_serialization.strip() == first_graph_text.strip()
117+
assert 1 == len(serialization_counter)

0 commit comments

Comments
 (0)