Skip to content

Commit 404be3b

Browse files
Implement RDF Patch serializer (#2877)
* Implement RDF Patch serializer. Supports serialization from Dataset instances only; triples and quads within a Dataset are supported. * Add examples for Patch serialization. * Remove unnecessary use of addN -> add * Handle RDFLib graph/dataset addition quirks. Should fix static analysis issues. * Fix mypy errors * Attempt to fix failing test. * Add Patch documentation + docstring --------- Co-authored-by: Ashley Sommer <[email protected]>
1 parent aa9d103 commit 404be3b

File tree

5 files changed

+359
-0
lines changed

5 files changed

+359
-0
lines changed

docs/plugin_serializers.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ n3 :class:`~rdflib.plugins.serializers.n3.N3Serializer`
2121
nquads :class:`~rdflib.plugins.serializers.nquads.NQuadsSerializer`
2222
nt :class:`~rdflib.plugins.serializers.nt.NTSerializer`
2323
hext :class:`~rdflib.plugins.serializers.hext.HextuplesSerializer`
24+
patch :class:`~rdflib.plugins.serializers.patch.PatchSerializer`
2425
pretty-xml :class:`~rdflib.plugins.serializers.rdfxml.PrettyXMLSerializer`
2526
trig :class:`~rdflib.plugins.serializers.trig.TrigSerializer`
2627
trix :class:`~rdflib.plugins.serializers.trix.TriXSerializer`
@@ -34,6 +35,11 @@ JSON-LD
3435
-------
3536
JSON-LD - 'json-ld' - has been incorporated into RDFLib since v6.0.0.
3637

38+
RDF Patch
39+
---------
40+
41+
The RDF Patch Serializer - 'patch' - uses the RDF Patch format defined at https://afs.github.io/rdf-patch/. It supports serializing context-aware stores as either addition or deletion patches, and also supports serializing the difference between two context-aware stores as a patch of additions and deletions.
42+
3743
HexTuples
3844
---------
3945
The HexTuples Serializer - 'hext' - uses the HexTuples format defined at https://github.com/ontola/hextuples.

examples/patch_serializer_example.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
from rdflib import Dataset, Graph, Literal, URIRef
2+
3+
4+
def main():
    """Print example RDF Patch documents produced by the "patch" serializer.

    Four scenarios run in order: adding a quad through a named graph,
    adding a quad directly, adding a plain triple, and diffing two datasets.
    """
    # Scenario 1: add a quad by registering a named graph and filling its context.
    dataset = Dataset()
    named_graph = Graph(identifier=URIRef("http://graph-a"))
    dataset.add_graph(named_graph)
    triple = (URIRef("http://subj-a"), URIRef("http://pred-a"), Literal("obj-a"))
    dataset.get_context(named_graph.identifier).add(triple)
    patch = dataset.serialize(format="patch", operation="add")
    print("Add Quad Patch:")
    print(patch)

    # Scenario 2: same idea, but the graph travels as the 4th element of a quad.
    dataset = Dataset()
    dataset.add(
        (
            URIRef("http://subj-a"),
            URIRef("http://pred-a"),
            Literal("obj-a"),
            Graph(identifier=URIRef("http://graph-a")),
        )
    )
    patch = dataset.serialize(format="patch", operation="add")
    print("Add Quad Patch:")
    print(patch)

    # Scenario 3: add a bare triple (no graph given); reuses the triple from scenario 1.
    dataset = Dataset()
    dataset.add(triple)
    patch = dataset.serialize(format="patch", operation="add")
    print("\nAdd Triple Patch:")
    print(patch)

    # Scenario 4: diff two datasets that share exactly one quad (quad_b).
    quad_a = (
        URIRef("http://subj-a"),
        URIRef("http://pred-a"),
        Literal("obj-a"),
        Graph(identifier=URIRef("http://graph-a")),
    )
    quad_b = (
        URIRef("http://subj-b"),
        URIRef("http://pred-b"),
        Literal("obj-b"),
        Graph(identifier=URIRef("http://graph-b")),
    )
    quad_c = (
        URIRef("http://subj-c"),
        URIRef("http://pred-c"),
        Literal("obj-c"),
        Graph(identifier=URIRef("http://graph-c")),
    )
    source = Dataset()
    target = Dataset()
    source.addN([quad_a, quad_b])
    target.addN([quad_b, quad_c])
    patch = source.serialize(format="patch", target=target)
    print("Diff Quad Patch:")
    print(patch)
61+
62+
63+
if __name__ == "__main__":
64+
main()

rdflib/plugin.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,12 @@ def plugins(
363363
"rdflib.plugins.serializers.hext",
364364
"HextuplesSerializer",
365365
)
366+
register(
367+
"patch",
368+
Serializer,
369+
"rdflib.plugins.serializers.patch",
370+
"PatchSerializer",
371+
)
366372

367373
# Register Triple Parsers
368374
register(

rdflib/plugins/serializers/patch.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
from __future__ import annotations
2+
3+
import warnings
4+
from typing import IO, Optional
5+
from uuid import uuid4
6+
7+
from rdflib import Dataset
8+
from rdflib.plugins.serializers.nquads import _nq_row
9+
from rdflib.plugins.serializers.nt import _nt_row
10+
from rdflib.serializer import Serializer
11+
12+
add_remove_methods = {"add": "A", "remove": "D"}
13+
14+
15+
class PatchSerializer(Serializer):
    """
    Creates an RDF patch file to add and remove triples/quads.
    Can either:
    - Create an add or delete patch for a single Dataset.
    - Create a patch to represent the difference between two Datasets.

    Output layout, in order: an ``H id`` header row, an optional ``H prev``
    header row, ``TX .``, one ``A``/``D`` row per statement, then ``TC .``.
    """

    def __init__(
        self,
        store: Dataset,
    ):
        """
        :param store: The Dataset that :meth:`serialize` writes out, or diffs
            against a target.
        """
        self.store: Dataset = store
        super().__init__(store)

    def serialize(
        self,
        stream: IO[bytes],
        base: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs,
    ) -> None:
        """
        Serialize the store to the given stream.
        :param stream: The stream to serialize to.
        :param base: The base URI to use for the serialization.
            Not supported; a warning is emitted if it is given.
        :param encoding: The encoding to use for the serialization.
            Ignored in favour of ``self.encoding``; a warning is emitted if a
            different encoding is requested.
        :param kwargs: Additional keyword arguments.
        Supported keyword arguments:
        - operation: The operation to perform. Either 'add' or 'remove'.
        - target: The target Dataset to compare against.
        NB: Only one of 'operation' or 'target' should be provided.
        If both are given, 'operation' takes precedence. If neither is
        given, only the header and transaction markers are written.
        - header_id: The header ID to use. Defaults to a random ``uuid:`` id.
        - header_prev: The previous header ID to use.
        :raises AssertionError: If 'operation' is given but is not one of
            'add' or 'remove'.
        """
        operation = kwargs.get("operation")
        target = kwargs.get("target")
        header_prev = kwargs.get("header_prev")
        # Fall back to a fresh random id when none (or an empty one) is given.
        header_id = kwargs.get("header_id") or f"uuid:{uuid4()}"

        if base is not None:
            warnings.warn("PatchSerializer does not support base.")
        # Compare the *requested* encoding before it is overridden below;
        # doing the override first would make this warning unreachable.
        if encoding is not None and encoding.lower() != self.encoding.lower():
            warnings.warn(
                "PatchSerializer does not use custom encoding. "
                f"Given encoding was: {encoding}"
            )
        encoding = self.encoding

        # Raised explicitly rather than via ``assert`` so the check still runs
        # under ``python -O``; AssertionError is kept for backward
        # compatibility with existing callers.
        if operation and operation not in add_remove_methods:
            raise AssertionError(f"Invalid operation: {operation}")

        def write_header():
            # Header rows are terminated with " ." like the "H id" row.
            stream.write(f"H id <{header_id}> .\n".encode(encoding, "replace"))
            if header_prev:
                stream.write(
                    f"H prev <{header_prev}> .\n".encode(encoding, "replace")
                )
            stream.write("TX .\n".encode(encoding, "replace"))

        def write_triples(contexts, op_code, use_passed_contexts=False):
            for context in contexts:
                if not use_passed_contexts:
                    # Re-resolve the context through our own store by
                    # identifier instead of iterating the passed object.
                    context = self.store.get_context(context.identifier)
                for triple in context:
                    stream.write(
                        self._patch_row(triple, context.identifier, op_code).encode(
                            encoding, "replace"
                        )
                    )

        write_header()
        if operation:
            write_triples(self.store.contexts(), add_remove_methods[operation])
        elif target is not None:
            # ``is not None`` rather than truthiness: an empty target Dataset
            # is presumably falsy (rdflib graphs appear to define ``__len__``
            # -- confirm), yet diffing against it must still emit the
            # deletions.
            to_add, to_remove = self._diff(target)
            # The diff results are standalone objects, so iterate their own
            # contexts rather than looking them up in ``self.store``.
            write_triples(to_add.contexts(), "A", use_passed_contexts=True)
            write_triples(to_remove.contexts(), "D", use_passed_contexts=True)

        stream.write("TC .\n".encode(encoding, "replace"))

    def _diff(self, target):
        """
        Compute the statements separating the store from ``target``.
        :param target: The Dataset to compare the store against.
        :return: A ``(rows_to_add, rows_to_remove)`` pair:
            ``target - store`` and ``store - target`` respectively.
        """
        rows_to_add = target - self.store
        rows_to_remove = self.store - target
        return rows_to_add, rows_to_remove

    def _patch_row(self, triple, context_id, operation):
        """
        Format a single patch row.
        :param triple: The statement to write.
        :param context_id: Identifier of the context the statement came from.
        :param operation: The row's operation code (e.g. "A" or "D").
        :return: The operation code followed by the statement, formatted as a
            triple when the context is the store's default context and as a
            quad (with ``context_id``) otherwise.
        """
        if context_id == self.store.default_context.identifier:
            return f"{operation} {_nt_row(triple)}"
        return f"{operation} {_nq_row(triple, context_id)}"
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
from rdflib import Dataset, Graph, Literal, URIRef
2+
3+
4+
def test_add_quad():
    """An "add" patch of a named-graph statement contains the matching ``A`` quad row."""
    quad = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
        Graph(identifier=URIRef("http://example.org/graph1")),
    )
    dataset = Dataset()
    dataset.add(quad)
    patch = dataset.serialize(format="patch", operation="add")
    # The expected row includes its trailing newline.
    expected_row = """A <http://example.org/subject1> <http://example.org/predicate2> "object2" <http://example.org/graph1> .
"""
    assert expected_row in patch
20+
21+
22+
def test_delete_quad():
    """A "remove" patch of a named-graph statement contains the matching ``D`` quad row."""
    quad = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
        Graph(identifier=URIRef("http://example.org/graph1")),
    )
    dataset = Dataset()
    dataset.add(quad)
    patch = dataset.serialize(format="patch", operation="remove")
    # The expected row includes its trailing newline.
    expected_row = """D <http://example.org/subject1> <http://example.org/predicate2> "object2" <http://example.org/graph1> .
"""
    assert expected_row in patch
38+
39+
40+
def test_diff_quad():
    """Diffing against a target holding one extra quad yields an ``A`` row for that quad."""
    shared_quad = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
        Graph(identifier=URIRef("http://example.org/graph1")),
    )
    extra_quad = (
        URIRef("http://example.org/subject2"),
        URIRef("http://example.org/predicate3"),
        Literal("object3"),
        Graph(identifier=URIRef("http://example.org/graph2")),
    )
    source = Dataset()
    target = Dataset()
    source.add(shared_quad)
    target.addN([shared_quad, extra_quad])
    patch = source.serialize(format="patch", target=target)
    expected_row = """A <http://example.org/subject2> <http://example.org/predicate3> "object3" <http://example.org/graph2> ."""
    assert expected_row in patch
62+
63+
64+
def test_add_triple():
    """An "add" patch of a statement added without a graph contains an ``A`` triple row."""
    statement = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
    )
    dataset = Dataset()
    dataset.add(statement)
    patch = dataset.serialize(format="patch", operation="add")
    expected_row = """A <http://example.org/subject1> <http://example.org/predicate2> "object2" ."""
    assert expected_row in patch
78+
79+
80+
def test_delete_triple():
    """A "remove" patch of a statement added without a graph contains a ``D`` triple row."""
    statement = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
    )
    dataset = Dataset()
    dataset.add(statement)
    patch = dataset.serialize(format="patch", operation="remove")
    expected_row = """D <http://example.org/subject1> <http://example.org/predicate2> "object2" ."""
    assert expected_row in patch
94+
95+
96+
def test_diff_triple():
    """Diffing against a target holding one extra triple yields an ``A`` row for that triple."""
    shared_triple = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
    )
    extra_triple = (
        URIRef("http://example.org/subject2"),
        URIRef("http://example.org/predicate3"),
        Literal("object3"),
    )
    source = Dataset()
    target = Dataset()
    source.add(shared_triple)
    target.add(shared_triple)
    target.add(extra_triple)
    patch = source.serialize(format="patch", target=target)
    expected_row = """A <http://example.org/subject2> <http://example.org/predicate3> "object3" ."""
    assert expected_row in patch
117+
118+
119+
def test_diff_quad_overlap():
    """Diffing datasets that share one quad yields a ``D`` row and an ``A`` row for the others."""
    only_in_source = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate1"),
        Literal("object1"),
        Graph(identifier=URIRef("http://example.org/graph1")),
    )
    in_both = (
        URIRef("http://example.org/subject2"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
        Graph(identifier=URIRef("http://example.org/graph2")),
    )
    only_in_target = (
        URIRef("http://example.org/subject3"),
        URIRef("http://example.org/predicate3"),
        Literal("object3"),
        Graph(identifier=URIRef("http://example.org/graph3")),
    )
    source = Dataset()
    target = Dataset()
    source.addN([only_in_source, in_both])
    target.addN([in_both, only_in_target])
    patch = source.serialize(format="patch", target=target)

    # The quad missing from the target must be deleted.
    removed_row = """D <http://example.org/subject1> <http://example.org/predicate1> "object1" <http://example.org/graph1> ."""
    assert removed_row in patch
    # The quad missing from the source must be added.
    added_row = """A <http://example.org/subject3> <http://example.org/predicate3> "object3" <http://example.org/graph3> ."""
    assert added_row in patch
153+
154+
155+
def test_header_id():
    """A caller-supplied ``header_id`` shows up in the ``H id`` header row."""
    statement = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
    )
    dataset = Dataset()
    dataset.add(statement)
    patch = dataset.serialize(format="patch", operation="add", header_id="uuid:123")
    assert "H id <uuid:123>" in patch
166+
167+
168+
def test_prev_header():
    """A caller-supplied ``header_prev`` shows up in the ``H prev`` header row."""
    statement = (
        URIRef("http://example.org/subject1"),
        URIRef("http://example.org/predicate2"),
        Literal("object2"),
    )
    dataset = Dataset()
    dataset.add(statement)
    patch = dataset.serialize(format="patch", operation="add", header_prev="uuid:123")
    assert "H prev <uuid:123>" in patch

0 commit comments

Comments
 (0)