Skip to content

Commit aa9d103

Browse files
recalcitrantsupplantnicholascarashleysommer
authored
Add initial implementation of RDF Patch parser. (#2863)
* Add initial implementation of RDF Patch parser. * Add example, run Black & Ruff * Ruff again * Add docstring for Patch Operations + add class to All declaration to fix sphinx build. * Black again * Add parsing of <_:identifier> style bnodes; switch to line by line parsing. * Remove unused mypy comment. * Add mypy exclusions (comments); the code will not enter these methods if self.line is None in the first place. * Remove mypy comment. * Add ignore to 181. * Add RDF Patch to Parser documentation. * Correct Patch Parser Name. Remove unnecessary return statements. * Remove unnecessary deskolemize from examples. * Attempt to fix failing test. --------- Co-authored-by: Nicholas Car <[email protected]> Co-authored-by: Ashley Sommer <[email protected]>
1 parent 324f20a commit aa9d103

20 files changed

+432
-0
lines changed

docs/plugin_parsers.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ json-ld :class:`~rdflib.plugins.parsers.jsonld.JsonLDParser`
2424
hext :class:`~rdflib.plugins.parsers.hext.HextuplesParser`
2525
n3 :class:`~rdflib.plugins.parsers.notation3.N3Parser`
2626
nquads :class:`~rdflib.plugins.parsers.nquads.NQuadsParser`
27+
patch :class:`~rdflib.plugins.parsers.patch.RDFPatchParser`
2728
nt :class:`~rdflib.plugins.parsers.ntriples.NTParser`
2829
trix :class:`~rdflib.plugins.parsers.trix.TriXParser`
2930
turtle :class:`~rdflib.plugins.parsers.notation3.TurtleParser`

examples/parse_patch.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
from rdflib import Dataset
2+
3+
4+
def main():
    """Apply an "add" RDF Patch and then a "delete" RDF Patch to one Dataset.

    After each patch is parsed into the dataset, a heading is printed
    followed by every item yielded by iterating the dataset.
    """
    # Patch that adds two statements about the blank node _:bn1.
    add_patch = (
        "\n"
        "TX .\n"
        'A _:bn1 <http://example.org/predicate1> "object1" .\n'
        'A _:bn1 <http://example.org/predicate2> "object2" .\n'
        "TC .\n"
    )

    # Patch that removes the first of those statements again.
    delete_patch = (
        "\n"
        "TX .\n"
        'D _:bn1 <http://example.org/predicate1> "object1" .\n'
        "TC .\n"
    )

    ds = Dataset()

    # Each step: parse the patch into the same dataset, then dump its content.
    steps = (
        ("After add patch:", add_patch),
        ("After delete patch:", delete_patch),
    )
    for heading, patch_text in steps:
        ds.parse(data=patch_text, format="patch")
        print(heading)
        for statement in ds:
            print(statement)
32+
33+
34+
if __name__ == "__main__":
35+
main()

rdflib/plugin.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,14 @@ def plugins(
488488
"HextuplesParser",
489489
)
490490

491+
# Register RDF Patch Parsers
492+
register(
493+
"patch",
494+
Parser,
495+
"rdflib.plugins.parsers.patch",
496+
"RDFPatchParser",
497+
)
498+
491499
# Register SPARQL Processors
492500
register(
493501
"sparql",

rdflib/plugins/parsers/patch.py

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
from __future__ import annotations
2+
3+
from codecs import getreader
4+
from enum import Enum
5+
from typing import TYPE_CHECKING, Any, MutableMapping, Optional, Union
6+
7+
from rdflib.exceptions import ParserError as ParseError
8+
from rdflib.graph import Dataset
9+
from rdflib.parser import InputSource
10+
from rdflib.plugins.parsers.nquads import NQuadsParser
11+
12+
# Build up from the NTriples parser:
13+
from rdflib.plugins.parsers.ntriples import r_nodeid, r_tail, r_uriref, r_wspace
14+
from rdflib.term import BNode, URIRef
15+
16+
if TYPE_CHECKING:
17+
import typing_extensions as te
18+
19+
__all__ = ["RDFPatchParser", "Operation"]
20+
21+
_BNodeContextType = MutableMapping[str, BNode]
22+
23+
24+
class Operation(Enum):
    """
    The operation codes that may start a line of an RDF Patch document.

    Each member's value is the literal code as it appears in the patch text:

    ===== ====================== ==============================
    Code  Member                 Meaning
    ===== ====================== ==============================
    A     `AddTripleOrQuad`      Adds a triple or quad.
    D     `DeleteTripleOrQuad`   Deletes a triple or quad.
    PA    `AddPrefix`            Adds a prefix.
    PD    `DeletePrefix`         Deletes a prefix.
    TX    `TransactionStart`     Starts a transaction.
    TC    `TransactionCommit`    Commits a transaction.
    TA    `TransactionAbort`     Aborts a transaction.
    H     `Header`               Specifies a header.
    ===== ====================== ==============================
    """

    # NOTE: member order is the enum's iteration order; keep it stable.

    # Data changes
    AddTripleOrQuad = "A"
    DeleteTripleOrQuad = "D"

    # Namespace prefix changes
    AddPrefix = "PA"
    DeletePrefix = "PD"

    # Transaction markers
    TransactionStart = "TX"
    TransactionCommit = "TC"
    TransactionAbort = "TA"

    # Patch header
    Header = "H"
47+
48+
49+
class RDFPatchParser(NQuadsParser):
50+
def parse( # type: ignore[override]
51+
self,
52+
inputsource: InputSource,
53+
sink: Dataset,
54+
bnode_context: Optional[_BNodeContextType] = None,
55+
skolemize: bool = False,
56+
**kwargs: Any,
57+
) -> Dataset:
58+
"""
59+
Parse inputsource as an RDF Patch file.
60+
61+
:type inputsource: `rdflib.parser.InputSource`
62+
:param inputsource: the source of RDF Patch formatted data
63+
:type sink: `rdflib.graph.Dataset`
64+
:param sink: where to send parsed data
65+
:type bnode_context: `dict`, optional
66+
:param bnode_context: a dict mapping blank node identifiers to `~rdflib.term.BNode` instances.
67+
See `.W3CNTriplesParser.parse`
68+
"""
69+
assert sink.store.context_aware, (
70+
"RDFPatchParser must be given" " a context aware store."
71+
)
72+
# type error: Incompatible types in assignment (expression has type "ConjunctiveGraph", base class "W3CNTriplesParser" defined the type as "Union[DummySink, NTGraphSink]")
73+
self.sink: Dataset = Dataset(store=sink.store)
74+
self.skolemize = skolemize
75+
76+
source = inputsource.getCharacterStream()
77+
if not source:
78+
source = inputsource.getByteStream()
79+
source = getreader("utf-8")(source)
80+
81+
if not hasattr(source, "read"):
82+
raise ParseError("Item to parse must be a file-like object.")
83+
84+
self.file = source
85+
self.buffer = ""
86+
while True:
87+
self.line = __line = self.readline()
88+
if self.line is None:
89+
break
90+
try:
91+
self.parsepatch(bnode_context)
92+
except ParseError as msg:
93+
raise ParseError("Invalid line (%s):\n%r" % (msg, __line))
94+
return self.sink
95+
96+
def parsepatch(self, bnode_context: Optional[_BNodeContextType] = None) -> None:
97+
self.eat(r_wspace)
98+
# From spec: "No comments should be included (comments start # and run to end
99+
# of line)."
100+
if (not self.line) or self.line.startswith("#"):
101+
return # The line is empty or a comment
102+
103+
# if header, transaction, skip
104+
operation = self.operation()
105+
self.eat(r_wspace)
106+
107+
if operation in [Operation.AddTripleOrQuad, Operation.DeleteTripleOrQuad]:
108+
self.add_or_remove_triple_or_quad(operation, bnode_context)
109+
elif operation == Operation.AddPrefix:
110+
self.add_prefix()
111+
elif operation == Operation.DeletePrefix:
112+
self.delete_prefix()
113+
114+
def add_or_remove_triple_or_quad(
115+
self, operation, bnode_context: Optional[_BNodeContextType] = None
116+
) -> None:
117+
self.eat(r_wspace)
118+
if (not self.line) or self.line.startswith("#"):
119+
return # The line is empty or a comment
120+
121+
subject = self.labeled_bnode() or self.subject(bnode_context)
122+
self.eat(r_wspace)
123+
124+
predicate = self.predicate()
125+
self.eat(r_wspace)
126+
127+
obj = self.labeled_bnode() or self.object(bnode_context)
128+
self.eat(r_wspace)
129+
130+
context = self.labeled_bnode() or self.uriref() or self.nodeid(bnode_context)
131+
self.eat(r_tail)
132+
133+
if self.line:
134+
raise ParseError("Trailing garbage")
135+
# Must have a context aware store - add on a normal Graph
136+
# discards anything where the ctx != graph.identifier
137+
if operation == Operation.AddTripleOrQuad:
138+
if context:
139+
self.sink.get_context(context).add((subject, predicate, obj))
140+
else:
141+
self.sink.default_context.add((subject, predicate, obj))
142+
elif operation == Operation.DeleteTripleOrQuad:
143+
if context:
144+
self.sink.get_context(context).remove((subject, predicate, obj))
145+
else:
146+
self.sink.default_context.remove((subject, predicate, obj))
147+
148+
def add_prefix(self):
149+
# Extract prefix and URI from the line
150+
prefix, ns, _ = self.line.replace('"', "").replace("'", "").split(" ") # type: ignore[union-attr]
151+
ns_stripped = ns.strip("<>")
152+
self.sink.bind(prefix, ns_stripped)
153+
154+
def delete_prefix(self):
155+
prefix, _, _ = self.line.replace('"', "").replace("'", "").split(" ") # type: ignore[union-attr]
156+
self.sink.namespace_manager.bind(prefix, None, replace=True)
157+
158+
def operation(self) -> Operation:
159+
for op in Operation:
160+
if self.line.startswith(op.value): # type: ignore[union-attr]
161+
self.eat_op(op.value)
162+
return op
163+
raise ValueError(
164+
f'Invalid or no Operation found in line: "{self.line}". Valid Operations '
165+
f"codes are {', '.join([op.value for op in Operation])}"
166+
)
167+
168+
def eat_op(self, op: str) -> None:
169+
self.line = self.line.lstrip(op) # type: ignore[union-attr]
170+
171+
def nodeid(
172+
self, bnode_context: Optional[_BNodeContextType] = None
173+
) -> Union[te.Literal[False], BNode, URIRef]:
174+
if self.peek("_"):
175+
return BNode(self.eat(r_nodeid).group(1))
176+
return False
177+
178+
def labeled_bnode(self):
179+
if self.peek("<_"):
180+
plain_uri = self.eat(r_uriref).group(1)
181+
bnode_id = r_nodeid.match(plain_uri).group(1) # type: ignore[union-attr]
182+
return BNode(bnode_id)
183+
return False
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
TX .
2+
A _:bn1 <http://example.org/predicate1> "object1" <http://example.org/graph1> .
3+
A _:bn1 <http://example.org/predicate2> "object2" <http://example.org/graph1> .
4+
A _:bn1 <http://example.org/predicate3> "object3" <http://example.org/graph1> .
5+
D _:bn1 <http://example.org/predicate2> "object2" <http://example.org/graph1> .
6+
TC .
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
TX .
2+
A <_:bn1> <http://example.org/predicate1> "object1" <http://example.org/graph1> .
3+
A <_:bn1> <http://example.org/predicate2> "object2" <http://example.org/graph1> .
4+
A <_:bn1> <http://example.org/predicate3> "object3" <http://example.org/graph1> .
5+
D <_:bn1> <http://example.org/predicate2> "object2" <http://example.org/graph1> .
6+
TC .
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
TX .
2+
PA present <http://some-other-ns#> .
3+
PA removed <http://ns-for-prefix-to-remove#> .
4+
PD removed <http://ns-for-prefix-to-remove#> .
5+
A <http://ns-for-prefix-to-remove#test-subj> <http://ns-for-prefix-to-remove#test-pred> "object1" .
6+
TC .
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
TX .
2+
A <http://example.org/subject1> <http://example.org/predicate1> "object1" .
3+
A <http://example.org/subject1> <http://example.org/predicate2> "object2" <http://example.org/graph1> .
4+
D <http://example.org/subject1> <http://example.org/predicate1> "object1" .
5+
D <http://example.org/subject1> <http://example.org/predicate2> "object2" <http://example.org/graph1> .
6+
TC .

test/data/patch/add_bnode_graph.rdp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
TX .
2+
A _:bn1 <http://example.org/predicate1> "object1" _:bn1 .
3+
TC .

test/data/patch/add_bnode_quad.rdp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
TX .
2+
A _:bn1 <http://example.org/predicate1> "object1" <https://graph-1> .
3+
TC .

0 commit comments

Comments
 (0)