Skip to content

Commit f5a0586

Browse files
committed
Extend refactoring of RDF processing
1 parent 81bd2a1 commit f5a0586

File tree

8 files changed

+102
-69
lines changed

8 files changed

+102
-69
lines changed

lib/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
from .mappings import MappingRegistry
44
from .errors import ApiError, NotFound, NotAllowed, ServerError
55
from .utils import read_json, write_json
6-
from .rdf import createTripleStore, triple_iterator
6+
from .rdf import triple_iterator
7+
from .triplestores import createTripleStore
78
from .rdffilter import RDFFilter
89
from .validate import validateJSON, ValidationError
910

lib/rdf.py

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
from .triplestores import createTripleStore
21
from pyld import jsonld
3-
from rdflib import URIRef, Literal, BNode
42
from .errors import ValidationError
53
from .walk import walk
64
from .validate import invalidIRI
@@ -18,26 +16,6 @@ def jsonld2nt(doc, context):
1816
return jsonld.to_rdf(expanded, options={'format': 'application/n-quads'})
1917

2018

21-
def sparql_to_rdf(binding):
22-
if binding['type'] == 'uri':
23-
return URIRef(binding['value'])
24-
elif binding['type'] == 'bnode':
25-
return BNode(binding['value'])
26-
elif binding['type'] == 'literal':
27-
if 'datatype' in binding:
28-
return Literal(binding['value'], datatype=URIRef(binding['datatype']))
29-
elif 'xml:lang' in binding:
30-
return Literal(binding['value'], lang=binding['xml:lang'])
31-
else:
32-
return Literal(binding['value'])
33-
34-
35-
def result_to_ttl(data):
36-
rows = [dict([(key, sparql_to_rdf(val).n3())
37-
for key, val in row.items()]) for row in data]
38-
return "\n".join([f"{row['s']} {row['p']} {row['o']} ." for row in rows])
39-
40-
4119
class NullLog:
4220
def append(self, msg):
4321
pass

lib/registry.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from shutil import copy, copyfileobj, rmtree
44
import urllib
55
from .validate import validateJSON
6-
from .rdf import jsonld2nt, result_to_ttl
6+
from .rdf import jsonld2nt
77
from .rdffilter import RDFFilter
88
from .log import Log
99
from .errors import ApiError, NotFound, ValidationError
@@ -91,7 +91,7 @@ def replace(self, items):
9191

9292
def update_metadata(self):
9393
query = f"SELECT * {{ GRAPH <{self.graph}> {{ VALUES (?p) {{(dct:issued)}} ?s ?p ?o }} }}"
94-
issued = result_to_ttl(self.sparql.query(query))
94+
issued = self.sparql.query(query, "nq")
9595
metadata = jsonld2nt(self.list(), self.context)
9696
file = self.stage / f"{self.kind}.ttl"
9797
with open(file, "w") as f:

lib/triplestores.py

Lines changed: 56 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from abc import ABC, abstractmethod
2-
from rdflib import URIRef, BNode, Dataset, Graph
2+
from rdflib import URIRef, Literal, BNode, Dataset, Graph
33
from SPARQLWrapper import SPARQLWrapper
44
import requests
55
import warnings
@@ -26,7 +26,7 @@ class AbstractTripleStore(ABC):
2626

2727
"""Must implement SPARQL Query"""
2828
@abstractmethod
29-
def query(self, query):
29+
def query(self, query, format):
3030
pass
3131

3232
"""Import"""
@@ -67,10 +67,11 @@ def __client(self, query):
6767
client.setQuery(self.prefixes + query)
6868
return client
6969

70-
def query(self, query):
70+
def query(self, query, format='sparql'):
7171
client = self.__client(query)
7272
try:
73-
return client.queryAndConvert()["results"]["bindings"]
73+
result = client.queryAndConvert()["results"]["bindings"]
74+
return convert_query_result(result, convert_sparql_term, format)
7475
except Exception as e:
7576
raise ServerError(f"SPARQL Query failed: {e}")
7677

@@ -97,28 +98,17 @@ class InternalTripleStore(AbstractTripleStore):
9798
def __init__(self):
9899
self.ds = Dataset(default_union=True)
99100

100-
def query(self, query):
101-
def term(t):
102-
if isinstance(t, URIRef):
103-
return {"type": "uri", "value": str(t)}
104-
if isinstance(t, BNode):
105-
return {"type": "bnode", "value": str(t)}
106-
literal = {"type": "literal", "value": str(t)}
107-
if t.language:
108-
literal["xml:lang"] = t.language
109-
if t.datatype:
110-
literal["datatype"] = str(t.datatype)
111-
return literal
112-
101+
def query(self, query, format='sparql'):
113102
def map_row(row):
114-
return {str(k): term(v) for k, v in row.items()}
103+
return {str(k): convert_rdflib_term(v, format) for k, v in row.items()}
115104

116105
query = self.prefixes + query
117106

118107
# RDFLib raises warning, see <https://github.com/RDFLib/rdflib/issues/3361>
119108
with warnings.catch_warnings():
120109
warnings.filterwarnings("ignore", category=DeprecationWarning)
121-
return [map_row(row) for row in self.ds.query(query).bindings]
110+
result = self.ds.query(query).bindings
111+
return convert_query_result(result, convert_rdflib_term, format)
122112

123113
def _update(self, query):
124114
self.ds.update(self.prefixes + query)
@@ -133,3 +123,50 @@ def store_file(self, graph, file):
133123
for triple in data:
134124
graph.add(triple)
135125
return True
126+
127+
128+
def convert_query_result(result, mapper, target):
    """Convert a SPARQL query result to the requested target form.

    Args:
        result: Iterable of result rows, each a mapping from variable name
            to a term in the representation understood by ``mapper``.
        mapper: Callable ``mapper(term, format)`` that converts one term.
        target: ``"sparql"``, ``"rdflib"`` or ``"n3"`` to get a list of
            dicts with converted terms, ``"nq"`` or ``"ttl"`` to get a
            serialized string with one statement per row.

    Returns:
        A list of dicts keyed by variable name, or for ``"nq"``/``"ttl"``
        a newline-joined string of statements built from the variables
        ``s``, ``p``, ``o`` (and ``g`` for ``"nq"``) that occur in each row.
    """
    if target in ("nq", "ttl"):
        # N-Quads places the graph label after the object; Turtle statements
        # are plain triples, so a bound ?g must not be emitted there.
        fields = ("s", "p", "o", "g") if target == "nq" else ("s", "p", "o")
        rows = convert_query_result(result, mapper, "n3")
        return "\n".join(
            " ".join(row[f] for f in fields if f in row) + " ."
            for row in rows)

    return [{str(k): mapper(v, target) for k, v in row.items()} for row in result]
138+
139+
140+
def convert_sparql_term(term, format):
    """Convert one term of a SPARQL JSON result binding.

    Args:
        term: Dict with the keys ``type`` and ``value`` and, for literals,
            optionally ``datatype`` or ``xml:lang``.
        format: ``"rdflib"`` to get an RDFLib term, ``"n3"`` to get its N3
            string; any other value (e.g. ``"sparql"``) returns ``term``
            unchanged.

    Returns:
        The RDFLib term, its N3 serialization, or the untouched binding.

    Raises:
        ValueError: If a conversion is requested for an unsupported
            binding type.
    """
    if format not in ("rdflib", "n3"):
        return term

    kind = term['type']
    if kind == 'uri':
        node = URIRef(term['value'])
    elif kind == 'bnode':
        node = BNode(term['value'])
    elif kind in ('literal', 'typed-literal'):
        # "typed-literal" is a pre-standard type name that some endpoints
        # reportedly still emit; it is handled exactly like "literal".
        if 'datatype' in term:
            node = Literal(term['value'], datatype=URIRef(term['datatype']))
        elif 'xml:lang' in term:
            node = Literal(term['value'], lang=term['xml:lang'])
        else:
            node = Literal(term['value'])
    else:
        # Previously the raw dict leaked through and failed later on .n3()
        raise ValueError(f"Unsupported SPARQL term type: {kind}")

    return node.n3() if format == "n3" else node
156+
157+
158+
def convert_rdflib_term(term, format):
    """Convert an RDFLib term to the requested format.

    ``"rdflib"`` returns the term as is and ``"n3"`` returns ``term.n3()``.
    Any other format yields a SPARQL-JSON-style binding dict with ``type``
    and ``value`` plus ``xml:lang`` / ``datatype`` for literals having them.
    """
    if format == "rdflib":
        return term
    elif format == "n3":
        return term.n3()

    value = str(term)
    if isinstance(term, URIRef):
        return {"type": "uri", "value": value}
    elif isinstance(term, BNode):
        return {"type": "bnode", "value": value}

    # Everything else is treated as a literal
    binding = {"type": "literal", "value": value}
    language, datatype = term.language, term.datatype
    if language:
        binding["xml:lang"] = language
    if datatype:
        binding["datatype"] = str(datatype)
    return binding

tests/test_api.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,10 @@ def expect_error(client, method, path, json=None, error=None, code=400):
5757
def count_graphs(sparql):
5858
query = "SELECT ?g (count(*) as ?t) { GRAPH ?g {?s ?p ?o} } GROUP BY ?g"
5959
graphs = {}
60+
6061
for row in sparql.query(query):
61-
graphs[row['g']['value']] = int(row['t']['value'])
62+
if "g" in row: # <https://github.com/RDFLib/rdflib/issues/3382>
63+
graphs[row['g']['value']] = int(row['t']['value'])
6264
return graphs
6365

6466

@@ -124,14 +126,15 @@ def test_terminology(client):
124126
fail("GET", "/terminology/18274", code=404)
125127
fail("GET", "/terminology/18274/stage/", code=404)
126128

129+
# register terminology from BARTOC
130+
assert count_graphs(sparql) == {}
127131
with patch('requests.get', new=mock_requests_get):
128-
129-
# register terminology, get afterwards
130132
assert client.put("/terminology/18274").status_code == 200
131133
assert client.get("/terminology/18274").status_code == 200
132134

133135
# try to register non-existing terminology
134136
fail("PUT", "/terminology/0", code=404)
137+
assert count_graphs(sparql) == {'https://graph.nfdi4objects.net/terminology/': 12}
135138

136139
assert client.get("/terminology/18274/stage/").status_code == 200
137140
fail("GET", "/terminology/18274/stage/terminology-18274.nt", code=404)
@@ -169,6 +172,10 @@ def test_terminology(client):
169172
assert client.get('/terminology/18274/receive').status_code == 200
170173
assert client.get("/terminology/18274/stage/terminology-18274.nt").status_code == 200
171174

175+
# FIXME: Here we get too many bnodes in InternalTripleStore
176+
# print(sparql.query("SELECT * { GRAPH ?g { ?s ?p ?o } }", "ttl"))
177+
# assert count_graphs(sparql) == { 'https://graph.nfdi4objects.net/terminology/': 99 } # 12
178+
172179
# load terminology data and check log
173180
fail("GET", '/terminology/18274/load', code=404)
174181
assert client.post('/terminology/18274/load').status_code == 200

tests/test_collections.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
# Unit test
22
import pytest
33
import tempfile
4-
from lib import CollectionRegistry
5-
from lib.rdf import createTripleStore
4+
from lib import CollectionRegistry, createTripleStore
65

76

87
@pytest.fixture

tests/test_rdf.py

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# Unit test
22
from lib import triple_iterator, RDFFilter, ValidationError
3-
from lib.triplestores import createTripleStore
43

54

65
def parse(source, filter=None, unique=False):
@@ -20,7 +19,7 @@ def fail(file, error):
2019
assert e.to_dict() == error
2120

2221

23-
def _test_parsing():
22+
def test_parsing():
2423
assert len(parse("tests/skos.rdf")) == 377
2524
assert len(parse("tests/rdf.zip")) == 4
2625
assert len(parse("tests/iri.ttl")) == 1
@@ -63,27 +62,10 @@ def _test_parsing():
6362
})
6463

6564

66-
def _test_filter():
65+
def test_filter():
6766
triples = parse("tests/filter.ttl")
6867
assert len(triples) == 7
6968

7069
filter = RDFFilter(disallow_subject_ns=("http://www.cidoc-crm.org/cidoc-crm/"))
7170
triples = parse("tests/filter.ttl", filter, True)
7271
assert len(triples) == 3
73-
74-
75-
def test_store():
76-
store = createTripleStore()
77-
78-
store.insert('http://example.org/', '_:b1 dct:title "foo"')
79-
assert store.query("SELECT * { ?s ?p ?o }") == [{
80-
's': {'type': 'bnode', 'value': 'b1'},
81-
'p': {'type': 'uri', 'value': 'http://purl.org/dc/terms/title'},
82-
'o': {'type': 'literal', 'value': 'foo'}
83-
}]
84-
85-
return
86-
87-
store.store_file('http://example.org/1', "tests/filter.ttl")
88-
query = "SELECT * { GRAPH <http://example.org/1> { ?s ?b ?o } }"
89-
assert len(store.query(query)) == 6

tests/test_triplestores.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Unit test
2+
from lib.triplestores import createTripleStore
3+
from rdflib import URIRef, Literal, BNode
4+
5+
6+
def test_store():
    """Check every query result format of the default triple store."""
    select_all = "SELECT * { ?s ?p ?o }"
    title = 'http://purl.org/dc/terms/title'

    store = createTripleStore()
    store.insert('http://example.org/', '_:b1 dct:title "foo"')

    # default format: SPARQL JSON bindings
    sparql_rows = [{
        's': {'type': 'bnode', 'value': 'b1'},
        'p': {'type': 'uri', 'value': title},
        'o': {'type': 'literal', 'value': 'foo'},
    }]
    assert store.query(select_all) == sparql_rows

    # RDFLib terms
    rdflib_rows = [{'s': BNode('b1'), 'p': URIRef(title), 'o': Literal("foo")}]
    assert store.query(select_all, "rdflib") == rdflib_rows

    # N3 strings per variable
    n3_rows = [{'s': '_:b1', 'p': f'<{title}>', 'o': '"foo"'}]
    assert store.query(select_all, "n3") == n3_rows

    # serialized statements
    statements = store.query(select_all, "nq")
    assert statements == '_:b1 <http://purl.org/dc/terms/title> "foo" .'

    # loading a file into a named graph
    graph = 'http://example.org/1'
    store.store_file(graph, "tests/filter.ttl")
    rows = store.query("SELECT * { GRAPH <http://example.org/1> { ?s ?b ?o } }")
    assert len(rows) == 6

0 commit comments

Comments
 (0)