Skip to content

Commit d060f98

Browse files
committed
Replace lightrdf with pyoxigraph (#44)
1 parent 5ec6074 commit d060f98

File tree

4 files changed

+15
-17
lines changed

4 files changed

+15
-17
lines changed

lib/rdf.py

Lines changed: 11 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
from pyld import jsonld
55
from .errors import ServerError
66
from .walk import walk
7-
import lightrdf
7+
from pyoxigraph import parse, RdfFormat
88

99

1010
def jsonld2nt(doc, context):
@@ -88,19 +88,14 @@ def sparql_to_rdf(binding):
8888

8989
def triple_iterator(source, log):
9090
"""Recursively extract RDF triples from a file, directory and/or ZIP archive."""
91-
rdfparser = lightrdf.Parser()
9291
for name, path, archive in walk(source):
9392
format = None
9493
if name.endswith(".ttl"):
95-
format = "turtle"
94+
format = RdfFormat.TURTLE
9695
elif name.endswith(".nt"):
97-
format = "nt"
98-
elif name.endswith(".owl"):
99-
format = "owl"
100-
elif name.endswith(".rdf"):
101-
format = "xml"
102-
elif name.endswith(".xml"):
103-
format = "xml"
96+
format = RdfFormat.N_TRIPLES
97+
elif name.endswith(".rdf") or name.endswith(".xml"):
98+
format = RdfFormat.RDF_XML
10499
else:
105100
continue
106101

@@ -112,7 +107,7 @@ def triple_iterator(source, log):
112107
base = f"file://{file}"
113108

114109
# Check whether XML file is RDF/XML
115-
if format == "xml":
110+
if format == RdfFormat.RDF_XML:
116111
f = open(file, "r") if type(file) is str else file
117112
# FIXME: this requires all XML files to be UTF-8!
118113
xml = f.read()
@@ -125,8 +120,11 @@ def triple_iterator(source, log):
125120
try:
126121
log.append(f"Extracting RDF from {base} as {format}")
127122
# TODO: pass errors as warnings to logger instead of STDERR
128-
for triple in rdfparser.parse(file, format=format):
129-
yield triple
123+
if type(file) is str:
124+
file = open(file, "rb")
125+
for triple in parse(file, format=format, lenient=True, without_named_graphs=True):
126+
# TODO: check whether IRIs are valid!
127+
yield str(triple.subject), str(triple.predicate), str(triple.object)
130128
except Exception as e:
131129
log.append(f"Error parsing {base}: {e}")
132130
raise e

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,6 @@ requests
55
datahugger
66
jsonschema
77
SPARQLWrapper
8-
lightrdf
8+
pyoxigraph
99
rdflib
1010
PyLD

tests/invalid.nt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
1-
<http://lobid.org/organisations/DE-MUS-486617#!> <http://purl.org/lobid/lv#picaPPN> "1018736557" .
2-
<http://lobid.org/organisations/DE-MUS-486617#!> <https://schema.org/url> <http://www.magdeburg-tourist.de/index.phtml?sNavID=115.9&mNavID=37.9&object=tx|115.48.1> .
1+
<http://lobid.org/organisations/DE-MUS-486617#!> <http://purl.org/lobid/lv#picaPPN> "1018736557 .
2+
http://lobid.org/organisations/DE-MUS-486617#!> <https://schema.org/url> <http://www.magdeburg-tourist.de/index.phtml?sNavID=115.9&mNavID=37.9&object=tx|115.48.1> .
33
<http://lobid.org/organisations/DE-MUS-486617#!> <https://d-nb.info/standards/elementset/gnd#gndIdentifier> "5095833-1" .

tests/test_api.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -366,7 +366,7 @@ def test_mappings(client):
366366
f'Retrieving source {cwd}/tests/mappings.ndjson from data directory',
367367
'Converting JSKOS mappings to RDF mapping triples',
368368
'Processed 2 lines into 1 mappings',
369-
f'Extracting RDF from file://{stage}/mappings/1/original.ttl as turtle',
369+
f'Extracting RDF from file://{stage}/mappings/1/original.ttl as Turtle',
370370
'Removed 0 triples, changed 0 triples, kept 1 triples.',
371371
'done']
372372

0 commit comments

Comments
 (0)