44from pyld import jsonld
55from .errors import ServerError
66from .walk import walk
7- import lightrdf
7+ from pyoxigraph import parse , RdfFormat
88
99
1010def jsonld2nt (doc , context ):
@@ -88,19 +88,14 @@ def sparql_to_rdf(binding):
8888
8989def triple_iterator (source , log ):
9090 """Recursively extract RDF triples from a file, directory and/or ZIP archive."""
91- rdfparser = lightrdf .Parser ()
9291 for name , path , archive in walk (source ):
9392 format = None
9493 if name .endswith (".ttl" ):
95- format = "turtle"
94+ format = RdfFormat . TURTLE
9695 elif name .endswith (".nt" ):
97- format = "nt"
98- elif name .endswith (".owl" ):
99- format = "owl"
100- elif name .endswith (".rdf" ):
101- format = "xml"
102- elif name .endswith (".xml" ):
103- format = "xml"
96+ format = RdfFormat .N_TRIPLES
97+ elif name .endswith (".rdf" ) or name .endswith (".xml" ):
98+ format = RdfFormat .RDF_XML
10499 else :
105100 continue
106101
@@ -112,7 +107,7 @@ def triple_iterator(source, log):
112107 base = f"file://{ file } "
113108
114109 # Check whether XML file is RDF/XML
115- if format == "xml" :
110+ if format == RdfFormat . RDF_XML :
116111 f = open (file , "r" ) if type (file ) is str else file
117112 # FIXME: this requires all XML files to be UTF-8!
118113 xml = f .read ()
@@ -125,8 +120,11 @@ def triple_iterator(source, log):
125120 try :
126121 log .append (f"Extracting RDF from { base } as { format } " )
127122 # TODO: pass errors as warnings to logger instead of STDERR
128- for triple in rdfparser .parse (file , format = format ):
129- yield triple
123+ if type (file ) is str :
124+ file = open (file , "rb" )
125+ for triple in parse (file , format = format , lenient = True , without_named_graphs = True ):
126+ # TODO: check whether IRIs are valid!
127+ yield str (triple .subject ), str (triple .predicate ), str (triple .object )
130128 except Exception as e :
131129 log .append (f"Error parsing { base } : { e } " )
132130 raise e
0 commit comments