From 654bc787a5449f7e331860a67cb993226ac99585 Mon Sep 17 00:00:00 2001 From: Gunnar Aastrand Grimnes Date: Tue, 16 May 2017 12:05:26 +0200 Subject: [PATCH 1/6] also register sparql result parsers by content-type and a single parser for graph-based results --- rdflib/plugin.py | 20 ++++++++++++ rdflib/plugins/sparql/results/graph.py | 18 +++++++++++ rdflib/plugins/sparql/results/xmlresults.py | 35 +++++++-------------- rdflib/query.py | 7 +++-- 4 files changed, 53 insertions(+), 27 deletions(-) create mode 100644 rdflib/plugins/sparql/results/graph.py diff --git a/rdflib/plugin.py b/rdflib/plugin.py index d5e6c21d8..a98277150 100644 --- a/rdflib/plugin.py +++ b/rdflib/plugin.py @@ -322,12 +322,32 @@ def plugins(name=None, kind=None): register( 'xml', ResultParser, 'rdflib.plugins.sparql.results.xmlresults', 'XMLResultParser') +register( + 'application/sparql-results+xml', ResultParser, + 'rdflib.plugins.sparql.results.xmlresults', 'XMLResultParser') + +register( + 'application/rdf+xml', ResultParser, + 'rdflib.plugins.sparql.results.graph', 'GraphResultParser') + + register( 'json', ResultParser, 'rdflib.plugins.sparql.results.jsonresults', 'JSONResultParser') +register( + 'application/sparql-results+json', ResultParser, + 'rdflib.plugins.sparql.results.jsonresults', 'JSONResultParser') + register( 'csv', ResultParser, 'rdflib.plugins.sparql.results.csvresults', 'CSVResultParser') +register( + 'text/csv', ResultParser, + 'rdflib.plugins.sparql.results.csvresults', 'CSVResultParser') + register( 'tsv', ResultParser, 'rdflib.plugins.sparql.results.tsvresults', 'TSVResultParser') +register( + 'text/tab-separated-values', ResultParser, + 'rdflib.plugins.sparql.results.tsvresults', 'TSVResultParser') diff --git a/rdflib/plugins/sparql/results/graph.py b/rdflib/plugins/sparql/results/graph.py new file mode 100644 index 000000000..b02aab7df --- /dev/null +++ b/rdflib/plugins/sparql/results/graph.py @@ -0,0 +1,18 @@ +from rdflib import Graph + +from rdflib.query import ( + Result, + ResultParser, + ResultSerializer, + ResultException +) + +class GraphResultParser(ResultParser): + + def parse(self, source, content_type): + + res = Result('CONSTRUCT') # hmm - or describe?type_) + res.graph = Graph() + res.graph.parse(source, format=content_type) + + return res diff --git a/rdflib/plugins/sparql/results/xmlresults.py b/rdflib/plugins/sparql/results/xmlresults.py index 9e0019198..ba35749cc 100644 --- a/rdflib/plugins/sparql/results/xmlresults.py +++ b/rdflib/plugins/sparql/results/xmlresults.py @@ -37,25 +37,18 @@ class XMLResultParser(ResultParser): - def parse(self, source): + def parse(self, source, content_type=None): return XMLResult(source) class XMLResult(Result): - def __init__(self, source): + def __init__(self, source, content_type=None): - xmlstring = source.read() - - if isinstance(xmlstring, text_type): - xmlstring = xmlstring.encode('utf-8') try: parser = etree.XMLParser(huge_tree=True) - tree = etree.parse(BytesIO(xmlstring), parser) + tree = etree.parse(source, parser) except TypeError: - tree = etree.fromstring(xmlstring) - except Exception as e: - log.exception("Error parsing XML results: %s"%xmlstring) - raise e + tree = etree.parse(source) boolean = tree.find(RESULTS_NS_ET + 'boolean') results = tree.find(RESULTS_NS_ET + 'results') @@ -65,18 +58,11 @@ def __init__(self, source): elif results is not None: type_ = 'SELECT' else: - g = Graph() - try: - g.parse(data=xmlstring) - if len(g) == 0: - raise - type_ = 'CONSTRUCT' - - except: - raise ResultException( - "No RDF Graph, result-bindings or boolean answer found!") + raise ResultException( + "No RDF result-bindings or boolean answer found!") Result.__init__(self, type_) + if type_ == 'SELECT': self.bindings = [] for result in results: @@ -90,10 +76,11 @@ def __init__(self, source): './%shead/%svariable' % ( RESULTS_NS_ET, RESULTS_NS_ET))] - elif type_ == 'ASK': + else: self.askAnswer = boolean.text.lower().strip() == "true" - elif type_ == 'CONSTRUCT': - self.graph = g + + + def parseTerm(element): diff --git a/rdflib/query.py b/rdflib/query.py index 432beeb00..9cfe5a26b 100644 --- a/rdflib/query.py +++ b/rdflib/query.py @@ -195,10 +195,11 @@ def _set_bindings(self, b): _get_bindings, _set_bindings, doc="a list of variable bindings as dicts") @staticmethod - def parse(source, format='xml', **kwargs): + def parse(source=None, format=None, content_type=None, **kwargs): from rdflib import plugin - parser = plugin.get(format, ResultParser)() - return parser.parse(source, **kwargs) + parser = plugin.get(format or content_type or 'xml', ResultParser)() + + return parser.parse(source, content_type=content_type, **kwargs) def serialize( self, destination=None, encoding="utf-8", format='xml', **args): From 7255e6db12e6479d10b3b738fef1529e393c9ff7 Mon Sep 17 00:00:00 2001 From: Gunnar Aastrand Grimnes Date: Tue, 16 May 2017 12:25:49 +0200 Subject: [PATCH 2/6] Remove the SPARQLWrapper dependency Replace with a very thin wrapper for modern RDF stores speaking SPARQL 1.1 protocol. This is based on `requests`. --- rdflib/plugins/stores/sparqlstore.py | 179 +++++++------------------ rdflib/plugins/stores/sparqlwrapper.py | 108 +++++++++++++++ setup.py | 4 +- 3 files changed, 159 insertions(+), 132 deletions(-) create mode 100644 rdflib/plugins/stores/sparqlwrapper.py diff --git a/rdflib/plugins/stores/sparqlstore.py b/rdflib/plugins/stores/sparqlstore.py index 2be781d7a..285dbfb0d 100644 --- a/rdflib/plugins/stores/sparqlstore.py +++ b/rdflib/plugins/stores/sparqlstore.py @@ -13,67 +13,20 @@ import re import collections -import warnings -import contextlib -try: - from SPARQLWrapper import SPARQLWrapper, XML, JSON, POST, GET, URLENCODED, POSTDIRECTLY -except ImportError: - raise Exception( - "SPARQLWrapper not found! SPARQL Store will not work." + - "Install with 'pip install SPARQLWrapper'") +from .sparqlwrapper import SPARQLWrapper from rdflib.plugins.stores.regexmatching import NATIVE_REGEX from rdflib.store import Store -from rdflib.query import Result from rdflib import Variable, BNode from rdflib.graph import DATASET_DEFAULT_GRAPH_ID from rdflib.term import Node from six import string_types -class NSSPARQLWrapper(SPARQLWrapper): - nsBindings = {} - - def setNamespaceBindings(self, bindings): - """ - A shortcut for setting namespace bindings that will be added - to the prolog of the query - - @param bindings: A dictionary of prefixs to URIs - """ - self.nsBindings.update(bindings) - - def setQuery(self, query): - """ - Set the SPARQL query text. Note: no check is done on the - validity of the query (syntax or otherwise) by this module, - except for testing the query type (SELECT, ASK, etc). - - Syntax and validity checking is done by the SPARQL service itself. - - @param query: query text - @type query: string - @bug: #2320024 - """ - self.queryType = self._parseQueryType(query) - self.queryString = self.injectPrefixes(query) - - def injectPrefixes(self, query): - prefixes = list(self.nsBindings.items()) - if not prefixes: - return query - return '\n'.join([ - '\n'.join(['PREFIX %s: <%s>' % (k, v) for k, v in prefixes]), - '', # separate prefixes from query with an empty line - query - ]) - - BNODE_IDENT_PATTERN = re.compile('(?P