diff --git a/SPARQLWrapper/KeyCaseInsensitiveDict.py b/SPARQLWrapper/KeyCaseInsensitiveDict.py index ff7f59f..f20a040 100644 --- a/SPARQLWrapper/KeyCaseInsensitiveDict.py +++ b/SPARQLWrapper/KeyCaseInsensitiveDict.py @@ -18,15 +18,19 @@ _V = TypeVar("_V") + class KeyCaseInsensitiveDict(Dict[str, _V]): """ A simple implementation of a key case-insensitive dictionary """ - def __init__(self, d: Mapping[str, _V]={}) -> None: + def __init__(self, d=None) -> None: """ :param dict d: The source dictionary. """ + super().__init__() + if d is None: + d = {} for k, v in d.items(): self[k] = v diff --git a/SPARQLWrapper/SPARQLExceptions.py b/SPARQLWrapper/SPARQLExceptions.py index 65cc25c..eb9ba47 100644 --- a/SPARQLWrapper/SPARQLExceptions.py +++ b/SPARQLWrapper/SPARQLExceptions.py @@ -35,13 +35,9 @@ def __init__(self, response: Optional[bytes] = None): :param string response: The server response """ if response: - formatted_msg = "%s: %s. \n\nResponse:\n%r" % ( - self.__class__.__name__, - self.msg, - response, - ) + formatted_msg = f"{self.__class__.__name__}: {self.msg}. \n\nResponse:\n{response}" else: - formatted_msg = "%s: %s." % (self.__class__.__name__, self.msg) + formatted_msg = f"{self.__class__.__name__}: {self.msg}." 
super(SPARQLWrapperException, self).__init__(formatted_msg) diff --git a/SPARQLWrapper/SmartWrapper.py b/SPARQLWrapper/SmartWrapper.py index e88c155..c2b76ff 100644 --- a/SPARQLWrapper/SmartWrapper.py +++ b/SPARQLWrapper/SmartWrapper.py @@ -20,11 +20,13 @@ """ +import contextlib from typing import Any, Dict, List, Optional, Tuple, Union from SPARQLWrapper.Wrapper import JSON, SELECT, QueryResult from SPARQLWrapper.Wrapper import SPARQLWrapper as SW + ###################################################################################### @@ -66,15 +68,10 @@ def __init__(self, variable: str, binding: Dict[str, str]) -> None: self.type = binding["type"] self.lang = None self.datatype = None - try: + with contextlib.suppress(Exception): self.lang = binding["xml:lang"] - except: - # no lang is set - pass - try: + with contextlib.suppress(Exception): self.datatype = binding["datatype"] - except: - pass def __repr__(self) -> str: cls = self.__class__.__name__ @@ -119,31 +116,16 @@ def __init__(self, retval: QueryResult): self.fullResult = retval._convertJSON() self.head = self.fullResult["head"] self.variables: Optional[List[str]] = None - try: + with contextlib.suppress(Exception): self.variables = self.fullResult["head"]["vars"] - except: - pass - self.bindings: List[Dict[str, Value]] = [] - try: + with contextlib.suppress(Exception): for b in self.fullResult["results"]["bindings"]: - # This is a single binding. 
It is a dictionary per variable; each value is a dictionary again - # that has to be converted into a Value instance - newBind = {} - # type error: Item "None" of "Union[List[str], Any, None]" has no attribute "__iter__" (not iterable) - for key in self.variables: # type: ignore [union-attr] - if key in b: - # there is a real binding for this key - newBind[key] = Value(key, b[key]) + newBind = {key: Value(key, b[key]) for key in self.variables if key in b} self.bindings.append(newBind) - except: - pass - self.askResult = False - try: + with contextlib.suppress(Exception): self.askResult = self.fullResult["boolean"] - except: - pass def getValues(self, key: str) -> Optional[List[Value]]: """A shorthand for the retrieval of all bindings for a single key. It is @@ -156,7 +138,7 @@ def getValues(self, key: str) -> Optional[List[Value]]: """ try: return [b[key] for b in self[key]] - except: + except Exception: return [] def __contains__(self, key: Union[str, List[str], Tuple[str]]) -> bool: @@ -180,13 +162,9 @@ def __contains__(self, key: Union[str, List[str], Tuple[str]]) -> bool: return False for b in self.bindings: # try to find a binding where all key elements are present - if False in [k in b for k in key]: - # this is not a binding for the key combination, move on... - continue - else: + if False not in [k in b for k in key]: # yep, this one is good! return True - return False else: # type error: Unsupported right operand type for in ("Optional[List[str]]") if key not in self.variables: # type: ignore [operator] @@ -194,7 +172,7 @@ def __contains__(self, key: Union[str, List[str], Tuple[str]]) -> bool: for b in self.bindings: if key in b: return True - return False + return False def __getitem__(self, key: Union[slice, str, List[str]]) -> List[Dict[str, Value]]: """Emulation of the ``obj[key]`` operator. Slice notation is also available. 
@@ -222,18 +200,18 @@ def _checkKeys(keys: Union[List[Any], Tuple[Any, ...]]) -> bool: for k in keys: # type error: Unsupported right operand type for in ("Optional[List[str]]") if ( - not isinstance(k, str) - or k not in self.variables # type: ignore [operator] + not isinstance(k, str) + or k not in self.variables # type: ignore [operator] ): return False return True def _nonSliceCase( - key: Union[ - str, - List[Any], - Tuple[Any], - ] + key: Union[ + str, + List[Any], + Tuple[Any], + ] ) -> Union[List[Any], bool, Tuple[Any]]: # type error: Unsupported right operand type for in ("Optional[List[str]]") if isinstance(key, str) and key != "" and key in self.variables: # type: ignore[operator] @@ -273,7 +251,7 @@ def _nonSliceCase( # if we got that far, we should be all right! retval.append(b) # if retval is of zero length, no hit; an exception should be raised to stay within the python style - if len(retval) == 0: + if not retval: raise IndexError return retval @@ -330,7 +308,7 @@ def setReturnFormat(self, format: Optional[str]) -> None: """ pass - def query(self) -> Union[Bindings, QueryResult]: # type: ignore[override] + def query(self) -> Union[Bindings, QueryResult]: # type: ignore[override] """ Execute the query and do an automatic conversion. @@ -345,13 +323,10 @@ def query(self) -> Union[Bindings, QueryResult]: # type: ignore[override] """ res = super(SPARQLWrapper2, self).query() - if self.queryType == SELECT: - return Bindings(res) - else: - return res + return Bindings(res) if self.queryType == SELECT else res def queryAndConvert( # type: ignore[override] - self, + self, ) -> Union[Union[Bindings, QueryResult], QueryResult.ConvertResult]: """This is here to override the inherited method; it is equivalent to :class:`query`. diff --git a/SPARQLWrapper/Wrapper.py b/SPARQLWrapper/Wrapper.py index 977380a..c71e450 100644 --- a/SPARQLWrapper/Wrapper.py +++ b/SPARQLWrapper/Wrapper.py @@ -21,7 +21,6 @@ :requires: `RDFLib `_ package. 
""" - import base64 import json import re @@ -29,20 +28,23 @@ import urllib.parse import urllib.request import warnings +import xml.dom.minidom from http.client import HTTPResponse from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, Union, cast +from xml.dom.minidom import Document from urllib.request import ( urlopen as urlopener, ) # don't change the name: tests override it from xml.dom.minidom import Document, parse +import deprecation +import requests + from SPARQLWrapper import __agent__ +from SPARQLWrapper import __version__ if TYPE_CHECKING: from rdflib import Graph - - - from .KeyCaseInsensitiveDict import KeyCaseInsensitiveDict from .SPARQLExceptions import ( EndPointInternalError, @@ -82,7 +84,8 @@ """to be used to set HTTP method ``POST``.""" _allowedRequests = [POST, GET] -# Possible HTTP Authentication methods + +# Possible HTTP Authentication methods - DEPRECATED BASIC = "BASIC" """to be used to set ``BASIC`` HTTP Authentication method.""" DIGEST = "DIGEST" @@ -150,8 +153,8 @@ # joseki will be o.k., too. The situation with turtle and n3 is even more confusing because the text/n3 and text/turtle # mime types have just been proposed and not yet widely used... -_SPARQL_DEFAULT = ["application/sparql-results+xml", "application/rdf+xml", "*/*"] -_SPARQL_XML = ["application/sparql-results+xml"] +_SPARQL_DEFAULT = ["application/xml", "application/sparql-results+xml", "application/rdf+xml", "*/*"] +_SPARQL_XML = ["application/xml", "application/sparql-results+xml"] _SPARQL_JSON = [ "application/sparql-results+json", "application/json", @@ -167,7 +170,7 @@ "text/n3", ] _RDF_JSONLD = ["application/ld+json", "application/x-json+ld"] -_CSV = ["text/csv"] +_CSV = ["application/csv", "text/csv"] _TSV = ["text/tab-separated-values"] _XML = ["application/xml"] _ALL = ["*/*"] @@ -183,6 +186,7 @@ # parameters they do not understand. So: just repeat all possibilities in the final URI. UGLY!!!!!!! 
_returnFormatSetting = ["format", "output", "results"] + ####################################################################################################### @@ -194,70 +198,48 @@ class SPARQLWrapper(object): are retained from one query to the next (in other words, only the query string changes). The instance can also be reset to its initial values using the :meth:`resetQuery` method. - :ivar endpoint: SPARQL endpoint's URI. - :vartype endpoint: string - :ivar updateEndpoint: SPARQL endpoint's URI for SPARQL Update operations (if it's a different one). - The **default** value is ``None``. - :vartype updateEndpoint: string - :ivar agent: The User-Agent for the HTTP request header. The **default** value is an autogenerated string using t - he SPARQLWrapper version code. - :vartype agent: string - :ivar _defaultGraph: URI for the default graph. The value can be set either via an explicit call - :func:`addParameter("default-graph-uri", uri)` or as part of the query string. The **default** - value is ``None``. - :vartype _defaultGraph: string - :ivar user: The username of the credentials for querying the current endpoint. The value can be set an explicit - call :func:`setCredentials`. The **default** value is ``None``. - :vartype user: string - :ivar passwd: The password of the credentials for querying the current endpoint. The value can be set an explicit - call :func:`setCredentials`. The **default** value is ``None``. - :vartype passwd: string - :ivar http_auth: HTTP Authentication type. The **default** value is :data:`BASIC`. Possible values are - :data:`BASIC` or :data:`DIGEST`. It is used only in case the credentials are set. - :vartype http_auth: string - :ivar onlyConneg: Option for allowing (or not) **only** HTTP Content Negotiation (so dismiss the use of HTTP - parameters). The default value is ``False``. - :vartype onlyConneg: boolean - :ivar customHttpHeaders: Custom HTTP Headers to be included in the request. 
It is a dictionary where keys are the - header field and values are the header values. **Important**: These headers override previous values (including - ``Content-Type``, ``User-Agent``, ``Accept`` and ``Authorization`` if they are present). - :vartype customHttpHeaders: dict - :ivar timeout: The timeout (in seconds) to use for querying the endpoint. - :vartype timeout: int - :ivar queryString: The SPARQL query text. - :vartype queryString: string - :ivar queryType: The type of SPARQL query (aka SPARQL query form), like :data:`CONSTRUCT`, :data:`SELECT`, - :data:`ASK`, :data:`DESCRIBE`, :data:`INSERT`, :data:`DELETE`, :data:`CREATE`, :data:`CLEAR`, :data:`DROP`, - :data:`LOAD`, :data:`COPY`, :data:`MOVE` or :data:`ADD` (constants in this module). - :vartype queryType: string - :ivar returnFormat: The return format.\ - No local check is done, so the parameter is simply sent to the endpoint. Eg, if the value is set to :data:`JSON` - and a construct query is issued, it is up to the endpoint to react or not, this wrapper does not check.\ - The possible values are :data:`JSON`, :data:`XML`, :data:`TURTLE`, :data:`N3`, :data:`RDF`, :data:`RDFXML`, - :data:`CSV`, :data:`TSV`, :data:`JSONLD` (constants in this module).\ - The **default** value is :data:`XML`. - :vartype returnFormat: string - :ivar requestMethod: The request method for query or update operations. The possibles values are URL-encoded - (:data:`URLENCODED`) or POST directly (:data:`POSTDIRECTLY`). - :vartype requestMethod: string - :ivar method: The invocation method (HTTP verb). The **default** value is :data:`GET`, but it can be set to - :data:`POST`. - :vartype method: string - :ivar parameters: The parameters of the request (key/value pairs in a dictionary). - :vartype parameters: dict - :ivar _defaultReturnFormat: The default return format. It is used in case the same class instance is reused for - subsequent queries. 
- :vartype _defaultReturnFormat: string - - :cvar prefix_pattern: regular expression used to remove base/prefixes in the process of determining the query type. - :vartype prefix_pattern: :class:`re.RegexObject`, a compiled regular expression. See the :mod:`re` module of Python - :cvar pattern: regular expression used to determine whether a query (without base/prefixes) is of type - :data:`CONSTRUCT`, :data:`SELECT`, :data:`ASK`, :data:`DESCRIBE`, :data:`INSERT`, :data:`DELETE`, :data:`CREATE`, - :data:`CLEAR`, :data:`DROP`, :data:`LOAD`, :data:`COPY`, :data:`MOVE` or :data:`ADD`. - :vartype pattern: :class:`re.RegexObject`, a compiled regular expression. See the :mod:`re` module of Python - :cvar comments_pattern: regular expression used to remove comments from a query. - :vartype comments_pattern: :class:`re.RegexObject`, a compiled regular expression. See the :mod:`re` module of - Python + Attributes: + endpoint: str -> SPARQL endpoint's URI. + updateEndpoint: str -> SPARQL endpoint's URI for SPARQL Update operations (if it's a different one). + **Default** is ``None``. + agent: str -> User-Agent for HTTP request header. **Default** value is an autogenerated string using + SPARQLWrapper version code. + _defaultGraph: str -> URI for the default graph. Value can be set either via explicit call + :func:`addParameter("default-graph-uri", uri)` or as part of the query string. + **Default** value is ``None``. + onlyConneg: bool -> Option for allowing (or not) **only** HTTP Content Negotiation (so dismiss the use of HTTP + parameters). **Default** value is ``False``. + customHttpHeaders: dict -> Custom HTTP Headers to be included in the request. A dictionary where keys are header + field and values are header values. **Important**: These headers override previous values (including + ``Content-Type``, ``User-Agent`` and ``Accept`` if present). + timeout: int -> Timeout (in seconds) to use for endpoint querying. + queryString: str -> The SPARQL query text. 
+ queryType: str -> Type of SPARQL query (aka SPARQL query form), like :data:`CONSTRUCT`, :data:`SELECT`, + :data:`ASK`, :data:`DESCRIBE`, :data:`INSERT`, :data:`DELETE`, :data:`CREATE`, :data:`CLEAR`, :data:`DROP`, + :data:`LOAD`, :data:`COPY`, :data:`MOVE` or :data:`ADD` (constants in this module). + returnFormat: str -> The return format. **Important:** No local check is done, parameter is simply sent to + endpoint. Eg, if the value is set to :data:`JSON` and a construct query is issued, it is up to the endpoint + to react or not. Possible values are :data:`JSON`, :data:`XML`, :data:`TURTLE`, :data:`N3`, :data:`RDF`, + :data:`RDFXML`, :data:`CSV`, :data:`TSV`, :data:`JSONLD` (constants in this module). + The **default** value is :data:`XML`. + requestMethod: str -> The request method for query or update operations. The possibles values are URL-encoded + (:data:`URLENCODED`) or POST directly (:data:`POSTDIRECTLY`). + method: str -> Invocation method (HTTP verb).**Default** value is :data:`GET`, but can be set to :data:`POST`. + parameters: dict -> The parameters of the request (key/value pairs in a dictionary). + _defaultReturnFormat: str -> Default return format. Used in case the same class instance is reused for + subsequent queries + session: requests.Session -> Expects already authorized session, if authorization is needed, otherwise a new one + is generated without any authentication. **Default** is ``None``. + prefix_pattern: class -> Regular expression used to remove base/prefixes in process of query + type determination. :class:`re.RegexObject`, a compiled regular expression. See the :mod:`re` module + of Python. + pattern: class -> Regular expression used to determine whether a query (without base/prefixes) + is of type :data:`CONSTRUCT`, :data:`SELECT`, :data:`ASK`, :data:`DESCRIBE`, :data:`INSERT`, :data:`DELETE`, + :data:`CREATE`, :data:`CLEAR`, :data:`DROP`, :data:`LOAD`, :data:`COPY`, :data:`MOVE` or :data:`ADD`. 
+ :class:`re.RegexObject`, a compiled regular expression. See the :mod:`re` module of Python. + comments_pattern: class -> regular expression used to remove comments from a query. + :class:`re.RegexObject`, a compiled regular expression. See the :mod:`re` module of Python. + """ prefix_pattern = re.compile( @@ -271,12 +253,13 @@ class SPARQLWrapper(object): comments_pattern = re.compile(r"(^|\n)\s*#.*?\n") def __init__( - self, - endpoint: str, - updateEndpoint: Optional[str] = None, - returnFormat: str = XML, - defaultGraph: Optional[str] = None, - agent: str = __agent__, + self, + endpoint: str, + updateEndpoint: Optional[str] = None, + returnFormat: str = XML, + defaultGraph: Optional[str] = None, + session: Optional[requests.Session] = None, + agent: str = __agent__, ) -> None: """ Class encapsulating a full SPARQL call. @@ -297,20 +280,21 @@ def __init__( :func:`addParameter("default-graph-uri", uri)` or as part of the query string. The **default** value is ``None``. :type defaultGraph: string - :param agent: The User-Agent for the HTTP request header. The **default** value is an autogenerated string + :param agent: The User-Agent for the HTTP request header. **Default** value is an autogenerated string using the SPARQLWrapper version number. :type agent: string + :param session: The Session for the endpoint authentication. 
**Default** value is a new Session without + any Authentication values + :type session: requests.Session """ self.endpoint = endpoint - self.updateEndpoint = updateEndpoint if updateEndpoint else endpoint + self.updateEndpoint = updateEndpoint or endpoint self.agent = agent - self.user: Optional[str] = None - self.passwd: Optional[str] = None - self.http_auth = BASIC self._defaultGraph = defaultGraph self.onlyConneg = False # Only Content Negotiation self.customHttpHeaders: Dict[str, str] = {} self.timeout: Optional[int] + self.session: requests.Session = session if returnFormat in _allowedFormats: self._defaultReturnFormat = returnFormat @@ -346,8 +330,7 @@ def setReturnFormat(self, format: str) -> None: self.returnFormat = format else: warnings.warn( - "Ignore format '%s'; current instance supports: %s." - % (format, ", ".join(_allowedFormats)), + f"""Ignore format '{format}'; current instance supports: {", ".join(_allowedFormats)}.""", SyntaxWarning, ) @@ -369,7 +352,7 @@ def setTimeout(self, timeout: int) -> None: :param timeout: Timeout in seconds. :type timeout: int """ - self.timeout = int(timeout) + self.timeout = timeout def setOnlyConneg(self, onlyConneg: bool) -> None: """Set this option for allowing (or not) only HTTP Content Negotiation (so dismiss the use of HTTP parameters). @@ -396,32 +379,38 @@ def setRequestMethod(self, method: str) -> None: if method in _REQUEST_METHODS: self.requestMethod = method else: - warnings.warn("invalid update method '%s'" % method, RuntimeWarning) + warnings.warn(f"invalid update method '{method}'", RuntimeWarning) + @deprecation.deprecated(deprecated_in="1.6.0", current_version=__version__, + details="Use addParameter('default-graph-uri', uri)` instead") def addDefaultGraph(self, uri: str) -> None: """ Add a default graph URI. - .. deprecated:: 1.6.0 Use :func:`addParameter("default-graph-uri", uri)` instead of this - method. 
+ **DEPRECATED**: In version 1.6.0 - Use :func:`addParameter("default-graph-uri", uri)` + instead of this method. :param uri: URI of the default graph. :type uri: string """ self.addParameter("default-graph-uri", uri) + @deprecation.deprecated(deprecated_in="1.6.0", current_version=__version__, + details="Use addParameter('named-graph-uri', uri) instead") def addNamedGraph(self, uri: str) -> None: """ Add a named graph URI. - .. deprecated:: 1.6.0 Use :func:`addParameter("named-graph-uri", uri)` instead of this - method. + **DEPRECATED**: In version 1.6.0 - Use :func:`addParameter("named-graph-uri", uri)` + instead of this method. :param uri: URI of the named graph. :type uri: string """ self.addParameter("named-graph-uri", uri) + @deprecation.deprecated(deprecated_in="1.6.0", current_version=__version__, + details="Use addParameter(key, value) instead") def addExtraURITag(self, key: str, value: str) -> None: """ Some SPARQL endpoints require extra key value pairs. @@ -429,7 +418,7 @@ def addExtraURITag(self, key: str, value: str) -> None: virtuoso to retrieve graphs that are not stored in its local database. Alias of :func:`addParameter` method. - .. deprecated:: 1.6.0 Use :func:`addParameter(key, value)` instead of this method + **DEPRECATED**: In version 1.6.0 - Use :func:`addParameter(key, value)` instead of this method :param key: key of the query part. :type key: string @@ -438,11 +427,14 @@ def addExtraURITag(self, key: str, value: str) -> None: """ self.addParameter(key, value) + @deprecation.deprecated(deprecated_in="1.6.0", current_version=__version__, + details="Use addParameter(key, value) instead") def addCustomParameter(self, name: str, value: str) -> bool: """ Method is kept for backwards compatibility. Historically, it "replaces" parameters instead of adding. - .. 
deprecated:: 1.6.0 Use :func:`addParameter(key, value)` instead of this method + **DEPRECATED**: In version 1.6.0 - Use :func:`addParameter(key, value)` + instead of this method :param name: name. :type name: string @@ -454,29 +446,8 @@ def addCustomParameter(self, name: str, value: str) -> bool: self.clearParameter(name) return self.addParameter(name, value) - def addParameter(self, name: str, value: str) -> bool: - """ - Some SPARQL endpoints allow extra key value pairs. - E.g., in virtuoso, one would add ``should-sponge=soft`` to the query forcing - virtuoso to retrieve graphs that are not stored in its local database. - If the parameter :attr:`query` is tried to be set, this intent is dismissed. - Returns a boolean indicating if the set has been accomplished. - - :param name: name. - :type name: string - :param value: value. - :type value: string - :return: Returns ``True`` if the adding has been accomplished, otherwise ``False``. - :rtype: bool - """ - if name in _SPARQL_PARAMS: - return False - else: - if name not in self.parameters: - self.parameters[name] = [] - self.parameters[name].append(value) - return True - + @deprecation.deprecated(deprecated_in="2.0.0", current_version=__version__, + details="Use requests.Session instead") def addCustomHttpHeader(self, httpHeaderName: str, httpHeaderValue: str) -> None: """ Add a custom HTTP header (this method can override all HTTP headers). @@ -485,6 +456,8 @@ def addCustomHttpHeader(self, httpHeaderName: str, httpHeaderValue: str) -> None ``Content-Type``, ``User-Agent``, ``Accept`` and ``Authorization`` would be overriden if the header field name is present as value of the parameter :attr:`httpHeaderName`. + **DEPRECATED**: In version 2.0.0 - Use a prepared requests.Session instead of this method + .. versionadded:: 1.8.2 :param httpHeaderName: The header field name. 
@@ -494,12 +467,14 @@ def addCustomHttpHeader(self, httpHeaderName: str, httpHeaderValue: str) -> None """ self.customHttpHeaders[httpHeaderName] = httpHeaderValue + @deprecation.deprecated(deprecated_in="2.0.0", current_version=__version__, + details="Use requests.Session instead") def clearCustomHttpHeader(self, httpHeaderName: str) -> bool: """ Clear the values of a custom HTTP Header previously set. Returns a boolean indicating if the clearing has been accomplished. - .. versionadded:: 1.8.2 + **DEPRECATED**: In version 2.0.0 - Use a prepared requests.Session instead of this method :param httpHeaderName: HTTP header name. :type httpHeaderName: string @@ -512,6 +487,28 @@ def clearCustomHttpHeader(self, httpHeaderName: str) -> bool: except KeyError: return False + def addParameter(self, name: str, value: str) -> bool: + """ + Some SPARQL endpoints allow extra key value pairs. + E.g., in virtuoso, one would add ``should-sponge=soft`` to the query forcing + virtuoso to retrieve graphs that are not stored in its local database. + If the parameter :attr:`query` is tried to be set, this intent is dismissed. + Returns a boolean indicating if the set has been accomplished. + + :param name: name. + :type name: string + :param value: value. + :type value: string + :return: Returns ``True`` if the adding has been accomplished, otherwise ``False``. + :rtype: bool + """ + if name in _SPARQL_PARAMS: + return False + elif name not in self.parameters: + self.parameters[name] = [] + self.parameters[name].append(value) + return True + def clearParameter(self, name: str) -> bool: """ Clear the values of a concrete parameter. 
@@ -524,19 +521,22 @@ def clearParameter(self, name: str) -> bool: """ if name in _SPARQL_PARAMS: return False - else: - try: - del self.parameters[name] - return True - except KeyError: - return False + try: + del self.parameters[name] + return True + except KeyError: + return False + @deprecation.deprecated(deprecated_in="2.0.0", current_version=__version__, + details="Use requests.session instead") def setCredentials( self, user: Optional[str], passwd: Optional[str], realm: str = "SPARQL" ) -> None: """ Set the credentials for querying the current endpoint. + **DEPRECATED**: In version 2.0.0 - Use a prepared requests.Session instead of this method + :param user: username. :type user: string :param passwd: password. @@ -551,10 +551,14 @@ def setCredentials( self.passwd = passwd self.realm = realm + @deprecation.deprecated(deprecated_in="2.0.0", current_version=__version__, + details="Use requests.session instead") def setHTTPAuth(self, auth: str) -> None: """ Set the HTTP Authentication type. Possible values are :class:`BASIC` or :class:`DIGEST`. + **DEPRECATED**: In version 2.0.0 - Use a prepared requests.Session instead of this method + :param auth: auth type. :type auth: string :raises TypeError: If the :attr:`auth` parameter is not an string. @@ -628,10 +632,9 @@ def _parseQueryType(self, query: str) -> Optional[str]: if r_queryType in _allowedQueryTypes: return r_queryType - else: # raise Exception("Illegal SPARQL Query; must be one of SELECT, ASK, DESCRIBE, or CONSTRUCT") - warnings.warn("unknown query type '%s'" % r_queryType, RuntimeWarning) - return SELECT + warnings.warn(f"unknown query type '{r_queryType}'", RuntimeWarning) + return SELECT def setMethod(self, method: str) -> None: """Set the invocation method. By default, this is :data:`GET`, but can be set to :data:`POST`. 
@@ -642,16 +645,20 @@ def setMethod(self, method: str) -> None: if method in _allowedRequests: self.method = method + @deprecation.deprecated(deprecated_in="2.0.0", current_version=__version__, + details="Use requests.Session instead") def setUseKeepAlive(self) -> None: """Make :mod:`urllib2` use keep-alive. + **DEPRECATED**: In version 2.0.0 - Use requests.Session instead + :raises ImportError: when could not be imported ``keepalive.HTTPHandler``. """ try: from keepalive import HTTPHandler # type: ignore[import] if urllib.request._opener and any( # type: ignore[attr-defined] - isinstance(h, HTTPHandler) for h in urllib.request._opener.handlers # type: ignore[attr-defined] + isinstance(h, HTTPHandler) for h in urllib.request._opener.handlers # type: ignore[attr-defined] ): # already installed return @@ -702,7 +709,7 @@ def _cleanComments(self, query: str) -> str: return re.sub(self.comments_pattern, "\n\n", query) def _getRequestEncodedParameters( - self, query: Optional[Tuple[str, str]] = None + self, query: Optional[Tuple[str, str]] = None ) -> str: """ Internal method for getting the request encoded parameters. 
@@ -764,20 +771,19 @@ def _getAcceptHeader(self) -> str: elif self.returnFormat == JSON: acceptHeader = ",".join(_SPARQL_JSON) elif ( - self.returnFormat == CSV + self.returnFormat == CSV ): # Allowed for SELECT and ASK (https://www.w3.org/TR/2013/REC-sparql11-protocol-20130321/#query-success) # but only described for SELECT (https://www.w3.org/TR/sparql11-results-csv-tsv/) acceptHeader = ",".join(_CSV) elif ( - self.returnFormat == TSV + self.returnFormat == TSV ): # Allowed for SELECT and ASK (https://www.w3.org/TR/2013/REC-sparql11-protocol-20130321/#query-success) # but only described for SELECT (https://www.w3.org/TR/sparql11-results-csv-tsv/) acceptHeader = ",".join(_TSV) else: acceptHeader = ",".join(_ALL) warnings.warn( - "Sending Accept header '*/*' because unexpected returned format '%s' in a '%s' SPARQL query form" - % (self.returnFormat, self.queryType), + f"Sending Accept header '*/*' because unexpected returned format '{self.returnFormat}' in a '{self.queryType}' SPARQL query form", RuntimeWarning, ) elif self.queryType in [CONSTRUCT, DESCRIBE]: @@ -785,15 +791,14 @@ def _getAcceptHeader(self) -> str: acceptHeader = ",".join(_RDF_TURTLE) elif self.returnFormat == N3: acceptHeader = ",".join(_RDF_N3) - elif self.returnFormat == XML or self.returnFormat == RDFXML: + elif self.returnFormat in [XML, RDFXML]: acceptHeader = ",".join(_RDF_XML) elif self.returnFormat == JSONLD and JSONLD in _allowedFormats: acceptHeader = ",".join(_RDF_JSONLD) else: acceptHeader = ",".join(_ALL) warnings.warn( - "Sending Accept header '*/*' because unexpected returned format '%s' in a '%s' SPARQL query form" - % (self.returnFormat, self.queryType), + f"Sending Accept header '*/*' because unexpected returned format '{self.returnFormat}' in a '{self.queryType}' SPARQL query form", RuntimeWarning, ) elif self.queryType in [ @@ -817,10 +822,15 @@ def _getAcceptHeader(self) -> str: acceptHeader = "*/*" return acceptHeader + @deprecation.deprecated(deprecated_in="2.0.0", 
current_version=__version__, + details="Use requests.Session instead") def _createRequest(self) -> urllib.request.Request: """Internal method to create request according a HTTP method. Returns a :class:`urllib2.Request` object of the :mod:`urllib2` Python library + **DEPRECATED:: In version 2.0.0. - Use requests.Session instead + + :raises NotImplementedError: If the HTTP authentification method is not one of the valid values: :data:`BASIC` or :data:`DIGEST`. :return: request a :class:`urllib2.Request` object of the :mod:`urllib2` Python library @@ -905,46 +915,31 @@ def _createRequest(self) -> urllib.request.Request: return request - def _query(self) -> Tuple[HTTPResponse, str]: + def _query(self) -> tuple[requests.models.Response, str]: """Internal method to execute the query. Returns the output of the - :func:`urllib2.urlopen` method of the :mod:`urllib2` Python library - - :return: tuples with the raw request plus the expected format. - :raises QueryBadFormed: If the HTTP return code is ``400``. - :raises Unauthorized: If the HTTP return code is ``401``. - :raises EndPointNotFound: If the HTTP return code is ``404``. - :raises URITooLong: If the HTTP return code is ``414``. - :raises EndPointInternalError: If the HTTP return code is ``500``. - :raises urllib2.HTTPError: If the HTTP return code is different to ``400``, ``401``, ``404``, ``414``, ``500``. + :func:`requests.Session` method of the :mod:`Requests` Python library. + The session object is passed by the user or created without any authentication. 
""" - request = self._createRequest() + if self.session is None: + self.session = requests.Session() - try: - if self.timeout: - response = urlopener(request, timeout=self.timeout) - else: - response = urlopener(request) - return response, self.returnFormat - except urllib.error.HTTPError as e: - if e.code == 400: - raise QueryBadFormed(e.read()) - elif e.code == 404: - raise EndPointNotFound(e.read()) - elif e.code == 401: - raise Unauthorized(e.read()) - elif e.code == 414: - raise URITooLong(e.read()) - elif e.code == 500: - raise EndPointInternalError(e.read()) - else: - raise e + accept_header = self._getAcceptHeader() + + self.session.headers = {'Accept': accept_header, + 'Content-Type': accept_header} + self.session.params = {'query': self.queryString} + + response = self.session.get(self.endpoint, timeout=self.timeout) \ + if self.timeout else self.session.get(self.endpoint) + + return response, self.returnFormat def query(self) -> "QueryResult": """ Execute the query. Exceptions can be raised if either the URI is wrong or the HTTP sends back an error (this is also the case when the query is syntactically incorrect, leading to an HTTP error sent back by the SPARQL endpoint). - The usual urllib2 exceptions are raised, which therefore cover possible SPARQL errors, too. + The usual requests exceptions are raised, which therefore cover possible SPARQL errors, too. Note that some combinations of return formats and query types may not make sense. For example, a SELECT query with Turtle response is meaningless (the output of a SELECT is not a Graph), or a CONSTRUCT @@ -975,7 +970,7 @@ def __str__(self) -> str: :return: A human-readable string of the object. :rtype: string """ - fullname = self.__module__ + "." 
+ self.__class__.__name__ + fullname = f"{self.__module__}.{self.__class__.__name__}" items = ('"%s" : %r' % (k, v) for k, v in sorted(self.__dict__.items())) str_dict_items = "{%s}" % (",\n".join(items)) return "<%s object at 0x%016X>\n%s" % (fullname, id(self), str_dict_items) @@ -986,7 +981,7 @@ def __str__(self) -> str: class QueryResult(object): """ - Wrapper around an a query result. Users should not create instances of this class, it is + Wrapper around query result. Users should not create instances of this class, it is generated by a :func:`SPARQLWrapper.query` call. The results can be converted to various formats, or used directly. @@ -1000,7 +995,7 @@ class QueryResult(object): For convenience, these methods are also available on the :class:`QueryResult` instance. The :func:`__iter__` and :func:`next` methods are also implemented (by mapping them to :attr:`response`). This - means that the common idiom ``for l in obj : do_something_with_line(l)`` would work, too. + means that the common idiom ``for l in obj : do_something_with_line(l)`` would work, too. Note: DEPRECATED :ivar response: the direct HTTP response; a file-like object, as return by the :func:`urllib2.urlopen` library call. :ivar requestedFormat: The requested format. The possible values are: :data:`JSON`, :data:`XML`, :data:`RDFXML`, @@ -1011,7 +1006,7 @@ class QueryResult(object): ConvertResult = Union[bytes, str, Dict[Any, Any], "Graph", Document, None] - def __init__(self, result: Union[HTTPResponse, Tuple[HTTPResponse, str]]) -> None: + def __init__(self, result: Union[requests.models.Response, Tuple[requests.models.Response, str]]) -> None: """ :param result: HTTP response stemming from a :func:`SPARQLWrapper.query` call, or a tuple with the expected format: (response, format). @@ -1028,7 +1023,7 @@ def geturl(self) -> str: :return: URL of the original call. 
:rtype: string """ - return self.response.geturl() + return self.response.url def info(self) -> KeyCaseInsensitiveDict[str]: """Return the meta-information of the HTTP result. @@ -1036,16 +1031,26 @@ def info(self) -> KeyCaseInsensitiveDict[str]: :return: meta-information of the HTTP result. :rtype: dict """ - return KeyCaseInsensitiveDict(dict(self.response.info())) + return KeyCaseInsensitiveDict(dict(self.response.headers)) + @deprecation.deprecated(deprecated_in="2.0.0", current_version=__version__, + details="requests does not need another __iter__ method") def __iter__(self) -> Iterator[bytes]: """Return an iterator object. This method is expected for the inclusion of the object in a standard ``for`` loop. + + **DEPRECATED:: In version 2.0.0. - It is possible to iterate through requests objects. Method not needed. """ return self.response.__iter__() + @deprecation.deprecated(deprecated_in="2.0.0", current_version=__version__, + details="requests does not need another __next__ method") def __next__(self) -> bytes: - """Method for the standard iterator.""" + """ + Method for the standard iterator. + + **DEPRECATED:: In version 2.0.0. - It is possible to iterate through requests objects. Method not needed. + """ return next(self.response) def _convertJSON(self) -> Dict[Any, Any]: @@ -1056,7 +1061,7 @@ def _convertJSON(self) -> Dict[Any, Any]: :return: converted result. :rtype: dict """ - json_str = json.loads(self.response.read().decode("utf-8")) + json_str = self.response.json() if isinstance(json_str, dict): return json_str else: @@ -1070,9 +1075,11 @@ def _convertXML(self) -> Document: :return: converted result. 
:rtype: :class:`xml.dom.minidom.Document` """ - doc = parse(self.response) - rdoc = cast(Document, doc) - return rdoc + xml_str = self.response.text + if isinstance(xml_str, str): + return xml.dom.minidom.parseString(self.response.text) + else: + raise TypeError(type(xml_str)) def _convertRDF(self) -> "Graph": """ @@ -1095,7 +1102,7 @@ def _convertN3(self) -> bytes: :return: converted result. :rtype: string """ - return self.response.read() + return self.response.content def _convertCSV(self) -> bytes: """ @@ -1105,7 +1112,7 @@ def _convertCSV(self) -> bytes: :return: converted result. :rtype: string """ - return self.response.read() + return self.response.content def _convertTSV(self) -> bytes: """ @@ -1115,7 +1122,7 @@ def _convertTSV(self) -> bytes: :return: converted result. :rtype: string """ - return self.response.read() + return self.response.content def _convertJSONLD(self) -> "Graph": """ @@ -1158,10 +1165,10 @@ def _content_type_in_list(real: str, expected: List[str]) -> bool: types of the expected list. :rtype: boolean """ - return True in [real.find(mime) != -1 for mime in expected] + return True in [mime in real for mime in expected] def _validate_format( - format_name: str, allowed: List[str], mime: str, requested: str + format_name: str, allowed: List[str], mime: str, requested: str ) -> None: """Internal method for validating if the requested format is one of the allowed formats. @@ -1213,11 +1220,10 @@ def _validate_format( return self._convertJSONLD() else: warnings.warn( - "unknown response content type '%s' returning raw response..." - % (ct), + f"unknown response content type '{ct}' returning raw response...", RuntimeWarning, ) - return self.response.read() + return self.response.content def _get_responseFormat(self) -> Optional[str]: """ @@ -1244,7 +1250,7 @@ def _content_type_in_list(real: str, expected: List[str]) -> bool: types of the expected list. 
:rtype: boolean """ - return True in [real.find(mime) != -1 for mime in expected] + return True in [mime in real for mime in expected] if "content-type" in self.info(): ct = self.info()["content-type"] # returned Content-Type value @@ -1269,8 +1275,7 @@ def _content_type_in_list(real: str, expected: List[str]) -> bool: return JSONLD else: warnings.warn( - "Unknown response content type. Returning raw content-type ('%s')." - % (ct), + f"Unknown response content type. Returning raw content-type ('{ct}').", RuntimeWarning, ) return ct @@ -1285,10 +1290,10 @@ def print_results(self, minWidth: Optional[int] = None) -> None: """ # Check if the requested format was JSON. If not, exit. - responseFormat = self._get_responseFormat() - if responseFormat != JSON: + response_format = self._get_responseFormat() + if response_format != JSON: message = "Format return was %s, but JSON was expected. No printing." - warnings.warn(message % (responseFormat), RuntimeWarning) + warnings.warn(message % response_format, RuntimeWarning) return None results = self._convertJSON() @@ -1296,18 +1301,12 @@ def print_results(self, minWidth: Optional[int] = None) -> None: width = self.__get_results_width(results, minWidth) else: width = self.__get_results_width(results) - index = 0 - for var in results["head"]["vars"]: - print( - ("?" 
+ var).ljust(width[index]), - "|", - ) - index += 1 + for index, var in enumerate(results["head"]["vars"]): + print(f"?{var}".ljust(width[index]), "|") print() print("=" * (sum(width) + 3 * len(width))) for result in results["results"]["bindings"]: - index = 0 - for var in results["head"]["vars"]: + for index, var in enumerate(results["head"]["vars"]): result_value = self.__get_prettyprint_string_sparql_var_result( result[var] ) @@ -1315,23 +1314,20 @@ def print_results(self, minWidth: Optional[int] = None) -> None: result_value.ljust(width[index]), "|", ) - index += 1 print() def __get_results_width( - self, results: Dict[Any, Any], minWidth: int = 2 + self, results: Dict[Any, Any], min_width: int = 2 ) -> List[int]: - width: List[int] = [] - for var in results["head"]["vars"]: - width.append(max(minWidth, len(var) + 1)) + width: List[int] = [ + max(min_width, len(var) + 1) for var in results["head"]["vars"] + ] for result in results["results"]["bindings"]: - index = 0 - for var in results["head"]["vars"]: + for index, var in enumerate(results["head"]["vars"]): result_value = self.__get_prettyprint_string_sparql_var_result( result[var] ) width[index] = max(width[index], len(result_value)) - index += 1 return width def __get_prettyprint_string_sparql_var_result(self, result: Dict[str, str]) -> str: @@ -1339,9 +1335,9 @@ def __get_prettyprint_string_sparql_var_result(self, result: Dict[str, str]) -> lang = result.get("xml:lang", None) datatype = result.get("datatype", None) if lang is not None: - value += "@" + lang + value += f"@{lang}" if datatype is not None: - value += " [" + datatype + "]" + value += f" [{datatype}]" return value def __str__(self) -> str: @@ -1349,21 +1345,21 @@ def __str__(self) -> str: :return: A human-readable string of the object. :rtype: string - .. versionadded:: 1.8.3 + .. version added:: 1.8.3 """ - fullname = self.__module__ + "." 
+ self.__class__.__name__ - str_requestedFormat = '"requestedFormat" : ' + repr(self.requestedFormat) - str_url = self.response.geturl() - str_code = self.response.getcode() - str_headers = self.response.info() + fullname = f"{self.__module__}.{self.__class__.__name__}" + str_requested_format = f'"requestedFormat" : {repr(self.requestedFormat)}' + str_url = self.response.url + str_code = self.response.status_code + str_headers = self.response.headers str_response = ( - '"response (a file-like object, as return by the urllib2.urlopen library call)" : {\n\t"url" : "' - '%s",\n\t"code" : "%s",\n\t"headers" : %s}' - % (str_url, str_code, str_headers) + '"response (a file-like object, as return by the urllib2.urlopen library call)" : {\n\t"url" : "' + '%s",\n\t"code" : "%s",\n\t"headers" : %s}' + % (str_url, str_code, str_headers) ) return "<%s object at 0x%016X>\n{%s,\n%s}" % ( fullname, id(self), - str_requestedFormat, + str_requested_format, str_response, ) diff --git a/SPARQLWrapper/__init__.py b/SPARQLWrapper/__init__.py index 885978b..745e176 100644 --- a/SPARQLWrapper/__init__.py +++ b/SPARQLWrapper/__init__.py @@ -3,8 +3,8 @@ """ **SPARQLWrapper** is a simple Python wrapper around a `SPARQL `_ service to -remotelly execute your queries. It helps in creating the query -invokation and, possibly, convert the result into a more manageable +remotely execute your queries. It helps in creating the query +invocation and, possibly, convert the result into a more manageable format. 
""" diff --git a/SPARQLWrapper/sparql_dataframe.py b/SPARQLWrapper/sparql_dataframe.py index e8707f5..86f3822 100644 --- a/SPARQLWrapper/sparql_dataframe.py +++ b/SPARQLWrapper/sparql_dataframe.py @@ -16,7 +16,7 @@ class QueryException(Exception): def get_sparql_dataframe_orig( - endpoint: str, query: Union[str, bytes] + endpoint: str, query: Union[str, bytes] ) -> "pd.DataFrame": """copy paste from: https://github.com/lawlesst/sparql-dataframe""" # pandas inside to avoid requiring it @@ -28,15 +28,17 @@ def get_sparql_dataframe_orig( raise QueryException("Only SPARQL SELECT queries are supported.") sparql.setReturnFormat(CSV) results = sparql.query().convert() - if isinstance(results, bytes): - _csv = io.StringIO(results.decode("utf-8")) - return pd.read_csv(_csv, sep=",") - else: + + if not isinstance(results, bytes): raise TypeError(type(results)) + _csv = io.StringIO(results.decode("utf-8")) + + return pd.read_csv(_csv, sep=",") + def get_sparql_typed_dict( - endpoint: str, query: Union[str, bytes] + endpoint: str, query: Union[str, bytes] ) -> List[Dict[str, Value]]: """modified from: https://github.com/lawlesst/sparql-dataframe""" # pandas inside to avoid requiring it @@ -70,5 +72,4 @@ def get_sparql_dataframe(endpoint: str, query: Union[str, bytes]) -> "pd.DataFra d = get_sparql_typed_dict(endpoint, query) # TODO: will nan fill somehow, make more strict if there is way of getting the nan types from rdflib - df = pd.DataFrame(d) - return df + return pd.DataFrame(d)