diff --git a/rdflib/graph.py b/rdflib/graph.py index 1e2c71b36..d9f76c12e 100644 --- a/rdflib/graph.py +++ b/rdflib/graph.py @@ -285,14 +285,13 @@ from rdflib.store import Store from rdflib.term import ( BNode, - Genid, IdentifiedNode, Identifier, Literal, Node, - RDFLibGenid, URIRef, Variable, + _Deskolemizer, ) if TYPE_CHECKING: @@ -475,6 +474,7 @@ def __init__( self.context_aware = False self.formula_aware = False self.default_union = False + self._deskolemizer = _Deskolemizer() def __getnewargs__(self) -> tuple[Any, ...]: return ( @@ -1859,38 +1859,38 @@ def do_skolemize2(t: _TripleType) -> _TripleType: def de_skolemize( self, new_graph: Graph | None = None, uriref: URIRef | None = None ) -> Graph: + """ + Return a new graph with skolem IRIs replaced with their blank node mappings. + + :param new_graph: An optional target graph where the de-skolemization results + will be stored. If not provided, a new graph is created. + :param uriref: The skolem IRI to be de-skolemized. If not provided, + de-skolemization is applied to all skolem IRIs in the graph. + """ + def do_de_skolemize(uriref: URIRef, t: _TripleType) -> _TripleType: (s, p, o) = t if s == uriref: if TYPE_CHECKING: assert isinstance(s, URIRef) - s = s.de_skolemize() + s = self._deskolemizer(s) if o == uriref: if TYPE_CHECKING: assert isinstance(o, URIRef) - o = o.de_skolemize() + o = self._deskolemizer(o) return s, p, o def do_de_skolemize2(t: _TripleType) -> _TripleType: (s, p, o) = t - - if RDFLibGenid._is_rdflib_skolem(s): - s = RDFLibGenid(s).de_skolemize() - elif Genid._is_external_skolem(s): - s = Genid(s).de_skolemize() - - if RDFLibGenid._is_rdflib_skolem(o): - o = RDFLibGenid(o).de_skolemize() - elif Genid._is_external_skolem(o): - o = Genid(o).de_skolemize() - - return s, p, o + if TYPE_CHECKING: + assert isinstance(s, URIRef) and isinstance(o, URIRef) + return self._deskolemizer(s), p, self._deskolemizer(o) retval = Graph() if new_graph is None else new_graph if uriref is None: self._process_skolem_tuples(retval, do_de_skolemize2) - elif isinstance(uriref, Genid): + else: # type error: Argument 1 to "do_de_skolemize" has incompatible type "Optional[URIRef]"; expected "URIRef" self._process_skolem_tuples(retval, lambda t: do_de_skolemize(uriref, t)) # type: ignore[arg-type, unused-ignore] @@ -2539,11 +2539,11 @@ def graph( base: str | None = None, ) -> Graph: if identifier is None: - from rdflib.term import _SKOLEM_DEFAULT_AUTHORITY, rdflib_skolem_genid + from rdflib.term import _RDFLIB_GENID_PATH, _SKOLEM_DEFAULT_AUTHORITY self.bind( "genid", - _SKOLEM_DEFAULT_AUTHORITY + rdflib_skolem_genid, + _SKOLEM_DEFAULT_AUTHORITY + _RDFLIB_GENID_PATH, override=False, ) identifier = BNode().skolemize() diff --git a/rdflib/term.py b/rdflib/term.py index ce0127afd..3d61e27cb 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -55,7 +55,7 @@ TypeVar, overload, ) -from urllib.parse import urldefrag, urljoin, urlparse +from urllib.parse import urldefrag, urljoin, urlparse, urlsplit from uuid import uuid4 import rdflib @@ -93,9 +93,9 @@ _SKOLEM_DEFAULT_AUTHORITY = "https://rdflib.github.io" logger = logging.getLogger(__name__) -skolem_genid = "/.well-known/genid/" -rdflib_skolem_genid = "/.well-known/genid/rdflib/" -skolems: dict[str, BNode] = {} +_WELL_KNOWN_GENID = "/.well-known/genid/" +_RDFLIB_GENID_SUFFIX = "rdflib/" +_RDFLIB_GENID_PATH = "/.well-known/genid/" + _RDFLIB_GENID_SUFFIX _invalid_uri_chars = '<>" {}|\\^`' @@ -383,61 +383,30 @@ def __radd__(self, other) -> URIRef: def __mod__(self, other) -> URIRef: return self.__class__(str(self) % other) - def de_skolemize(self) -> BNode: - """Create a Blank Node from a skolem URI, in accordance - with http://www.w3.org/TR/rdf11-concepts/#section-skolemization. - This function accepts only rdflib type skolemization, to provide - a round-tripping within the system. - .. versionadded:: 4.0 - """ - if isinstance(self, RDFLibGenid): - parsed_uri = urlparse(f"{self}") - return BNode(value=parsed_uri.path[len(rdflib_skolem_genid) :]) - elif isinstance(self, Genid): - bnode_id = f"{self}" - if bnode_id in skolems: - return skolems[bnode_id] - else: - retval = BNode() - skolems[bnode_id] = retval - return retval - else: - raise Exception(f"<{self}> is not a skolem URI") +class _Deskolemizer: + __slots__ = "_skolems" + def __init__(self) -> None: + self._skolems: dict[str, BNode] = {} -class Genid(URIRef): - __slots__ = () - - @staticmethod - def _is_external_skolem(uri: Any) -> bool: - if not isinstance(uri, str): - uri = str(uri) - parsed_uri = urlparse(uri) - gen_id = parsed_uri.path.rfind(skolem_genid) - if gen_id != 0: - return False - return True - - -class RDFLibGenid(Genid): - __slots__ = () + def __call__(self, uri: URIRef) -> URIRef | BNode: + parsed_uri = urlsplit(uri) + if parsed_uri.query != "" or parsed_uri.fragment != "": + # Behaviour is undefined for skolem URIs with query or fragment, so just return the URI + return uri + if parsed_uri.path.startswith(_WELL_KNOWN_GENID): + genid_suffix = parsed_uri.path[len(_WELL_KNOWN_GENID) :] + if genid_suffix.startswith(_RDFLIB_GENID_SUFFIX): + return BNode(value=parsed_uri.path[len(_RDFLIB_GENID_PATH) :]) + else: + if uri in self._skolems: + return self._skolems[uri] - @staticmethod - def _is_rdflib_skolem(uri: Any) -> bool: - if not isinstance(uri, str): - uri = str(uri) - parsed_uri = urlparse(uri) - if ( - parsed_uri.params != "" - or parsed_uri.query != "" - or parsed_uri.fragment != "" - ): - return False - gen_id = parsed_uri.path.rfind(rdflib_skolem_genid) - if gen_id != 0: - return False - return True + retval = BNode() + self._skolems[uri] = retval + return retval + return uri def _unique_id() -> str: @@ -536,8 +505,8 @@ def skolemize( if authority is None: authority = _SKOLEM_DEFAULT_AUTHORITY if basepath is None: - basepath = rdflib_skolem_genid - skolem = basepath + str(self) + basepath = _RDFLIB_GENID_PATH + skolem = "%s%s" % (basepath, str(self)) return URIRef(urljoin(authority, skolem)) diff --git a/test/test_issues/test_issue1808.py b/test/test_issues/test_issue1808.py index 04f0927a4..c42ae1c66 100644 --- a/test/test_issues/test_issue1808.py +++ b/test/test_issues/test_issue1808.py @@ -1,5 +1,5 @@ from rdflib import Graph -from rdflib.term import BNode, URIRef, rdflib_skolem_genid +from rdflib.term import _RDFLIB_GENID_PATH, BNode, URIRef def test(): @@ -15,7 +15,7 @@ def test(): gs = g.skolemize() for s, p, o in gs: - assert isinstance(s, URIRef) and s.__contains__(rdflib_skolem_genid) + assert isinstance(s, URIRef) and _RDFLIB_GENID_PATH in s query_with_iri = "select ?p ?o {{ <{}> ?p ?o }}".format(s) query_for_all = "select ?s ?p ?o { ?s ?p ?o }" diff --git a/test/test_skolem_genid.py b/test/test_skolem_genid.py index ee88f5a80..f7253f734 100644 --- a/test/test_skolem_genid.py +++ b/test/test_skolem_genid.py @@ -1,8 +1,8 @@ -from rdflib import URIRef -from rdflib.term import Genid, RDFLibGenid +from rdflib import RDF, SDO, BNode, Graph, URIRef +from rdflib.term import _Deskolemizer -def test_skolem_genid_and_rdflibgenid(): +def test_skolem_genid_and_rdflib_genid(): rdflib_genid = URIRef( "https://rdflib.github.io/.well-known/genid/rdflib/N97c39b957bc444949a82793519348dc2" ) @@ -10,8 +10,37 @@ def test_skolem_genid_and_rdflibgenid(): "http://example.com/.well-known/genid/example/Ne864c0e3684044f381d518fdac652f2e" ) - assert RDFLibGenid._is_rdflib_skolem(rdflib_genid) is True - assert Genid._is_external_skolem(rdflib_genid) is True + _deskolemizer = _Deskolemizer() + rdflib_bnode = _deskolemizer(rdflib_genid) + assert isinstance(rdflib_bnode, BNode) + assert rdflib_bnode.n3() == "_:N97c39b957bc444949a82793519348dc2" - assert RDFLibGenid._is_rdflib_skolem(custom_genid) is False - assert Genid._is_external_skolem(custom_genid) is True + custom_bnode = _deskolemizer(custom_genid) + assert isinstance(custom_bnode, BNode) + assert custom_bnode.n3().startswith("_:") + + +def test_graph_de_skolemize(): + graph = Graph() + + rdflib_genid = URIRef( + "https://rdflib.github.io/.well-known/genid/rdflib/N97c39b957bc444949a82793519348dc2" + ) + custom_genid = URIRef( + "http://example.com/.well-known/genid/example/Ne864c0e3684044f381d518fdac652f2e" + ) + + rdflib_statement = (rdflib_genid, RDF.type, SDO.Thing) + custom_statement = (custom_genid, RDF.type, SDO.Person) + + graph.add(rdflib_statement) + graph.add(custom_statement) + graph = graph.de_skolemize(uriref=rdflib_genid) + + assert rdflib_statement not in graph + assert (BNode("N97c39b957bc444949a82793519348dc2"), RDF.type, SDO.Thing) in graph + assert custom_statement in graph + + graph = graph.de_skolemize(uriref=custom_genid) + assert custom_statement not in graph + assert isinstance(graph.value(predicate=RDF.type, object=SDO.Person), BNode)