jsonld - Improve handling of URNs in norm_url (#2892)

avillar · web-flow · commit 5baa8d5146bf · 2024-08-26T16:22:47.000+10:00
* jsonld - Improve handling of URNs in norm_url

* Fix import package

* Fix formatting with black
diff --git a/rdflib/plugins/shared/jsonld/util.py b/rdflib/plugins/shared/jsonld/util.py
@@ -223,13 +223,28 @@ def norm_url(base: str, url: str) -> str:
     """
     if "://" in url:
         return url
-    parts = urlsplit(urljoin(base, url))
-    path = normpath(parts[2])
-    if sep != "/":
-        path = "/".join(path.split(sep))
-    if parts[2].endswith("/") and not path.endswith("/"):
-        path += "/"
-    result = urlunsplit(parts[0:2] + (path,) + parts[3:])
+
+    # URN bases are resolved separately below; split both inputs once up front
+    parsed_base = urlsplit(base)
+    parsed_url = urlsplit(url)
+    if parsed_url.scheme:
+        # Assume full URL
+        return url
+    if parsed_base.scheme in ("urn", "urn-x"):
+        # url has no scheme -> treat it as relative and join it onto the URN base path
+        base_path_parts = parsed_base.path.split("/", 1)
+        base_path = "/" + (base_path_parts[1] if len(base_path_parts) > 1 else "")
+        joined_path = urljoin(base_path, parsed_url.path)
+        fragment = f"#{parsed_url.fragment}" if parsed_url.fragment else ""
+        result = f"{parsed_base.scheme}:{base_path_parts[0]}{joined_path}{fragment}"
+    else:
+        parts = urlsplit(urljoin(base, url))
+        path = normpath(parts[2])
+        if sep != "/":
+            path = "/".join(path.split(sep))
+        if parts[2].endswith("/") and not path.endswith("/"):
+            path += "/"
+        result = urlunsplit(parts[0:2] + (path,) + parts[3:])
     if url.endswith("#") and not result.endswith("#"):
         result += "#"
     return result
diff --git a/test/jsonld/test_norm_urn.py b/test/jsonld/test_norm_urn.py
@@ -0,0 +1,21 @@
+from rdflib.plugins.shared.jsonld.util import norm_url
+
+
+def test_norm_urn():  # expected norm_url results when the base is a urn: IRI
+    assert norm_url("urn:ns:test", "/one") == "urn:ns:test/one"  # rooted ref, base without path
+    assert norm_url("urn:ns:test/path/", "two") == "urn:ns:test/path/two"  # base ends in "/": appended
+    assert norm_url("urn:ns:test/path", "two") == "urn:ns:test/two"  # last base segment replaced
+    assert norm_url("urn:ns:test", "three") == "urn:ns:test/three"  # base without path: "/" inserted
+    assert norm_url("urn:ns:test/path#", "four") == "urn:ns:test/four"  # trailing "#" of base dropped
+    assert norm_url("urn:ns:test/path1/path2/", "../path3") == "urn:ns:test/path1/path3"  # ".." pops path2
+    assert norm_url("urn:ns:test/path1/path2/", "/path3") == "urn:ns:test/path3"  # rooted ref replaces path
+    assert (  # a reference containing "://" is returned unchanged
+        norm_url("urn:ns:test/path1/path2/", "http://example.com")
+        == "http://example.com"
+    )
+    assert (  # a reference with its own scheme (another URN) is returned unchanged
+        norm_url("urn:ns:test/path1/path2/", "urn:another:test/path")
+        == "urn:another:test/path"
+    )
+    assert norm_url("urn:ns:test/path", "#four") == "urn:ns:test/path#four"  # fragment-only ref keeps path
+    assert norm_url("urn:ns:test/path/", "#four") == "urn:ns:test/path/#four"  # same, trailing "/" kept