Skip to content

Commit 5baa8d5

Browse files
authored
jsonld - Improve handling of URNs in norm_url (#2892)
* jsonld - Improve handling of URNs in norm_url
* Fix import package
* Fix formatting with black
1 parent 1618b3f commit 5baa8d5

File tree

2 files changed

+43
-7
lines changed

2 files changed

+43
-7
lines changed

rdflib/plugins/shared/jsonld/util.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -223,13 +223,28 @@ def norm_url(base: str, url: str) -> str:
223223
"""
224224
if "://" in url:
225225
return url
226-
parts = urlsplit(urljoin(base, url))
227-
path = normpath(parts[2])
228-
if sep != "/":
229-
path = "/".join(path.split(sep))
230-
if parts[2].endswith("/") and not path.endswith("/"):
231-
path += "/"
232-
result = urlunsplit(parts[0:2] + (path,) + parts[3:])
226+
227+
# Fix for URNs
228+
parsed_base = urlsplit(base)
229+
parsed_url = urlsplit(url)
230+
if parsed_url.scheme:
231+
# Assume full URL
232+
return url
233+
if parsed_base.scheme in ("urn", "urn-x"):
234+
# No scheme -> assume relative and join paths
235+
base_path_parts = parsed_base.path.split("/", 1)
236+
base_path = "/" + (base_path_parts[1] if len(base_path_parts) > 1 else "")
237+
joined_path = urljoin(base_path, parsed_url.path)
238+
fragment = f"#{parsed_url.fragment}" if parsed_url.fragment else ""
239+
result = f"{parsed_base.scheme}:{base_path_parts[0]}{joined_path}{fragment}"
240+
else:
241+
parts = urlsplit(urljoin(base, url))
242+
path = normpath(parts[2])
243+
if sep != "/":
244+
path = "/".join(path.split(sep))
245+
if parts[2].endswith("/") and not path.endswith("/"):
246+
path += "/"
247+
result = urlunsplit(parts[0:2] + (path,) + parts[3:])
233248
if url.endswith("#") and not result.endswith("#"):
234249
result += "#"
235250
return result

test/jsonld/test_norm_urn.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from rdflib.plugins.shared.jsonld.util import norm_url
2+
3+
4+
def test_norm_urn():
    """Check norm_url against a table of URN bases and references."""
    # Each entry is (base, reference, expected result); entries are checked
    # in order, so the first mismatch fails the test.
    cases = (
        ("urn:ns:test", "/one", "urn:ns:test/one"),
        ("urn:ns:test/path/", "two", "urn:ns:test/path/two"),
        ("urn:ns:test/path", "two", "urn:ns:test/two"),
        ("urn:ns:test", "three", "urn:ns:test/three"),
        ("urn:ns:test/path#", "four", "urn:ns:test/four"),
        ("urn:ns:test/path1/path2/", "../path3", "urn:ns:test/path1/path3"),
        ("urn:ns:test/path1/path2/", "/path3", "urn:ns:test/path3"),
        # References that carry their own scheme come back unchanged.
        ("urn:ns:test/path1/path2/", "http://example.com", "http://example.com"),
        (
            "urn:ns:test/path1/path2/",
            "urn:another:test/path",
            "urn:another:test/path",
        ),
        # Fragment-only references keep the base path.
        ("urn:ns:test/path", "#four", "urn:ns:test/path#four"),
        ("urn:ns:test/path/", "#four", "urn:ns:test/path/#four"),
    )
    for base, reference, expected in cases:
        assert norm_url(base, reference) == expected

0 commit comments

Comments
 (0)