Skip to content

Commit 575c331

Browse files
committed
Add the URL subtraction feature
1 parent ab22ffa commit 575c331

File tree

6 files changed

+187
-1
lines changed

6 files changed

+187
-1
lines changed

CHANGES/1538.feature.rst

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
Added a new method :py:meth:`~yarl.URL.relative_to`
2+
to get the relative path between URLs.
3+
4+
Note that both URLs must have the same scheme, user, password, host and port:
5+
6+
.. code-block:: pycon
7+
8+
>>> target = URL("http://example.com/path/to")
9+
>>> base = URL("http://example.com/")
10+
>>> target.relative_to(base)
11+
URL('path/to')
12+
>>> base.relative_to(target)
13+
URL('../..')
14+
15+
-- by :user:`babieiev`.

docs/api.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,6 +1015,22 @@ The path is encoded if needed.
10151015
>>> base.join(URL('//python.org/page.html'))
10161016
URL('http://python.org/page.html')
10171017

1018+
.. method:: URL.relative_to(url)
1019+
1020+
Return a new URL holding the relative *path* that leads from *url* to this URL.
1021+
*scheme*, *user*, *password*, *host*, *port*, *query* and *fragment* are removed.
1022+
1023+
.. doctest::
1024+
1025+
>>> target = URL('http://example.com/path/to')
1026+
>>> base = URL('http://example.com/')
1027+
>>> target.relative_to(base)
1028+
URL('path/to')
1029+
>>> base.relative_to(target)
1030+
URL('../..')
1031+
1032+
.. versionadded:: 1.21
1033+
10181034
Human readable representation
10191035
-----------------------------
10201036

tests/test_url.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pytest
55

66
from yarl import URL
7+
from yarl._path import normalize_path
78

89
_WHATWG_C0_CONTROL_OR_SPACE = (
910
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10"
@@ -994,6 +995,91 @@ def test_div_with_dots() -> None:
994995
assert url.raw_path == "/path/to"
995996

996997

998+
# relative_to
999+
1000+
1001+
@pytest.mark.parametrize(
    ("target", "base", "expected"),
    [
        ("http://example.com/path/to", "http://example.com/", "path/to"),
        ("http://example.com/path/to", "http://example.com/spam", "../path/to"),
        ("http://example.com/path/to", "http://example.com/spam/", "../path/to"),
        ("http://example.com/this/is/a/test", "http://example.com/this/", "is/a/test"),
        (
            "http://example.com/this/./is/a/test",
            "http://example.com/this/",
            "is/a/test",
        ),
        (
            "http://example.com/////path/////to",
            "http://example.com/////spam",
            "../path/////to",
        ),
        (
            "http://example.com////path/////to",
            "http://example.com/////spam",
            "../../path/////to",
        ),
        (
            "http://example.com/this/is/../a//test",
            "http://example.com/this/",
            "a//test",
        ),
        ("http://example.com/", "http://example.com/", "."),
        ("http://example.com", "http://example.com", "."),
        ("http://example.com/", "http://example.com", "."),
        ("http://example.com", "http://example.com/", "."),
        ("//example.com", "//example.com", "."),
        ("/path/to", "/spam/", "../path/to"),
        ("path/to", "spam/", "../path/to"),
        (
            "http://example.com/path/to//",
            "http://example.com/path/to",
            ".//",
        ),
        (
            "http://example.com/path/to//",
            "http://example.com/path/to/",
            ".//",
        ),
        ("path/../to", "path/", "../to"),
        ("path/..", ".", "../path/.."),
        ("path/../replace/me", "path/../replace", "me"),
        ("path/../replace/me", "path/../replace/", "me"),
        ("path/to", "spam", "../path/to"),
        ("..", ".", "../.."),
        (".", "..", "../."),
    ],
)
def test_relative_to(target: str, base: str, expected: str) -> None:
    """Check ``URL.relative_to`` against a table of target/base pairs.

    Each row is first validated for self-consistency: appending *expected*
    to *base* must give the same normalized path as *target*. Only then is
    the method under test compared with *expected*.
    """
    base_url = URL(base)
    target_url = URL(target)

    # Guard against a wrong row in the table: base / expected must land on
    # the target path once both sides are normalized.
    rejoined_path = (base_url / expected).path
    assert normalize_path(target_url.path) == normalize_path(rejoined_path)

    # The actual behaviour under test.
    assert target_url.relative_to(base_url) == URL(expected)
1063+
1064+
1065+
def test_relative_to_a_non_url() -> None:
    """``relative_to`` rejects a plain string argument with ``TypeError``."""
    target_url = URL("https://example.com/path/to")
    with pytest.raises(TypeError, match=r"^other should be URL$"):
        target_url.relative_to("http://example.com/")
1069+
1070+
1071+
def test_relative_to_with_different_schemes() -> None:
    """``relative_to`` raises ``ValueError`` when the schemes differ."""
    http_url = URL("http://example.com/")
    https_url = URL("https://example.com/")
    with pytest.raises(
        ValueError, match=r"^Both URLs should have the same scheme$"
    ):
        http_url.relative_to(https_url)
1075+
1076+
1077+
def test_relative_to_with_different_netlocs() -> None:
    """``relative_to`` raises ``ValueError`` when the netlocs differ."""
    spam_url = URL("https://spam.com/")
    ham_url = URL("https://ham.com/")
    with pytest.raises(
        ValueError, match=r"^Both URLs should have the same netloc$"
    ):
        spam_url.relative_to(ham_url)
1081+
1082+
9971083
# joinpath
9981084

9991085

tests/test_url_benchmarks.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@
3434
QUERY_URL = URL(QUERY_URL_STR)
3535
URL_WITH_PATH_STR = "http://www.domain.tld/req"
3636
URL_WITH_PATH = URL(URL_WITH_PATH_STR)
37+
URL_WITH_LONGER_PATH = URL("http://www.domain.tld/req/req/req")
38+
URL_WITH_LONG_PATH = URL("http://www.domain.tld/" + "req/" * 30)
39+
URL_WITH_VERY_LONG_PATH = URL("http://www.domain.tld/" + "req/" * 100)
3740
REL_URL = URL("/req")
3841
QUERY_SEQ = {str(i): tuple(str(j) for j in range(10)) for i in range(10)}
3942
SIMPLE_QUERY = {str(i): str(i) for i in range(10)}
@@ -574,6 +577,20 @@ def _run() -> None:
574577
URL_WITH_PATH.parent
575578

576579

580+
def test_relative_to(benchmark: BenchmarkFixture) -> None:
    """Benchmark ``URL.relative_to`` on two short paths."""

    @benchmark
    def _run() -> None:
        # 100 calls per benchmark round; the result is discarded.
        for _ in range(100):
            URL_WITH_LONGER_PATH.relative_to(URL_WITH_PATH)
585+
586+
587+
def test_relative_to_long_urls(benchmark: BenchmarkFixture) -> None:
    """Benchmark ``URL.relative_to`` on paths with many segments."""

    @benchmark
    def _run() -> None:
        # 100 calls per benchmark round; the result is discarded.
        for _ in range(100):
            URL_WITH_VERY_LONG_PATH.relative_to(URL_WITH_LONG_PATH)
592+
593+
577594
def test_url_join(benchmark: "BenchmarkFixture") -> None:
578595
@benchmark
579596
def _run() -> None:

yarl/_path.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,27 @@ def normalize_path(path: str) -> str:
3939

4040
segments = path.split("/")
4141
return prefix + "/".join(normalize_path_segments(segments))
42+
43+
44+
def calculate_relative_path(target: str, base: str) -> str:
    """Return the relative path that leads from *base* to *target*.

    Both paths are split on ``/`` after their trailing slashes are removed,
    and *base* is always treated as a directory. Segments are compared
    literally: ``.`` and ``..`` are not resolved. The result climbs out of
    every non-shared *base* segment with ``..`` and then descends into the
    remaining *target* segments; it is ``"."`` when nothing is left.

    A run of two or more trailing slashes on *target* is appended to the
    result unchanged, while a single trailing slash is dropped.
    """
    stripped_target = target.rstrip("/")
    target_parts = stripped_target.split("/")
    base_parts = base.rstrip("/").split("/")

    # Length of the leading run of segments shared by both paths.
    shared = 0
    limit = min(len(base_parts), len(target_parts))
    while shared < limit and base_parts[shared] == target_parts[shared]:
        shared += 1

    # One ".." per leftover base segment, then the leftover target segments.
    climb = [".."] * (len(base_parts) - shared)
    joined = "/".join(climb + target_parts[shared:])
    if not joined:
        joined = "."

    # Whatever rstrip() removed from the end of the target.
    tail = target[len(stripped_target) :]
    if len(tail) > 1:
        return joined + tail
    return joined

yarl/_url.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
split_url,
2222
unsplit_result,
2323
)
24-
from ._path import normalize_path, normalize_path_segments
24+
from ._path import calculate_relative_path, normalize_path, normalize_path_segments
2525
from ._query import (
2626
Query,
2727
QueryVariable,
@@ -1387,6 +1387,34 @@ def with_suffix(
13871387
fragment = self._fragment if keep_fragment else ""
13881388
return from_parts(self._scheme, netloc, "/".join(parts), query, fragment)
13891389

1390+
def relative_to(self, other: object) -> "URL":
    """Return a relative URL that leads from *other* to this URL.

    Only a path is produced: the result is built with an empty scheme,
    netloc, query and fragment. Both URLs must have the same scheme and
    netloc.

    Raises:
        TypeError: if *other* is not a ``URL`` instance.
        ValueError: if the schemes or the netlocs of the two URLs differ.

    Example:
        >>> target = URL("http://example.com/path/to")
        >>> base = URL("http://example.com/")
        >>> target.relative_to(base)
        URL('path/to')
        >>> base.relative_to(target)
        URL('../..')
    """
    if type(other) is not URL:
        raise TypeError("other should be URL")

    # Query and fragment are unpacked but deliberately ignored.
    own_scheme, own_netloc, own_path, _query, _fragment = self._val
    other_scheme, other_netloc, other_path, _query, _fragment = other._val

    if own_scheme != other_scheme:
        raise ValueError("Both URLs should have the same scheme")
    if own_netloc != other_netloc:
        raise ValueError("Both URLs should have the same netloc")

    relative_path = calculate_relative_path(own_path, other_path)
    return from_parts("", "", relative_path, "", "")
1417+
13901418
def join(self, url: "URL") -> "URL":
13911419
"""Join URLs
13921420

0 commit comments

Comments
 (0)