-
Notifications
You must be signed in to change notification settings - Fork 299
feat: add join
method to Url
class
#1378
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 3 commits
a7d9351
7ef57ba
e8bd322
8b70975
6a4fa06
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -155,6 +155,14 @@ impl PyUrl { | |||||||||||||||||||||
(self.__str__(),) | ||||||||||||||||||||||
} | ||||||||||||||||||||||
|
||||||||||||||||||||||
/// Python `/` operator: joins `other` onto this URL by delegating to
/// `join` with `trailing_slash = true`.
fn __truediv__(&self, other: &str) -> PyResult<Self> { | ||||||||||||||||||||||
self.join(other, true) | ||||||||||||||||||||||
} | ||||||||||||||||||||||
|
||||||||||||||||||||||
/// Python `//` operator: joins `other` onto this URL by delegating to
/// `join` with `trailing_slash = false`.
fn __floordiv__(&self, other: &str) -> PyResult<Self> { | ||||||||||||||||||||||
self.join(other, false) | ||||||||||||||||||||||
} | ||||||||||||||||||||||
Comment on lines
+158
to
+164
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, sorry I missed these in the last round of review. I think the difference between the I think better we just have
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay a = Url("http://a")
print(a / "b" / "c" / "d")
# http://a/b/c/d/ a = Url("file:///home/user/")
print(a / "music" / "pop")
# file:///home/user/music/pop/ With print(a / "dir" / "dir" / "dir" // "file.txt") # file:///home/user/dir/dir/dir/file.txt There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, I see. Yikes, there are so many subtleties here! It seems to me that our >>> urllib.parse.urljoin("https://foo.com/a", "b")
'https://foo.com/b' versus pathlib's >>> pathlib.Path("/foo/a").joinpath("b")
PosixPath('/foo/a/b') Given these are inconsistent, I think we should perhaps back away from trying to have pathlib-like semantics at all. Would you be open to the idea of dropping the operators from the PR completely, so we can get There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Alternatively we could also have And then could have There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great question. I think I'd prefer we just had There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think without comment from anyone else, let's just do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would be great to have more time to discuss the semantics (does it need to match There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Really sorry for the late response. The main URL joining part is handled by rust-url's join method which implements the WHATWG URL spec. So, the
I think the IMO, we can have |
||||||||||||||||||||||
|
||||||||||||||||||||||
#[classmethod] | ||||||||||||||||||||||
#[pyo3(signature=(*, scheme, host, username=None, password=None, port=None, path=None, query=None, fragment=None))] | ||||||||||||||||||||||
#[allow(clippy::too_many_arguments)] | ||||||||||||||||||||||
|
@@ -190,6 +198,26 @@ impl PyUrl { | |||||||||||||||||||||
} | ||||||||||||||||||||||
cls.call1((url,)) | ||||||||||||||||||||||
} | ||||||||||||||||||||||
|
||||||||||||||||||||||
/// Resolve `path` against this URL and return the result as a new `PyUrl`.
///
/// The resolution itself is delegated to `self.lib_url.join(path)`; any error it
/// reports is converted into a Python `ValueError` carrying the error's message.
///
/// `trailing_slash` defaults to `true` on the Python side (see the `pyo3`
/// signature below). When it is set, and the joined URL has neither a query nor
/// a fragment and can be a base, the path is adjusted so that it ends in `/`.
/// In every other case the joined URL is returned unchanged.
#[pyo3(signature=(path, trailing_slash=true))] | ||||||||||||||||||||||
pub fn join(&self, path: &str, trailing_slash: bool) -> PyResult<Self> { | ||||||||||||||||||||||
let mut new_url = self | ||||||||||||||||||||||
.lib_url | ||||||||||||||||||||||
.join(path) | ||||||||||||||||||||||
.map_err(|err| PyValueError::new_err(err.to_string()))?; | ||||||||||||||||||||||
|
||||||||||||||||||||||
// Return the joined URL untouched when no trailing slash was requested, or when
// it carries a query or fragment, or when it cannot be a base.
if !trailing_slash || new_url.query().is_some() || new_url.fragment().is_some() || new_url.cannot_be_a_base() { | ||||||||||||||||||||||
return Ok(PyUrl::new(new_url)); | ||||||||||||||||||||||
} | ||||||||||||||||||||||
|
||||||||||||||||||||||
// Drop an empty last path segment (if there is one) and push an empty segment,
// so the path ends in `/` without gaining a second slash when it already did.
// NOTE(review): `cannot_be_a_base()` was already checked above, so the
// `map_err` branch below looks unreachable — confirm against the `url` crate docs.
new_url | ||||||||||||||||||||||
.path_segments_mut() | ||||||||||||||||||||||
.map_err(|()| PyValueError::new_err("Url cannot be a base"))? | ||||||||||||||||||||||
.pop_if_empty() | ||||||||||||||||||||||
.push(""); | ||||||||||||||||||||||
davidhewitt marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||
|
||||||||||||||||||||||
Ok(PyUrl::new(new_url)) | ||||||||||||||||||||||
} | ||||||||||||||||||||||
} | ||||||||||||||||||||||
|
||||||||||||||||||||||
#[pyclass(name = "MultiHostUrl", module = "pydantic_core._pydantic_core", subclass, frozen)] | ||||||||||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,10 @@ | |
|
||
from ..conftest import Err, PyAndJson | ||
|
||
SIMPLE_BASE = 'http://a/b/c/d' | ||
QUERY_BASE = 'http://a/b/c/d;p?q' | ||
QUERY_FRAGMENT_BASE = 'http://a/b/c/d;p?q#f' | ||
|
||
|
||
def test_url_ok(py_and_json: PyAndJson): | ||
v = py_and_json(core_schema.url_schema()) | ||
|
@@ -1299,3 +1303,150 @@ def test_url_build() -> None: | |
) | ||
assert url == Url('postgresql://testuser:[email protected]:5432/database?sslmode=require#test') | ||
assert str(url) == 'postgresql://testuser:[email protected]:5432/database?sslmode=require#test' | ||
|
||
|
||
# Each case is (base_url, join_path, expected_with_slash, expected_without_slash):
# `join_path` is joined onto `Url(base_url)` and the string form of the result is
# compared with `expected_with_slash` for `trailing_slash=True` and with
# `expected_without_slash` for `trailing_slash=False`.
@pytest.mark.parametrize( | ||
'base_url,join_path,expected_with_slash,expected_without_slash', | ||
[ | ||
# Tests are based on the URL specification from https://url.spec.whatwg.org/ | ||
# Joining empty path with or without trailing slash should not affect the base url. | ||
('http://example.com/', '', 'http://example.com/', 'http://example.com/'), | ||
('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2/', 'svn://pathtorepo/dir2'), | ||
('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2/', 'svn+ssh://pathtorepo/dir2'), | ||
('ws://a/b', 'g', 'ws://a/g/', 'ws://a/g'), | ||
('wss://a/b', 'g', 'wss://a/g/', 'wss://a/g'), | ||
('http://a/b/c/de', ';x', 'http://a/b/c/;x/', 'http://a/b/c/;x'), | ||
# Non-RFC-defined tests, covering variations of base and trailing | ||
# slashes | ||
('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/', 'http://a/b/c/f/g/'), | ||
('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/', 'http://a/b/f/g/'), | ||
('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/', 'http://a/f/g/'), | ||
('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/', 'http://a/f/g/'), | ||
('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g/', 'http://a/b/c/f/g'), | ||
('http://a/b/', '../../f/g/', 'http://a/f/g/', 'http://a/f/g/'), | ||
(SIMPLE_BASE, 'g:h', 'g:h', 'g:h'), | ||
(SIMPLE_BASE, 'g', 'http://a/b/c/g/', 'http://a/b/c/g'), | ||
(SIMPLE_BASE, './g', 'http://a/b/c/g/', 'http://a/b/c/g'), | ||
(SIMPLE_BASE, 'g/', 'http://a/b/c/g/', 'http://a/b/c/g/'), | ||
(SIMPLE_BASE, '/g', 'http://a/g/', 'http://a/g'), | ||
(SIMPLE_BASE, '//g', 'http://g/', 'http://g/'), | ||
(SIMPLE_BASE, '?y', 'http://a/b/c/d?y', 'http://a/b/c/d?y'), | ||
(SIMPLE_BASE, 'g?y', 'http://a/b/c/g?y', 'http://a/b/c/g?y'), | ||
(SIMPLE_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x'), | ||
(SIMPLE_BASE, '.', 'http://a/b/c/', 'http://a/b/c/'), | ||
(SIMPLE_BASE, './', 'http://a/b/c/', 'http://a/b/c/'), | ||
(SIMPLE_BASE, '..', 'http://a/b/', 'http://a/b/'), | ||
(SIMPLE_BASE, '../', 'http://a/b/', 'http://a/b/'), | ||
(SIMPLE_BASE, '../g', 'http://a/b/g/', 'http://a/b/g'), | ||
(SIMPLE_BASE, '../..', 'http://a/', 'http://a/'), | ||
(SIMPLE_BASE, '../../g', 'http://a/g/', 'http://a/g'), | ||
(SIMPLE_BASE, './../g', 'http://a/b/g/', 'http://a/b/g'), | ||
(SIMPLE_BASE, './g/.', 'http://a/b/c/g/', 'http://a/b/c/g/'), | ||
(SIMPLE_BASE, 'g/./h', 'http://a/b/c/g/h/', 'http://a/b/c/g/h'), | ||
(SIMPLE_BASE, 'g/../h', 'http://a/b/c/h/', 'http://a/b/c/h'), | ||
(SIMPLE_BASE, 'http:g', 'http://a/b/c/g/', 'http://a/b/c/g'), | ||
(SIMPLE_BASE, 'http:g?y', 'http://a/b/c/g?y', 'http://a/b/c/g?y'), | ||
(SIMPLE_BASE, 'http:g?y/./x', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x'), | ||
(SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo/', SIMPLE_BASE + '/foo'), | ||
(QUERY_BASE, '?y', 'http://a/b/c/d;p?y', 'http://a/b/c/d;p?y'), | ||
(QUERY_BASE, ';x', 'http://a/b/c/;x/', 'http://a/b/c/;x'), | ||
(QUERY_BASE, 'g:h', 'g:h', 'g:h'), | ||
(QUERY_BASE, 'g', 'http://a/b/c/g/', 'http://a/b/c/g'), | ||
(QUERY_BASE, './g', 'http://a/b/c/g/', 'http://a/b/c/g'), | ||
(QUERY_BASE, 'g/', 'http://a/b/c/g/', 'http://a/b/c/g/'), | ||
(QUERY_BASE, '/g', 'http://a/g/', 'http://a/g'), | ||
(QUERY_BASE, '//g', 'http://g/', 'http://g/'), | ||
# NOTE(review): the next case repeats the (QUERY_BASE, '?y') case listed earlier.
(QUERY_BASE, '?y', 'http://a/b/c/d;p?y', 'http://a/b/c/d;p?y'), | ||
(QUERY_BASE, 'g?y', 'http://a/b/c/g?y', 'http://a/b/c/g?y'), | ||
(QUERY_BASE, '#s', 'http://a/b/c/d;p?q#s', 'http://a/b/c/d;p?q#s'), | ||
(QUERY_BASE, 'g#s', 'http://a/b/c/g#s', 'http://a/b/c/g#s'), | ||
(QUERY_BASE, 'g?y#s', 'http://a/b/c/g?y#s', 'http://a/b/c/g?y#s'), | ||
# NOTE(review): the next case repeats the (QUERY_BASE, ';x') case listed earlier.
(QUERY_BASE, ';x', 'http://a/b/c/;x/', 'http://a/b/c/;x'), | ||
(QUERY_BASE, 'g;x', 'http://a/b/c/g;x/', 'http://a/b/c/g;x'), | ||
(QUERY_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s', 'http://a/b/c/g;x?y#s'), | ||
(QUERY_BASE, '', 'http://a/b/c/d;p?q', 'http://a/b/c/d;p?q'), | ||
(QUERY_BASE, '.', 'http://a/b/c/', 'http://a/b/c/'), | ||
(QUERY_BASE, './', 'http://a/b/c/', 'http://a/b/c/'), | ||
(QUERY_BASE, '..', 'http://a/b/', 'http://a/b/'), | ||
(QUERY_BASE, '../', 'http://a/b/', 'http://a/b/'), | ||
(QUERY_BASE, '../g', 'http://a/b/g/', 'http://a/b/g'), | ||
(QUERY_BASE, '../..', 'http://a/', 'http://a/'), | ||
(QUERY_BASE, '../../', 'http://a/', 'http://a/'), | ||
(QUERY_BASE, '../../g', 'http://a/g/', 'http://a/g'), | ||
(QUERY_BASE, '../../../g', 'http://a/g/', 'http://a/g'), | ||
# Abnormal Examples | ||
# NOTE(review): the next case repeats the (QUERY_BASE, '../../../g') case just above.
(QUERY_BASE, '../../../g', 'http://a/g/', 'http://a/g'), | ||
(QUERY_BASE, '../../../../g', 'http://a/g/', 'http://a/g'), | ||
(QUERY_BASE, '/./g', 'http://a/g/', 'http://a/g'), | ||
(QUERY_BASE, '/../g', 'http://a/g/', 'http://a/g'), | ||
(QUERY_BASE, 'g.', 'http://a/b/c/g./', 'http://a/b/c/g.'), | ||
(QUERY_BASE, '.g', 'http://a/b/c/.g/', 'http://a/b/c/.g'), | ||
(QUERY_BASE, 'g..', 'http://a/b/c/g../', 'http://a/b/c/g..'), | ||
(QUERY_BASE, '..g', 'http://a/b/c/..g/', 'http://a/b/c/..g'), | ||
(QUERY_BASE, './../g', 'http://a/b/g/', 'http://a/b/g'), | ||
(QUERY_BASE, './g/.', 'http://a/b/c/g/', 'http://a/b/c/g/'), | ||
(QUERY_BASE, 'g/./h', 'http://a/b/c/g/h/', 'http://a/b/c/g/h'), | ||
(QUERY_BASE, 'g/../h', 'http://a/b/c/h/', 'http://a/b/c/h'), | ||
(QUERY_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y/', 'http://a/b/c/g;x=1/y'), | ||
(QUERY_BASE, 'g;x=1/../y', 'http://a/b/c/y/', 'http://a/b/c/y'), | ||
(QUERY_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x'), | ||
(QUERY_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x', 'http://a/b/c/g?y/../x'), | ||
(QUERY_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x', 'http://a/b/c/g#s/./x'), | ||
(QUERY_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x', 'http://a/b/c/g#s/../x'), | ||
(QUERY_BASE, 'http:g', 'http://a/b/c/g/', 'http://a/b/c/g'), | ||
# Test with empty (but defined) components. | ||
(QUERY_FRAGMENT_BASE, '', 'http://a/b/c/d;p?q', 'http://a/b/c/d;p?q'), | ||
(QUERY_FRAGMENT_BASE, '#', 'http://a/b/c/d;p?q#', 'http://a/b/c/d;p?q#'), | ||
(QUERY_FRAGMENT_BASE, '#z', 'http://a/b/c/d;p?q#z', 'http://a/b/c/d;p?q#z'), | ||
(QUERY_FRAGMENT_BASE, '?', 'http://a/b/c/d;p?', 'http://a/b/c/d;p?'), | ||
(QUERY_FRAGMENT_BASE, '?#z', 'http://a/b/c/d;p?#z', 'http://a/b/c/d;p?#z'), | ||
(QUERY_FRAGMENT_BASE, '?y', 'http://a/b/c/d;p?y', 'http://a/b/c/d;p?y'), | ||
(QUERY_FRAGMENT_BASE, ';', 'http://a/b/c/;/', 'http://a/b/c/;'), | ||
(QUERY_FRAGMENT_BASE, ';?y', 'http://a/b/c/;?y', 'http://a/b/c/;?y'), | ||
(QUERY_FRAGMENT_BASE, ';#z', 'http://a/b/c/;#z', 'http://a/b/c/;#z'), | ||
(QUERY_FRAGMENT_BASE, ';x', 'http://a/b/c/;x/', 'http://a/b/c/;x'), | ||
(QUERY_FRAGMENT_BASE, '/w', 'http://a/w/', 'http://a/w'), | ||
(QUERY_FRAGMENT_BASE, '//;x', 'http://;x/', 'http://;x/'), | ||
(QUERY_FRAGMENT_BASE, '//v', 'http://v/', 'http://v/'), | ||
# For backward compatibility with RFC1630, the scheme name is allowed | ||
# to be present in a relative reference if it is the same as the base | ||
# URI scheme. | ||
(QUERY_FRAGMENT_BASE, 'http:', 'http://a/b/c/d;p?q', 'http://a/b/c/d;p?q'), | ||
(QUERY_FRAGMENT_BASE, 'http:#', 'http://a/b/c/d;p?q#', 'http://a/b/c/d;p?q#'), | ||
(QUERY_FRAGMENT_BASE, 'http:#z', 'http://a/b/c/d;p?q#z', 'http://a/b/c/d;p?q#z'), | ||
(QUERY_FRAGMENT_BASE, 'http:?', 'http://a/b/c/d;p?', 'http://a/b/c/d;p?'), | ||
(QUERY_FRAGMENT_BASE, 'http:?#z', 'http://a/b/c/d;p?#z', 'http://a/b/c/d;p?#z'), | ||
(QUERY_FRAGMENT_BASE, 'http:?y', 'http://a/b/c/d;p?y', 'http://a/b/c/d;p?y'), | ||
(QUERY_FRAGMENT_BASE, 'http:;', 'http://a/b/c/;/', 'http://a/b/c/;'), | ||
(QUERY_FRAGMENT_BASE, 'http:;?y', 'http://a/b/c/;?y', 'http://a/b/c/;?y'), | ||
(QUERY_FRAGMENT_BASE, 'http:;#z', 'http://a/b/c/;#z', 'http://a/b/c/;#z'), | ||
(QUERY_FRAGMENT_BASE, 'http:;x', 'http://a/b/c/;x/', 'http://a/b/c/;x'), | ||
(QUERY_FRAGMENT_BASE, 'http:/w', 'http://a/w/', 'http://a/w'), | ||
(QUERY_FRAGMENT_BASE, 'http://;x', 'http://;x/', 'http://;x/'), | ||
(QUERY_FRAGMENT_BASE, 'http:///w', 'http://w/', 'http://w/'), | ||
(QUERY_FRAGMENT_BASE, 'http://v', 'http://v/', 'http://v/'), | ||
# Different scheme is not ignored. | ||
(QUERY_FRAGMENT_BASE, 'https:;', 'https://;/', 'https://;/'), | ||
(QUERY_FRAGMENT_BASE, 'https:;x', 'https://;x/', 'https://;x/'), | ||
], | ||
) | ||
def test_url_join(base_url, join_path, expected_with_slash, expected_without_slash) -> None: | ||
"""Tests are based on | ||
https://github.com/python/cpython/blob/3a0e7f57628466aedcaaf6c5ff7c8224f5155a2c/Lib/test/test_urlparse.py | ||
and the URL specification from https://url.spec.whatwg.org/ | ||
""" | ||
url = Url(base_url) | ||
# The same base URL is joined twice, once per `trailing_slash` setting.
assert str(url.join(join_path, trailing_slash=True)) == expected_with_slash | ||
assert str(url.join(join_path, trailing_slash=False)) == expected_without_slash | ||
|
||
|
||
# Exercises the operator forms of `Url.join`: `/` joins with trailing_slash=True
# and `//` joins with trailing_slash=False. Each operator returns a new Url, so
# the expressions below chain left to right.
def test_url_join_operators() -> None: | ||
url = Url('http://a/b/c/d') | ||
assert str(url / 'e' / 'f') == 'http://a/b/c/e/f/' | ||
assert str(url / 'e' // 'f') == 'http://a/b/c/e/f' | ||
# 'e' is joined without a trailing slash here, so the following 'f' replaces it
# as the last path segment.
assert str(url // 'e' // 'f') == 'http://a/b/c/f' | ||
# Once the joined URL carries a query or fragment, no trailing slash is appended.
assert str(url / 'e' / '?x=1') == 'http://a/b/c/e/?x=1' | ||
assert str(url / 'e' / '?x=1' / '#y') == 'http://a/b/c/e/?x=1#y' | ||
assert str(url / 'e' / '?x=1' // '#y') == 'http://a/b/c/e/?x=1#y' | ||
assert str(url / 'e' // '?x=1' / '#y') == 'http://a/b/c/e/?x=1#y' | ||
assert str(url // 'e' / '?x=1' / '#y') == 'http://a/b/c/e?x=1#y' |
Uh oh!
There was an error while loading. Please reload this page.