Skip to content

Commit 4d23017

Browse files
authored
revert url credential encoding (to be reintroduced as an option in future) (#1882)
1 parent 2e932c6 commit 4d23017

File tree

2 files changed

+212
-68
lines changed

2 files changed

+212
-68
lines changed

src/url.rs

Lines changed: 123 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use std::borrow::Cow;
22
use std::collections::hash_map::DefaultHasher;
3-
use std::fmt;
4-
use std::fmt::Formatter;
3+
use std::fmt::{self, Display};
4+
use std::fmt::{Formatter, Write};
55
use std::hash::{Hash, Hasher};
66
use std::sync::OnceLock;
77

@@ -229,14 +229,38 @@ impl PyUrl {
229229
path: Option<&str>,
230230
query: Option<&str>,
231231
fragment: Option<&str>,
232+
// encode_credentials: bool, // TODO: re-enable this
233+
) -> PyResult<Bound<'py, PyAny>> {
234+
Self::build_inner(
235+
cls, scheme, host, username, password, port, path, query, fragment, false,
236+
)
237+
}
238+
}
239+
240+
impl PyUrl {
241+
#[allow(clippy::too_many_arguments)]
242+
fn build_inner<'py>(
243+
cls: &Bound<'py, PyType>,
244+
scheme: &str,
245+
host: &str,
246+
username: Option<&str>,
247+
password: Option<&str>,
248+
port: Option<u16>,
249+
path: Option<&str>,
250+
query: Option<&str>,
251+
fragment: Option<&str>,
252+
encode_credentials: bool,
232253
) -> PyResult<Bound<'py, PyAny>> {
233254
let url_host = UrlHostParts {
234255
username: username.map(Into::into),
235256
password: password.map(Into::into),
236257
host: Some(host.into()),
237258
port,
238259
};
239-
let mut url = format!("{scheme}://{url_host}");
260+
let mut url = format!("{scheme}://");
261+
url_host
262+
.to_writer(&mut url, encode_credentials)
263+
.expect("writing to string should not fail");
240264
if let Some(path) = path {
241265
url.push('/');
242266
url.push_str(path);
@@ -446,7 +470,30 @@ impl PyMultiHostUrl {
446470
#[classmethod]
447471
#[pyo3(signature=(*, scheme, hosts=None, path=None, query=None, fragment=None, host=None, username=None, password=None, port=None))]
448472
#[allow(clippy::too_many_arguments)]
449-
pub fn build<'py>(
473+
fn build<'py>(
474+
cls: &Bound<'py, PyType>,
475+
scheme: &str,
476+
hosts: Option<Vec<UrlHostParts>>,
477+
path: Option<&str>,
478+
query: Option<&str>,
479+
fragment: Option<&str>,
480+
// convenience parameters to build with a single host
481+
host: Option<&str>,
482+
username: Option<&str>,
483+
password: Option<&str>,
484+
port: Option<u16>,
485+
// encode_credentials: bool, // TODO: re-enable this
486+
) -> PyResult<Bound<'py, PyAny>> {
487+
Self::build_inner(
488+
cls, scheme, hosts, path, query, fragment, host, username, password, port,
489+
false, // TODO: re-enable this
490+
)
491+
}
492+
}
493+
494+
impl PyMultiHostUrl {
495+
#[allow(clippy::too_many_arguments)]
496+
fn build_inner<'py>(
450497
cls: &Bound<'py, PyType>,
451498
scheme: &str,
452499
hosts: Option<Vec<UrlHostParts>>,
@@ -458,39 +505,44 @@ impl PyMultiHostUrl {
458505
username: Option<&str>,
459506
password: Option<&str>,
460507
port: Option<u16>,
508+
encode_credentials: bool,
461509
) -> PyResult<Bound<'py, PyAny>> {
462-
let mut url =
463-
if hosts.is_some() && (host.is_some() || username.is_some() || password.is_some() || port.is_some()) {
464-
return Err(PyValueError::new_err(
465-
"expected one of `hosts` or singular values to be set.",
466-
));
467-
} else if let Some(hosts) = hosts {
468-
// check all of host / user / password / port empty
469-
// build multi-host url
470-
let mut multi_url = format!("{scheme}://");
471-
for (index, single_host) in hosts.iter().enumerate() {
472-
if single_host.is_empty() {
473-
return Err(PyValueError::new_err(
474-
"expected one of 'host', 'username', 'password' or 'port' to be set",
475-
));
476-
}
477-
multi_url.push_str(&single_host.to_string());
478-
if index != hosts.len() - 1 {
479-
multi_url.push(',');
480-
}
510+
let mut url = format!("{scheme}://");
511+
512+
if hosts.is_some() && (host.is_some() || username.is_some() || password.is_some() || port.is_some()) {
513+
return Err(PyValueError::new_err(
514+
"expected one of `hosts` or singular values to be set.",
515+
));
516+
} else if let Some(hosts) = hosts {
517+
// check all of host / user / password / port empty
518+
// build multi-host url
519+
let len = hosts.len();
520+
for (index, single_host) in hosts.into_iter().enumerate() {
521+
if single_host.is_empty() {
522+
return Err(PyValueError::new_err(
523+
"expected one of 'host', 'username', 'password' or 'port' to be set",
524+
));
481525
}
482-
multi_url
483-
} else if host.is_some() {
484-
let url_host = UrlHostParts {
485-
username: username.map(Into::into),
486-
password: password.map(Into::into),
487-
host: host.map(Into::into),
488-
port,
489-
};
490-
format!("{scheme}://{url_host}")
491-
} else {
492-
return Err(PyValueError::new_err("expected either `host` or `hosts` to be set"));
526+
single_host
527+
.to_writer(&mut url, encode_credentials)
528+
.expect("writing to string should not fail");
529+
if index != len - 1 {
530+
url.push(',');
531+
}
532+
}
533+
} else if host.is_some() {
534+
let url_host = UrlHostParts {
535+
username: username.map(Into::into),
536+
password: password.map(Into::into),
537+
host: host.map(Into::into),
538+
port,
493539
};
540+
url_host
541+
.to_writer(&mut url, encode_credentials)
542+
.expect("writing to string should not fail");
543+
} else {
544+
return Err(PyValueError::new_err("expected either `host` or `hosts` to be set"));
545+
}
494546

495547
if let Some(path) = path {
496548
url.push('/');
@@ -508,55 +560,65 @@ impl PyMultiHostUrl {
508560
}
509561
}
510562

511-
pub struct UrlHostParts {
563+
struct UrlHostParts {
512564
username: Option<String>,
513565
password: Option<String>,
514566
host: Option<String>,
515567
port: Option<u16>,
516568
}
517569

518-
impl UrlHostParts {
519-
fn is_empty(&self) -> bool {
520-
self.host.is_none() && self.password.is_none() && self.host.is_none() && self.port.is_none()
570+
struct MaybeEncoded<'a>(&'a str, bool);
571+
572+
impl fmt::Display for MaybeEncoded<'_> {
573+
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
574+
if self.1 {
575+
write!(f, "{}", encode_userinfo_component(self.0))
576+
} else {
577+
write!(f, "{}", self.0)
578+
}
521579
}
522580
}
523581

524-
impl FromPyObject<'_> for UrlHostParts {
525-
fn extract_bound(ob: &Bound<'_, PyAny>) -> PyResult<Self> {
526-
let py = ob.py();
527-
let dict = ob.downcast::<PyDict>()?;
528-
Ok(UrlHostParts {
529-
username: dict.get_as(intern!(py, "username"))?,
530-
password: dict.get_as(intern!(py, "password"))?,
531-
host: dict.get_as(intern!(py, "host"))?,
532-
port: dict.get_as(intern!(py, "port"))?,
533-
})
582+
impl UrlHostParts {
583+
fn is_empty(&self) -> bool {
584+
self.host.is_none() && self.password.is_none() && self.host.is_none() && self.port.is_none()
534585
}
535-
}
536586

537-
impl fmt::Display for UrlHostParts {
538-
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
587+
fn to_writer(&self, mut w: impl Write, encode_credentials: bool) -> fmt::Result {
539588
match (&self.username, &self.password) {
540-
(Some(username), None) => write!(f, "{}@", encode_userinfo_component(username))?,
541-
(None, Some(password)) => write!(f, ":{}@", encode_userinfo_component(password))?,
589+
(Some(username), None) => write!(w, "{}@", MaybeEncoded(username, encode_credentials))?,
590+
(None, Some(password)) => write!(w, ":{}@", MaybeEncoded(password, encode_credentials))?,
542591
(Some(username), Some(password)) => write!(
543-
f,
592+
w,
544593
"{}:{}@",
545-
encode_userinfo_component(username),
546-
encode_userinfo_component(password)
594+
MaybeEncoded(username, encode_credentials),
595+
MaybeEncoded(password, encode_credentials)
547596
)?,
548597
(None, None) => {}
549598
}
550599
if let Some(host) = &self.host {
551-
write!(f, "{host}")?;
600+
write!(w, "{host}")?;
552601
}
553602
if let Some(port) = self.port {
554-
write!(f, ":{port}")?;
603+
write!(w, ":{port}")?;
555604
}
556605
Ok(())
557606
}
558607
}
559608

609+
impl FromPyObject<'_> for UrlHostParts {
610+
fn extract_bound(ob: &Bound<'_, PyAny>) -> PyResult<Self> {
611+
let py = ob.py();
612+
let dict = ob.downcast::<PyDict>()?;
613+
Ok(UrlHostParts {
614+
username: dict.get_as(intern!(py, "username"))?,
615+
password: dict.get_as(intern!(py, "password"))?,
616+
host: dict.get_as(intern!(py, "host"))?,
617+
port: dict.get_as(intern!(py, "port"))?,
618+
})
619+
}
620+
}
621+
560622
fn host_to_dict<'a>(py: Python<'a>, lib_url: &Url) -> PyResult<Bound<'a, PyDict>> {
561623
let dict = PyDict::new(py);
562624
dict.set_item("username", Some(lib_url.username()).filter(|s| !s.is_empty()))?;
@@ -630,14 +692,10 @@ const USERINFO_ENCODE_SET: &AsciiSet = &CONTROLS
630692
// we must also percent-encode '%'
631693
.add(b'%');
632694

633-
fn encode_userinfo_component(value: &str) -> Cow<'_, str> {
634-
let encoded = percent_encode(value.as_bytes(), USERINFO_ENCODE_SET).to_string();
635-
if encoded == value {
636-
Cow::Borrowed(value)
637-
} else {
638-
Cow::Owned(encoded)
639-
}
695+
fn encode_userinfo_component(value: &str) -> impl Display + '_ {
696+
percent_encode(value.as_bytes(), USERINFO_ENCODE_SET)
640697
}
698+
641699
// based on https://github.com/servo/rust-url/blob/1c1e406874b3d2aa6f36c5d2f3a5c2ea74af9efb/url/src/parser.rs#L161-L167
642700
pub fn scheme_is_special(scheme: &str) -> bool {
643701
matches!(scheme, "http" | "https" | "ws" | "wss" | "ftp" | "file")

tests/validators/test_url.py

Lines changed: 89 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1319,13 +1319,64 @@ def test_multi_url_build() -> None:
13191319

13201320

13211321
@pytest.mark.parametrize('url_type', [Url, MultiHostUrl])
1322-
def test_url_build_encodes_credentials(url_type: type[Union[Url, MultiHostUrl]]) -> None:
1322+
@pytest.mark.parametrize(
1323+
'include_kwarg',
1324+
[
1325+
pytest.param(
1326+
True,
1327+
marks=pytest.mark.xfail(
1328+
reason='semantics of `encode_credentials` need to be fully decided, not enabled yet'
1329+
),
1330+
),
1331+
False,
1332+
],
1333+
)
1334+
def test_url_build_not_encode_credentials(url_type: type[Union[Url, MultiHostUrl]], include_kwarg: bool) -> None:
1335+
kwargs = {}
1336+
if include_kwarg:
1337+
kwargs['encode_credentials'] = False
1338+
url = url_type.build(
1339+
scheme='postgresql',
1340+
username='user name',
1341+
password='p@ss/word?#__',
1342+
host='example.com',
1343+
port=5432,
1344+
**kwargs,
1345+
)
1346+
assert url == url_type('postgresql://user%20name:p@ss/word?#[email protected]:5432')
1347+
assert str(url) == 'postgresql://user%20name:p@ss/word?#[email protected]:5432'
1348+
1349+
# NB without encoding, the special characters can seriously affect the URL
1350+
# parts, probably not what users want.
1351+
#
1352+
# TODO: in v3, probably should set `encode_credentials=True` by default
1353+
#
1354+
# FIXME: I guess there are similar issues with query containing #, for
1355+
# example? Potentially for all of these cases we could just raise an error?
1356+
1357+
if url_type is Url:
1358+
assert url.host == 'ss'
1359+
assert url.username == 'user%20name'
1360+
assert url.password == 'p'
1361+
assert url.port is None
1362+
else:
1363+
assert url.hosts() == [{'username': 'user%20name', 'password': 'p', 'host': 'ss', 'port': None}]
1364+
1365+
assert url.path == '/word'
1366+
assert url.query == ''
1367+
assert url.fragment == '[email protected]:5432'
1368+
1369+
1370+
@pytest.mark.xfail(reason='semantics of `encode_credentials` need to be fully decided, not enabled yet')
1371+
@pytest.mark.parametrize('url_type', [Url, MultiHostUrl])
1372+
def test_url_build_encode_credentials(url_type: type[Union[Url, MultiHostUrl]]) -> None:
13231373
url = url_type.build(
13241374
scheme='postgresql',
13251375
username='user name',
13261376
password='p@ss/word?#__',
13271377
host='example.com',
13281378
port=5432,
1379+
encode_credentials=True,
13291380
)
13301381
assert url == url_type('postgresql://user%20name:p%40ss%2Fword%3F%[email protected]:5432')
13311382
assert str(url) == 'postgresql://user%20name:p%40ss%2Fword%3F%[email protected]:5432'
@@ -1338,12 +1389,47 @@ def test_url_build_encodes_credentials(url_type: type[Union[Url, MultiHostUrl]])
13381389
]
13391390

13401391

1341-
def test_multi_url_build_hosts_encodes_credentials() -> None:
1392+
@pytest.mark.parametrize(
1393+
'include_kwarg',
1394+
[
1395+
pytest.param(
1396+
True,
1397+
marks=pytest.mark.xfail(
1398+
reason='semantics of `encode_credentials` need to be fully decided, not enabled yet'
1399+
),
1400+
),
1401+
False,
1402+
],
1403+
)
1404+
def test_multi_url_build_hosts_not_encode_credentials(include_kwarg: bool) -> None:
1405+
kwargs = {}
1406+
if include_kwarg:
1407+
kwargs['encode_credentials'] = False
1408+
hosts = [
1409+
{'host': 'example.com', 'password': 'p@ss/word?#__', 'username': 'user name', 'port': 5431},
1410+
{'host': 'example.org', 'password': 'p@%ss__', 'username': 'other', 'port': 5432},
1411+
]
1412+
url = MultiHostUrl.build(scheme='postgresql', hosts=hosts, **kwargs)
1413+
1414+
# NB: see comment in `test_url_build_not_encode_credentials` about not
1415+
# encoding credentials leading to VERY broken results
1416+
1417+
assert str(url) == 'postgresql://user%20name:p@ss/word?#[email protected]:5431,other:p@%[email protected]:5432'
1418+
assert url.hosts() == [
1419+
{'username': 'user%20name', 'password': 'p', 'host': 'ss', 'port': None},
1420+
]
1421+
assert url.path == '/word'
1422+
assert url.query == ''
1423+
assert url.fragment == '[email protected]:5431,other:p@%[email protected]:5432'
1424+
1425+
1426+
@pytest.mark.xfail(reason='semantics of `encode_credentials` need to be fully decided, not enabled yet')
1427+
def test_multi_url_build_hosts_encode_credentials() -> None:
13421428
hosts = [
13431429
{'host': 'example.com', 'password': 'p@ss/word?#__', 'username': 'user name', 'port': 5431},
13441430
{'host': 'example.org', 'password': 'p@%ss__', 'username': 'other', 'port': 5432},
13451431
]
1346-
url = MultiHostUrl.build(scheme='postgresql', hosts=hosts)
1432+
url = MultiHostUrl.build(scheme='postgresql', hosts=hosts, encode_credentials=True)
13471433
assert (
13481434
str(url) == 'postgresql://user%20name:p%40ss%2Fword%3F%[email protected]:5431,other:p%40%[email protected]:5432'
13491435
)

0 commit comments

Comments
 (0)