Skip to content

Commit 63c6d6e

Browse files
committed
finish implementation
1 parent b50a4b7 commit 63c6d6e

File tree

2 files changed

+55
-63
lines changed

2 files changed

+55
-63
lines changed

src/url.rs

Lines changed: 30 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::borrow::Cow;
12
use std::collections::hash_map::DefaultHasher;
23
use std::fmt;
34
use std::fmt::Formatter;
@@ -15,16 +16,14 @@ use url::Url;
1516
use crate::tools::SchemaDict;
1617
use crate::SchemaValidator;
1718

18-
static SCHEMA_DEFINITION_URL: GILOnceCell<SchemaValidator> = GILOnceCell::new();
19-
2019
#[pyclass(name = "Url", module = "pydantic_core._pydantic_core", subclass, frozen)]
2120
#[derive(Clone)]
2221
#[cfg_attr(debug_assertions, derive(Debug))]
2322
pub struct PyUrl {
2423
lib_url: Url,
25-
/// Whether to serialize the path as empty when it is `/`. The `url` crate always normalizes an empty path to `/`,
24+
/// Override to treat the path as empty when it is `/`. The `url` crate always normalizes an empty path to `/`,
2625
/// but users may want to preserve the empty path when round-tripping.
27-
serialize_path_as_empty: bool,
26+
path_is_empty: bool,
2827
/// Cache for the serialized representation where this diverges from `lib_url.as_str()`
2928
/// (i.e. when trailing slash was added to the empty path, but user didn't want that)
3029
serialized: OnceLock<String>,
@@ -33,16 +32,16 @@ pub struct PyUrl {
3332
impl Hash for PyUrl {
3433
fn hash<H: Hasher>(&self, state: &mut H) {
3534
self.lib_url.hash(state);
36-
self.serialize_path_as_empty.hash(state);
35+
self.path_is_empty.hash(state);
3736
// no need to hash `serialized` as it's derived from the other two fields
3837
}
3938
}
4039

4140
impl PyUrl {
42-
pub fn new(lib_url: Url, serialize_path_as_empty: bool) -> Self {
41+
pub fn new(lib_url: Url, path_is_empty: bool) -> Self {
4342
Self {
4443
lib_url,
45-
serialize_path_as_empty,
44+
path_is_empty,
4645
serialized: OnceLock::new(),
4746
}
4847
}
@@ -55,8 +54,8 @@ impl PyUrl {
5554
&mut self.lib_url
5655
}
5756

58-
pub fn serialized(&self, py: Python<'_>) -> &str {
59-
if self.serialize_path_as_empty {
57+
fn serialized(&self, py: Python<'_>) -> &str {
58+
if self.path_is_empty {
6059
self.serialized
6160
.get_or_init_py_attached(py, || serialize_url_without_path_slash(&self.lib_url))
6261
} else {
@@ -125,6 +124,7 @@ impl PyUrl {
125124
pub fn path(&self) -> Option<&str> {
126125
match self.lib_url.path() {
127126
"" => None,
127+
"/" if self.path_is_empty => None,
128128
path => Some(path),
129129
}
130130
}
@@ -149,12 +149,12 @@ impl PyUrl {
149149
}
150150

151151
// string representation of the URL, with punycode decoded when appropriate
152-
pub fn unicode_string(&self) -> String {
153-
unicode_url(&self.lib_url)
152+
pub fn unicode_string(&self, py: Python<'_>) -> Cow<'_, str> {
153+
unicode_url(self.serialized(py), &self.lib_url)
154154
}
155155

156156
pub fn __str__(&self, py: Python<'_>) -> &str {
157-
dbg!(self.serialized(py))
157+
self.serialized(py)
158158
}
159159

160160
pub fn __repr__(&self, py: Python<'_>) -> String {
@@ -250,8 +250,6 @@ impl PyMultiHostUrl {
250250
}
251251
}
252252

253-
static SCHEMA_DEFINITION_MULTI_HOST_URL: GILOnceCell<SchemaValidator> = GILOnceCell::new();
254-
255253
#[pymethods]
256254
impl PyMultiHostUrl {
257255
#[new]
@@ -312,12 +310,12 @@ impl PyMultiHostUrl {
312310
}
313311

314312
// string representation of the URL, with punycode decoded when appropriate
315-
pub fn unicode_string(&self) -> String {
313+
pub fn unicode_string(&self, py: Python<'_>) -> Cow<'_, str> {
316314
if let Some(extra_urls) = &self.extra_urls {
317315
let scheme = self.ref_url.lib_url.scheme();
318316
let host_offset = scheme.len() + 3;
319317

320-
let mut full_url = self.ref_url.unicode_string();
318+
let mut full_url = self.ref_url.unicode_string(py).into_owned();
321319
full_url.insert(host_offset, ',');
322320

323321
// special urls will have had a trailing slash added, non-special urls will not
@@ -328,15 +326,15 @@ impl PyMultiHostUrl {
328326
let hosts = extra_urls
329327
.iter()
330328
.map(|url| {
331-
let str = unicode_url(url);
329+
let str = unicode_url(url.as_str(), url);
332330
str[host_offset..str.len() - sub].to_string()
333331
})
334332
.collect::<Vec<String>>()
335333
.join(",");
336334
full_url.insert_str(host_offset, &hosts);
337-
full_url
335+
Cow::Owned(full_url)
338336
} else {
339-
self.ref_url.unicode_string()
337+
self.ref_url.unicode_string(py)
340338
}
341339
}
342340

@@ -345,7 +343,7 @@ impl PyMultiHostUrl {
345343
let scheme = self.ref_url.lib_url.scheme();
346344
let host_offset = scheme.len() + 3;
347345

348-
let mut full_url = self.ref_url.lib_url.to_string();
346+
let mut full_url = self.ref_url.serialized(py).to_string();
349347
full_url.insert(host_offset, ',');
350348

351349
// special urls will have had a trailing slash added, non-special urls will not
@@ -372,14 +370,14 @@ impl PyMultiHostUrl {
372370
format!("MultiHostUrl('{}')", self.__str__(py))
373371
}
374372

375-
fn __richcmp__(&self, other: &Self, op: CompareOp) -> PyResult<bool> {
373+
fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python<'_>) -> PyResult<bool> {
376374
match op {
377-
CompareOp::Lt => Ok(self.unicode_string() < other.unicode_string()),
378-
CompareOp::Le => Ok(self.unicode_string() <= other.unicode_string()),
379-
CompareOp::Eq => Ok(self.unicode_string() == other.unicode_string()),
380-
CompareOp::Ne => Ok(self.unicode_string() != other.unicode_string()),
381-
CompareOp::Gt => Ok(self.unicode_string() > other.unicode_string()),
382-
CompareOp::Ge => Ok(self.unicode_string() >= other.unicode_string()),
375+
CompareOp::Lt => Ok(self.unicode_string(py) < other.unicode_string(py)),
376+
CompareOp::Le => Ok(self.unicode_string(py) <= other.unicode_string(py)),
377+
CompareOp::Eq => Ok(self.unicode_string(py) == other.unicode_string(py)),
378+
CompareOp::Ne => Ok(self.unicode_string(py) != other.unicode_string(py)),
379+
CompareOp::Gt => Ok(self.unicode_string(py) > other.unicode_string(py)),
380+
CompareOp::Ge => Ok(self.unicode_string(py) >= other.unicode_string(py)),
383381
}
384382
}
385383

@@ -520,19 +518,18 @@ fn host_to_dict<'a>(py: Python<'a>, lib_url: &Url) -> PyResult<Bound<'a, PyDict>
520518
Ok(dict)
521519
}
522520

523-
fn unicode_url(lib_url: &Url) -> String {
524-
let mut s = lib_url.to_string();
525-
521+
fn unicode_url<'s>(serialized: &'s str, lib_url: &Url) -> Cow<'s, str> {
526522
match lib_url.host() {
527523
Some(url::Host::Domain(domain)) if is_punnycode_domain(lib_url, domain) => {
524+
let mut s = serialized.to_string();
528525
if let Some(decoded) = decode_punycode(domain) {
529526
// replace the range containing the punycode domain with the decoded domain
530527
let start = lib_url.scheme().len() + 3;
531528
s.replace_range(start..start + domain.len(), &decoded);
532529
}
533-
s
530+
Cow::Owned(s)
534531
}
535-
_ => s,
532+
_ => Cow::Borrowed(serialized),
536533
}
537534
}
538535

@@ -565,10 +562,7 @@ fn serialize_url_without_path_slash(url: &Url) -> String {
565562
// use pointer arithmetic to find the pieces we need to build the string
566563
let s = url.as_str();
567564
let path = url.path();
568-
assert_eq!(
569-
path, "/",
570-
"`serialize_path_as_empty` expected to be set only when path is '/'"
571-
);
565+
assert_eq!(path, "/", "`path_is_empty` expected to be set only when path is '/'");
572566

573567
assert!(
574568
// Safety for the below: `s` and `path` should be from the same text slice, so

src/validators/url.rs

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1+
use std::borrow::Cow;
12
use std::cell::RefCell;
23
use std::iter::Peekable;
34
use std::str::Chars;
45

56
use pyo3::intern;
67
use pyo3::prelude::*;
7-
use pyo3::sync::GILOnceCell;
88
use pyo3::types::{PyDict, PyList};
99

1010
use ahash::AHashSet;
@@ -19,7 +19,6 @@ use crate::input::Input;
1919
use crate::input::ValidationMatch;
2020
use crate::tools::SchemaDict;
2121
use crate::url::{scheme_is_special, PyMultiHostUrl, PyUrl};
22-
use crate::SchemaValidator;
2322

2423
use super::literal::expected_repr_name;
2524
use super::Exactness;
@@ -130,27 +129,25 @@ impl UrlValidator {
130129
if let Some(py_url) = downcast_python_input::<PyUrl>(input) {
131130
// we don't need to worry about whether the url was parsed in strict mode before,
132131
// even if it was, any syntax errors would have been fixed by the first validation
133-
self.check_length(input, py_url.get().url().as_str())?;
132+
self.check_length(input, py_url.get().__str__(py))?;
134133
return Ok(EitherUrl::Py(py_url.clone()));
135134
}
136135

137-
if let Some(multi_host_url) = downcast_python_input::<PyMultiHostUrl>(input) {
138-
let url_str = multi_host_url.get().__str__(py);
139-
self.check_length(input, &url_str)?;
140-
let url = parse_url(&url_str, input, strict)?;
141-
let serialize_path_as_empty = need_to_preserve_empty_path(&url, &url_str, self.preserve_empty_path);
142-
return Ok(EitherUrl::Rust(PyUrl::new(url, serialize_path_as_empty)));
136+
let either_str_owned;
137+
let url_str = if let Some(multi_host_url) = downcast_python_input::<PyMultiHostUrl>(input) {
138+
Cow::Owned(multi_host_url.get().__str__(py))
143139
} else if let Ok(either_str) = input.validate_str(strict, false).map(ValidationMatch::into_inner) {
144-
let cow = either_str.as_cow()?;
145-
let url_str = cow.as_ref();
146-
147-
self.check_length(input, url_str)?;
148-
let url = parse_url(url_str, input, strict)?;
149-
let serialize_path_as_empty = need_to_preserve_empty_path(&url, url_str, self.preserve_empty_path);
150-
return Ok(EitherUrl::Rust(PyUrl::new(url, serialize_path_as_empty)));
140+
either_str_owned = either_str; // to extend the lifetime outside the if let
141+
either_str_owned.as_cow()?
151142
} else {
152-
Err(ValError::new(ErrorTypeDefaults::UrlType, input))
153-
}
143+
return Err(ValError::new(ErrorTypeDefaults::UrlType, input));
144+
};
145+
146+
let url_str = url_str.as_ref();
147+
self.check_length(input, url_str)?;
148+
let url = parse_url(url_str, input, strict)?;
149+
let path_is_empty = need_to_preserve_empty_path(&url, url_str, self.preserve_empty_path);
150+
Ok(EitherUrl::Rust(PyUrl::new(url, path_is_empty)))
154151
}
155152

156153
fn check_length<'py>(&self, input: &(impl Input<'py> + ?Sized), url_str: &str) -> ValResult<()> {
@@ -307,7 +304,7 @@ impl MultiHostUrlValidator {
307304
self.check_length(input, || multi_url.get().__str__(py).len())?;
308305
Ok(EitherMultiHostUrl::Py(multi_url.clone()))
309306
} else if let Some(py_url) = downcast_python_input::<PyUrl>(input) {
310-
self.check_length(input, || py_url.get().url().as_str().len())?;
307+
self.check_length(input, || py_url.get().__str__(py).len())?;
311308
Ok(EitherMultiHostUrl::Rust(PyMultiHostUrl::new(
312309
py_url.get().clone(),
313310
None,
@@ -318,7 +315,7 @@ impl MultiHostUrlValidator {
318315

319316
self.check_length(input, || url_str.len())?;
320317

321-
parse_multihost_url(url_str, input, strict).map(EitherMultiHostUrl::Rust)
318+
parse_multihost_url(url_str, input, strict, self.preserve_empty_path).map(EitherMultiHostUrl::Rust)
322319
} else {
323320
Err(ValError::new(ErrorTypeDefaults::UrlType, input))
324321
}
@@ -384,6 +381,7 @@ fn parse_multihost_url<'py>(
384381
url_str: &str,
385382
input: &(impl Input<'py> + ?Sized),
386383
strict: bool,
384+
preserve_empty_path: bool,
387385
) -> ValResult<PyMultiHostUrl> {
388386
macro_rules! parsing_err {
389387
($parse_error:expr) => {
@@ -474,14 +472,16 @@ fn parse_multihost_url<'py>(
474472

475473
let reconstructed_url = format!("{prefix}{}", &url_str[start..]);
476474
let ref_url = parse_url(&reconstructed_url, input, strict)?;
475+
let path_is_empty = need_to_preserve_empty_path(&ref_url, &reconstructed_url, preserve_empty_path);
476+
477+
let ref_url = PyUrl::new(ref_url, path_is_empty);
477478

478479
if hosts.is_empty() {
479480
// if there's only one host (i.e. no `,`), we allow it to be empty to allow for default hosts
480-
// FIXME set serialize_path_as_empty correctly here
481-
Ok(PyMultiHostUrl::new(PyUrl::new(ref_url, false), None))
481+
Ok(PyMultiHostUrl::new(ref_url, None))
482482
} else {
483483
// with more than one host, none of them can be empty
484-
if !ref_url.has_host() {
484+
if !ref_url.url().has_host() {
485485
return parsing_err!(ParseError::EmptyHost);
486486
}
487487
let extra_urls: Vec<Url> = hosts
@@ -496,8 +496,7 @@ fn parse_multihost_url<'py>(
496496
return parsing_err!(ParseError::EmptyHost);
497497
}
498498

499-
// FIXME set serialize_path_as_empty correctly here
500-
Ok(PyMultiHostUrl::new(PyUrl::new(ref_url, false), Some(extra_urls)))
499+
Ok(PyMultiHostUrl::new(ref_url, Some(extra_urls)))
501500
}
502501
}
503502

@@ -551,7 +550,6 @@ fn parse_url<'py>(url_str: &str, input: &(impl Input<'py> + ?Sized), strict: boo
551550

552551
/// Check if the path got normalized to `/` and the original string had an empty path
553552
fn need_to_preserve_empty_path(url: &Url, url_str: &str, preserve_empty_path: bool) -> bool {
554-
dbg!(url.path(), url_str, preserve_empty_path);
555553
if !preserve_empty_path {
556554
return false;
557555
}
@@ -561,7 +559,7 @@ fn need_to_preserve_empty_path(url: &Url, url_str: &str, preserve_empty_path: bo
561559
return false;
562560
}
563561

564-
if !dbg!(scheme_is_special(url.scheme())) {
562+
if !scheme_is_special(url.scheme()) {
565563
// non-special schemes don't normalize the path
566564
return false;
567565
}

0 commit comments

Comments
 (0)