1+ use std:: borrow:: Cow ;
12use std:: collections:: hash_map:: DefaultHasher ;
23use std:: fmt;
34use std:: fmt:: Formatter ;
@@ -15,16 +16,14 @@ use url::Url;
1516use crate :: tools:: SchemaDict ;
1617use crate :: SchemaValidator ;
1718
18- static SCHEMA_DEFINITION_URL : GILOnceCell < SchemaValidator > = GILOnceCell :: new ( ) ;
19-
2019#[ pyclass( name = "Url" , module = "pydantic_core._pydantic_core" , subclass, frozen) ]
2120#[ derive( Clone ) ]
2221#[ cfg_attr( debug_assertions, derive( Debug ) ) ]
2322pub struct PyUrl {
2423 lib_url : Url ,
25- /// Whether to serialize the path as empty when it is `/`. The `url` crate always normalizes an empty path to `/`,
24+ /// Override to treat the path as empty when it is `/`. The `url` crate always normalizes an empty path to `/`,
2625 /// but users may want to preserve the empty path when round-tripping.
27- serialize_path_as_empty : bool ,
26+ path_is_empty : bool ,
2827 /// Cache for the serialized representation where this diverges from `lib_url.as_str()`
2928 /// (i.e. when trailing slash was added to the empty path, but user didn't want that)
3029 serialized : OnceLock < String > ,
@@ -33,16 +32,16 @@ pub struct PyUrl {
3332impl Hash for PyUrl {
3433 fn hash < H : Hasher > ( & self , state : & mut H ) {
3534 self . lib_url . hash ( state) ;
36- self . serialize_path_as_empty . hash ( state) ;
35+ self . path_is_empty . hash ( state) ;
3736 // no need to hash `serialized` as it's derived from the other two fields
3837 }
3938}
4039
4140impl PyUrl {
42- pub fn new ( lib_url : Url , serialize_path_as_empty : bool ) -> Self {
41+ pub fn new ( lib_url : Url , path_is_empty : bool ) -> Self {
4342 Self {
4443 lib_url,
45- serialize_path_as_empty ,
44+ path_is_empty ,
4645 serialized : OnceLock :: new ( ) ,
4746 }
4847 }
@@ -55,8 +54,8 @@ impl PyUrl {
5554 & mut self . lib_url
5655 }
5756
58- pub fn serialized ( & self , py : Python < ' _ > ) -> & str {
59- if self . serialize_path_as_empty {
57+ fn serialized ( & self , py : Python < ' _ > ) -> & str {
58+ if self . path_is_empty {
6059 self . serialized
6160 . get_or_init_py_attached ( py, || serialize_url_without_path_slash ( & self . lib_url ) )
6261 } else {
@@ -125,6 +124,7 @@ impl PyUrl {
125124 pub fn path ( & self ) -> Option < & str > {
126125 match self . lib_url . path ( ) {
127126 "" => None ,
127+ "/" if self . path_is_empty => None ,
128128 path => Some ( path) ,
129129 }
130130 }
@@ -149,12 +149,12 @@ impl PyUrl {
149149 }
150150
151151 // string representation of the URL, with punycode decoded when appropriate
152- pub fn unicode_string ( & self ) -> String {
153- unicode_url ( & self . lib_url )
152+ pub fn unicode_string ( & self , py : Python < ' _ > ) -> Cow < ' _ , str > {
153+ unicode_url ( self . serialized ( py ) , & self . lib_url )
154154 }
155155
156156 pub fn __str__ ( & self , py : Python < ' _ > ) -> & str {
157- dbg ! ( self . serialized( py) )
157+ self . serialized ( py)
158158 }
159159
160160 pub fn __repr__ ( & self , py : Python < ' _ > ) -> String {
@@ -250,8 +250,6 @@ impl PyMultiHostUrl {
250250 }
251251}
252252
253- static SCHEMA_DEFINITION_MULTI_HOST_URL : GILOnceCell < SchemaValidator > = GILOnceCell :: new ( ) ;
254-
255253#[ pymethods]
256254impl PyMultiHostUrl {
257255 #[ new]
@@ -312,12 +310,12 @@ impl PyMultiHostUrl {
312310 }
313311
314312 // string representation of the URL, with punycode decoded when appropriate
315- pub fn unicode_string ( & self ) -> String {
313+ pub fn unicode_string ( & self , py : Python < ' _ > ) -> Cow < ' _ , str > {
316314 if let Some ( extra_urls) = & self . extra_urls {
317315 let scheme = self . ref_url . lib_url . scheme ( ) ;
318316 let host_offset = scheme. len ( ) + 3 ;
319317
320- let mut full_url = self . ref_url . unicode_string ( ) ;
318+ let mut full_url = self . ref_url . unicode_string ( py ) . into_owned ( ) ;
321319 full_url. insert ( host_offset, ',' ) ;
322320
323321 // special urls will have had a trailing slash added, non-special urls will not
@@ -328,15 +326,15 @@ impl PyMultiHostUrl {
328326 let hosts = extra_urls
329327 . iter ( )
330328 . map ( |url| {
331- let str = unicode_url ( url) ;
329+ let str = unicode_url ( url. as_str ( ) , url ) ;
332330 str[ host_offset..str. len ( ) - sub] . to_string ( )
333331 } )
334332 . collect :: < Vec < String > > ( )
335333 . join ( "," ) ;
336334 full_url. insert_str ( host_offset, & hosts) ;
337- full_url
335+ Cow :: Owned ( full_url)
338336 } else {
339- self . ref_url . unicode_string ( )
337+ self . ref_url . unicode_string ( py )
340338 }
341339 }
342340
@@ -345,7 +343,7 @@ impl PyMultiHostUrl {
345343 let scheme = self . ref_url . lib_url . scheme ( ) ;
346344 let host_offset = scheme. len ( ) + 3 ;
347345
348- let mut full_url = self . ref_url . lib_url . to_string ( ) ;
346+ let mut full_url = self . ref_url . serialized ( py ) . to_string ( ) ;
349347 full_url. insert ( host_offset, ',' ) ;
350348
351349 // special urls will have had a trailing slash added, non-special urls will not
@@ -372,14 +370,14 @@ impl PyMultiHostUrl {
372370 format ! ( "MultiHostUrl('{}')" , self . __str__( py) )
373371 }
374372
375- fn __richcmp__ ( & self , other : & Self , op : CompareOp ) -> PyResult < bool > {
373+ fn __richcmp__ ( & self , other : & Self , op : CompareOp , py : Python < ' _ > ) -> PyResult < bool > {
376374 match op {
377- CompareOp :: Lt => Ok ( self . unicode_string ( ) < other. unicode_string ( ) ) ,
378- CompareOp :: Le => Ok ( self . unicode_string ( ) <= other. unicode_string ( ) ) ,
379- CompareOp :: Eq => Ok ( self . unicode_string ( ) == other. unicode_string ( ) ) ,
380- CompareOp :: Ne => Ok ( self . unicode_string ( ) != other. unicode_string ( ) ) ,
381- CompareOp :: Gt => Ok ( self . unicode_string ( ) > other. unicode_string ( ) ) ,
382- CompareOp :: Ge => Ok ( self . unicode_string ( ) >= other. unicode_string ( ) ) ,
375+ CompareOp :: Lt => Ok ( self . unicode_string ( py ) < other. unicode_string ( py ) ) ,
376+ CompareOp :: Le => Ok ( self . unicode_string ( py ) <= other. unicode_string ( py ) ) ,
377+ CompareOp :: Eq => Ok ( self . unicode_string ( py ) == other. unicode_string ( py ) ) ,
378+ CompareOp :: Ne => Ok ( self . unicode_string ( py ) != other. unicode_string ( py ) ) ,
379+ CompareOp :: Gt => Ok ( self . unicode_string ( py ) > other. unicode_string ( py ) ) ,
380+ CompareOp :: Ge => Ok ( self . unicode_string ( py ) >= other. unicode_string ( py ) ) ,
383381 }
384382 }
385383
@@ -520,19 +518,18 @@ fn host_to_dict<'a>(py: Python<'a>, lib_url: &Url) -> PyResult<Bound<'a, PyDict>
520518 Ok ( dict)
521519}
522520
523- fn unicode_url ( lib_url : & Url ) -> String {
524- let mut s = lib_url. to_string ( ) ;
525-
521+ fn unicode_url < ' s > ( serialized : & ' s str , lib_url : & Url ) -> Cow < ' s , str > {
526522 match lib_url. host ( ) {
527523 Some ( url:: Host :: Domain ( domain) ) if is_punnycode_domain ( lib_url, domain) => {
524+ let mut s = serialized. to_string ( ) ;
528525 if let Some ( decoded) = decode_punycode ( domain) {
529526 // replace the range containing the punycode domain with the decoded domain
530527 let start = lib_url. scheme ( ) . len ( ) + 3 ;
531528 s. replace_range ( start..start + domain. len ( ) , & decoded) ;
532529 }
533- s
530+ Cow :: Owned ( s )
534531 }
535- _ => s ,
532+ _ => Cow :: Borrowed ( serialized ) ,
536533 }
537534}
538535
@@ -565,10 +562,7 @@ fn serialize_url_without_path_slash(url: &Url) -> String {
565562 // use pointer arithmetic to find the pieces we need to build the string
566563 let s = url. as_str ( ) ;
567564 let path = url. path ( ) ;
568- assert_eq ! (
569- path, "/" ,
570- "`serialize_path_as_empty` expected to be set only when path is '/'"
571- ) ;
565+ assert_eq ! ( path, "/" , "`path_is_empty` expected to be set only when path is '/'" ) ;
572566
573567 assert ! (
574568 // Safety for the below: `s` and `path` should be from the same text slice, so
0 commit comments