@@ -2,11 +2,12 @@ use std::collections::hash_map::DefaultHasher;
22use std:: fmt;
33use std:: fmt:: Formatter ;
44use std:: hash:: { Hash , Hasher } ;
5+ use std:: sync:: OnceLock ;
56
67use idna:: punycode:: decode_to_string;
78use pyo3:: exceptions:: PyValueError ;
89use pyo3:: pyclass:: CompareOp ;
9- use pyo3:: sync:: GILOnceCell ;
10+ use pyo3:: sync:: { GILOnceCell , OnceLockExt } ;
1011use pyo3:: types:: { PyDict , PyType } ;
1112use pyo3:: { intern, prelude:: * , IntoPyObjectExt } ;
1213use url:: Url ;
@@ -17,35 +18,70 @@ use crate::SchemaValidator;
1718static SCHEMA_DEFINITION_URL : GILOnceCell < SchemaValidator > = GILOnceCell :: new ( ) ;
1819
1920#[ pyclass( name = "Url" , module = "pydantic_core._pydantic_core" , subclass, frozen) ]
20- #[ derive( Clone , Hash ) ]
21+ #[ derive( Clone ) ]
2122#[ cfg_attr( debug_assertions, derive( Debug ) ) ]
2223pub struct PyUrl {
2324 lib_url : Url ,
25+ /// Whether to serialize the path as empty when it is `/`. The `url` crate always normalizes an empty path to `/`,
26+ /// but users may want to preserve the empty path when round-tripping.
27+ serialize_path_as_empty : bool ,
28+ /// Cache for the serialized representation where this diverges from `lib_url.as_str()`
29+ /// (i.e. when trailing slash was added to the empty path, but user didn't want that)
30+ serialized : OnceLock < String > ,
31+ }
32+
33+ impl Hash for PyUrl {
34+ fn hash < H : Hasher > ( & self , state : & mut H ) {
35+ self . lib_url . hash ( state) ;
36+ self . serialize_path_as_empty . hash ( state) ;
37+ // no need to hash `serialized` as it's derived from the other two fields
38+ }
2439}
2540
2641impl PyUrl {
27- pub fn new ( lib_url : Url ) -> Self {
28- Self { lib_url }
42+ pub fn new ( lib_url : Url , serialize_path_as_empty : bool ) -> Self {
43+ Self {
44+ lib_url,
45+ serialize_path_as_empty,
46+ serialized : OnceLock :: new ( ) ,
47+ }
2948 }
3049
3150 pub fn url ( & self ) -> & Url {
3251 & self . lib_url
3352 }
34- }
3553
36- fn build_schema_validator ( py : Python , schema_type : & str ) -> SchemaValidator {
37- let schema = PyDict :: new ( py) ;
38- schema. set_item ( "type" , schema_type) . unwrap ( ) ;
39- SchemaValidator :: py_new ( py, & schema, None ) . unwrap ( )
54+ pub fn url_mut ( & mut self ) -> & mut Url {
55+ & mut self . lib_url
56+ }
57+
58+ pub fn serialized ( & self , py : Python < ' _ > ) -> & str {
59+ if self . serialize_path_as_empty {
60+ self . serialized
61+ . get_or_init_py_attached ( py, || serialize_url_without_path_slash ( & self . lib_url ) )
62+ } else {
63+ self . lib_url . as_str ( )
64+ }
65+ }
4066}
4167
4268#[ pymethods]
4369impl PyUrl {
4470 #[ new]
45- pub fn py_new ( py : Python , url : & Bound < ' _ , PyAny > ) -> PyResult < Self > {
46- let schema_obj = SCHEMA_DEFINITION_URL
47- . get_or_init ( py, || build_schema_validator ( py, "url" ) )
48- . validate_python ( py, url, None , None , None , None , None , false . into ( ) , None , None ) ?;
71+ #[ pyo3( signature = ( url, * , preserve_empty_path=false ) ) ]
72+ pub fn py_new ( py : Python , url : & Bound < ' _ , PyAny > , preserve_empty_path : bool ) -> PyResult < Self > {
73+ let schema_obj = get_schema_validator ( py, false , preserve_empty_path) ?. validate_python (
74+ py,
75+ url,
76+ None ,
77+ None ,
78+ None ,
79+ None ,
80+ None ,
81+ false . into ( ) ,
82+ None ,
83+ None ,
84+ ) ?;
4985 schema_obj. extract ( py)
5086 }
5187
@@ -117,12 +153,12 @@ impl PyUrl {
117153 unicode_url ( & self . lib_url )
118154 }
119155
120- pub fn __str__ ( & self ) -> & str {
121- self . lib_url . as_str ( )
156+ pub fn __str__ ( & self , py : Python < ' _ > ) -> & str {
157+ dbg ! ( self . serialized ( py ) )
122158 }
123159
124- pub fn __repr__ ( & self ) -> String {
125- format ! ( "Url('{}')" , self . lib_url )
160+ pub fn __repr__ ( & self , py : Python < ' _ > ) -> String {
161+ format ! ( "Url('{}')" , self . serialized ( py ) )
126162 }
127163
128164 fn __richcmp__ ( & self , other : & Self , op : CompareOp ) -> PyResult < bool > {
@@ -151,8 +187,8 @@ impl PyUrl {
151187 self . clone ( ) . into_py_any ( py)
152188 }
153189
154- fn __getnewargs__ ( & self ) -> ( & str , ) {
155- ( self . __str__ ( ) , )
190+ fn __getnewargs__ ( & self , py : Python < ' _ > ) -> ( & str , ) {
191+ ( self . __str__ ( py ) , )
156192 }
157193
158194 #[ classmethod]
@@ -201,11 +237,8 @@ pub struct PyMultiHostUrl {
201237}
202238
203239impl PyMultiHostUrl {
204- pub fn new ( ref_url : Url , extra_urls : Option < Vec < Url > > ) -> Self {
205- Self {
206- ref_url : PyUrl :: new ( ref_url) ,
207- extra_urls,
208- }
240+ pub fn new ( ref_url : PyUrl , extra_urls : Option < Vec < Url > > ) -> Self {
241+ Self { ref_url, extra_urls }
209242 }
210243
211244 pub fn lib_url ( & self ) -> & Url {
@@ -222,10 +255,20 @@ static SCHEMA_DEFINITION_MULTI_HOST_URL: GILOnceCell<SchemaValidator> = GILOnceC
222255#[ pymethods]
223256impl PyMultiHostUrl {
224257 #[ new]
225- pub fn py_new ( py : Python , url : & Bound < ' _ , PyAny > ) -> PyResult < Self > {
226- let schema_obj = SCHEMA_DEFINITION_MULTI_HOST_URL
227- . get_or_init ( py, || build_schema_validator ( py, "multi-host-url" ) )
228- . validate_python ( py, url, None , None , None , None , None , false . into ( ) , None , None ) ?;
258+ #[ pyo3( signature = ( url, * , preserve_empty_path=false ) ) ]
259+ pub fn py_new ( py : Python , url : & Bound < ' _ , PyAny > , preserve_empty_path : bool ) -> PyResult < Self > {
260+ let schema_obj = get_schema_validator ( py, true , preserve_empty_path) ?. validate_python (
261+ py,
262+ url,
263+ None ,
264+ None ,
265+ None ,
266+ None ,
267+ None ,
268+ false . into ( ) ,
269+ None ,
270+ None ,
271+ ) ?;
229272 schema_obj. extract ( py)
230273 }
231274
@@ -297,7 +340,7 @@ impl PyMultiHostUrl {
297340 }
298341 }
299342
300- pub fn __str__ ( & self ) -> String {
343+ pub fn __str__ ( & self , py : Python < ' _ > ) -> String {
301344 if let Some ( extra_urls) = & self . extra_urls {
302345 let scheme = self . ref_url . lib_url . scheme ( ) ;
303346 let host_offset = scheme. len ( ) + 3 ;
@@ -321,12 +364,12 @@ impl PyMultiHostUrl {
321364 full_url. insert_str ( host_offset, & hosts) ;
322365 full_url
323366 } else {
324- self . ref_url . __str__ ( ) . to_string ( )
367+ self . ref_url . __str__ ( py ) . to_string ( )
325368 }
326369 }
327370
328- pub fn __repr__ ( & self ) -> String {
329- format ! ( "MultiHostUrl('{}')" , self . __str__( ) )
371+ pub fn __repr__ ( & self , py : Python < ' _ > ) -> String {
372+ format ! ( "MultiHostUrl('{}')" , self . __str__( py ) )
330373 }
331374
332375 fn __richcmp__ ( & self , other : & Self , op : CompareOp ) -> PyResult < bool > {
@@ -354,8 +397,8 @@ impl PyMultiHostUrl {
354397 self . clone ( ) . into_py_any ( py)
355398 }
356399
357- fn __getnewargs__ ( & self ) -> ( String , ) {
358- ( self . __str__ ( ) , )
400+ fn __getnewargs__ ( & self , py : Python < ' _ > ) -> ( String , ) {
401+ ( self . __str__ ( py ) , )
359402 }
360403
361404 #[ classmethod]
@@ -517,3 +560,56 @@ fn is_punnycode_domain(lib_url: &Url, domain: &str) -> bool {
/// Reports whether `scheme` is exactly one of `http`, `https`, `ws`, `wss`, `ftp` or `file`.
///
/// The comparison is case-sensitive.
pub fn scheme_is_special(scheme: &str) -> bool {
    const SPECIAL_SCHEMES: [&str; 6] = ["http", "https", "ws", "wss", "ftp", "file"];
    SPECIAL_SCHEMES.contains(&scheme)
}
563+
564+ fn serialize_url_without_path_slash ( url : & Url ) -> String {
565+ // use pointer arithmetic to find the pieces we need to build the string
566+ let s = url. as_str ( ) ;
567+ let path = url. path ( ) ;
568+ assert_eq ! (
569+ path, "/" ,
570+ "`serialize_path_as_empty` expected to be set only when path is '/'"
571+ ) ;
572+
573+ assert ! (
574+ // Safety for the below: `s` and `path` should be from the same text slice, so
575+ // we can pull out the slices of `s` that don't include `path`.
576+ s. as_ptr( ) <= path. as_ptr( ) && unsafe { s. as_ptr( ) . add( s. len( ) ) } >= unsafe { path. as_ptr( ) . add( path. len( ) ) }
577+ ) ;
578+
579+ let prefix_len = path. as_ptr ( ) as usize - s. as_ptr ( ) as usize ;
580+ let suffix_len = s. len ( ) - ( prefix_len + path. len ( ) ) ;
581+
582+ // Safety: prefix is the slice of `s` leading to `path`, protected by the assert above.
583+ let prefix = unsafe { std:: str:: from_utf8_unchecked ( std:: slice:: from_raw_parts ( s. as_ptr ( ) , prefix_len) ) } ;
584+ // Safety: suffix is the slice of `s` after `path`, protected by the assert above.
585+ let suffix =
586+ unsafe { std:: str:: from_utf8_unchecked ( std:: slice:: from_raw_parts ( path. as_ptr ( ) . add ( path. len ( ) ) , suffix_len) ) } ;
587+
588+ format ! ( "{prefix}{suffix}" )
589+ }
590+
591+ static SCHEMA_URL_SINGLE_TRUE : GILOnceCell < SchemaValidator > = GILOnceCell :: new ( ) ;
592+ static SCHEMA_URL_SINGLE_FALSE : GILOnceCell < SchemaValidator > = GILOnceCell :: new ( ) ;
593+ static SCHEMA_URL_MULTI_TRUE : GILOnceCell < SchemaValidator > = GILOnceCell :: new ( ) ;
594+ static SCHEMA_URL_MULTI_FALSE : GILOnceCell < SchemaValidator > = GILOnceCell :: new ( ) ;
595+
/// Expands to an expression that builds a `SchemaValidator` from a minimal schema dict
/// containing `type` and, when enabled, `preserve_empty_path`.
///
/// The expansion uses `?`, so it must be placed in a context that returns a compatible
/// `Result`. Both the schema type and the flag have to be literals.
macro_rules! make_schema_val {
    ($py:ident, $schema_type:literal, $preserve_empty_path:literal) => {{
        let definition = PyDict::new($py);
        definition.set_item(intern!($py, "type"), intern!($py, $schema_type))?;
        // `preserve_empty_path` defaults to false, so the key is only written when enabling it
        if $preserve_empty_path {
            definition.set_item(intern!($py, "preserve_empty_path"), true)?;
        }
        SchemaValidator::py_new($py, &definition, None)
    }};
}
607+
608+ fn get_schema_validator ( py : Python < ' _ > , multi_host : bool , preserve_empty_path : bool ) -> PyResult < & SchemaValidator > {
609+ match ( multi_host, preserve_empty_path) {
610+ ( false , true ) => SCHEMA_URL_SINGLE_TRUE . get_or_try_init ( py, || make_schema_val ! ( py, "url" , true ) ) ,
611+ ( false , false ) => SCHEMA_URL_SINGLE_FALSE . get_or_try_init ( py, || make_schema_val ! ( py, "url" , false ) ) ,
612+ ( true , true ) => SCHEMA_URL_MULTI_TRUE . get_or_try_init ( py, || make_schema_val ! ( py, "multi-host-url" , true ) ) ,
613+ ( true , false ) => SCHEMA_URL_MULTI_FALSE . get_or_try_init ( py, || make_schema_val ! ( py, "multi-host-url" , false ) ) ,
614+ }
615+ }
0 commit comments