@@ -5,6 +5,8 @@ use pyo3::exceptions::{PyTypeError, PyValueError};
55use pyo3:: prelude:: * ;
66use pyo3:: types:: { PyBool , PyString } ;
77
8+ use crate :: string_decoder:: StringOutput ;
9+
810#[ derive( Debug , Clone , Copy ) ]
911pub enum StringCacheMode {
1012 All ,
@@ -50,38 +52,40 @@ impl From<bool> for StringCacheMode {
5052}
5153
5254pub trait StringMaybeCache {
53- fn get_key < ' py > ( py : Python < ' py > , json_str : & str , ascii_only : bool ) -> Bound < ' py , PyString > ;
55+ fn get_key < ' py > ( py : Python < ' py > , string_output : StringOutput < ' _ , ' _ > ) -> Bound < ' py , PyString > ;
5456
55- fn get_value < ' py > ( py : Python < ' py > , json_str : & str , ascii_only : bool ) -> Bound < ' py , PyString > {
56- Self :: get_key ( py, json_str , ascii_only )
57+ fn get_value < ' py > ( py : Python < ' py > , string_output : StringOutput < ' _ , ' _ > ) -> Bound < ' py , PyString > {
58+ Self :: get_key ( py, string_output )
5759 }
5860}
5961
6062pub struct StringCacheAll ;
6163
6264impl StringMaybeCache for StringCacheAll {
63- fn get_key < ' py > ( py : Python < ' py > , json_str : & str , ascii_only : bool ) -> Bound < ' py , PyString > {
64- cached_py_string ( py, json_str, ascii_only)
65+ fn get_key < ' py > ( py : Python < ' py > , string_output : StringOutput < ' _ , ' _ > ) -> Bound < ' py , PyString > {
66+ // SAFETY: the string and its `ascii_only` flag are both taken from the same `StringOutput`, which is trusted to keep them consistent
67+ unsafe { cached_py_string_maybe_ascii ( py, string_output. as_str ( ) , string_output. ascii_only ( ) ) }
6568 }
6669}
6770
6871pub struct StringCacheKeys ;
6972
7073impl StringMaybeCache for StringCacheKeys {
71- fn get_key < ' py > ( py : Python < ' py > , json_str : & str , ascii_only : bool ) -> Bound < ' py , PyString > {
72- cached_py_string ( py, json_str, ascii_only)
74+ fn get_key < ' py > ( py : Python < ' py > , string_output : StringOutput < ' _ , ' _ > ) -> Bound < ' py , PyString > {
75+ // SAFETY: the string and its `ascii_only` flag are both taken from the same `StringOutput`, which is trusted to keep them consistent
76+ unsafe { cached_py_string_maybe_ascii ( py, string_output. as_str ( ) , string_output. ascii_only ( ) ) }
7377 }
7478
75- fn get_value < ' py > ( py : Python < ' py > , json_str : & str , ascii_only : bool ) -> Bound < ' py , PyString > {
76- pystring_fast_new ( py, json_str , ascii_only)
79+ fn get_value < ' py > ( py : Python < ' py > , string_output : StringOutput < ' _ , ' _ > ) -> Bound < ' py , PyString > {
80+ unsafe { pystring_fast_new_maybe_ascii ( py, string_output . as_str ( ) , string_output . ascii_only ( ) ) }
7781 }
7882}
7983
8084pub struct StringNoCache ;
8185
8286impl StringMaybeCache for StringNoCache {
83- fn get_key < ' py > ( py : Python < ' py > , json_str : & str , ascii_only : bool ) -> Bound < ' py , PyString > {
84- pystring_fast_new ( py, json_str , ascii_only)
87+ fn get_key < ' py > ( py : Python < ' py > , string_output : StringOutput < ' _ , ' _ > ) -> Bound < ' py , PyString > {
88+ unsafe { pystring_fast_new_maybe_ascii ( py, string_output . as_str ( ) , string_output . ascii_only ( ) ) }
8589 }
8690}
8791
@@ -108,12 +112,33 @@ pub fn cache_clear() {
108112 get_string_cache ( ) . clear ( ) ;
109113}
110114
111- pub fn cached_py_string < ' py > ( py : Python < ' py > , s : & str , ascii_only : bool ) -> Bound < ' py , PyString > {
115+ /// Create a cached Python `str` from a string slice
116+ #[ inline]
117+ pub fn cached_py_string < ' py > ( py : Python < ' py > , s : & str ) -> Bound < ' py , PyString > {
118+ // SAFETY: `ascii_only` is passed as `false`, so nothing is assumed about the contents of `s`
119+ unsafe { cached_py_string_maybe_ascii ( py, s, false ) }
120+ }
121+
122+ /// Create a cached Python `str` from a string slice.
123+ ///
124+ /// # Safety
125+ ///
126+ /// Caller must pass an ASCII-only string.
127+ #[ inline]
128+ pub unsafe fn cached_py_string_ascii < ' py > ( py : Python < ' py > , s : & str ) -> Bound < ' py , PyString > {
129+ // SAFETY: caller upholds invariant
130+ unsafe { cached_py_string_maybe_ascii ( py, s, true ) }
131+ }
132+
133+ /// # Safety
134+ ///
135+ /// `ascii_only` must only be set to `true` if `s` is guaranteed to be ASCII only.
136+ unsafe fn cached_py_string_maybe_ascii < ' py > ( py : Python < ' py > , s : & str , ascii_only : bool ) -> Bound < ' py , PyString > {
112137 // from tests, 0 and 1 character strings are faster not cached
113138 if ( 2 ..64 ) . contains ( & s. len ( ) ) {
114139 get_string_cache ( ) . get_or_insert ( py, s, ascii_only)
115140 } else {
116- pystring_fast_new ( py, s, ascii_only)
141+ pystring_fast_new_maybe_ascii ( py, s, ascii_only)
117142 }
118143}
119144
@@ -146,13 +171,18 @@ impl Default for PyStringCache {
146171impl PyStringCache {
147172 /// Lookup the cache for an entry with the given string. If it exists, return it.
148173 /// If it is not set or has a different string, insert it and return it.
149- fn get_or_insert < ' py > ( & mut self , py : Python < ' py > , s : & str , ascii_only : bool ) -> Bound < ' py , PyString > {
174+ ///
175+ /// # Safety
176+ ///
177+ /// `ascii_only` must only be set to `true` if the string is guaranteed to be ASCII only.
178+ unsafe fn get_or_insert < ' py > ( & mut self , py : Python < ' py > , s : & str , ascii_only : bool ) -> Bound < ' py , PyString > {
150179 let hash = self . hash_builder . hash_one ( s) ;
151180
152181 let hash_index = hash as usize % CAPACITY ;
153182
154183 let set_entry = |entry : & mut Entry | {
155- let py_str = pystring_fast_new ( py, s, ascii_only) ;
184+ // SAFETY: caller upholds invariant
185+ let py_str = unsafe { pystring_fast_new_maybe_ascii ( py, s, ascii_only) } ;
156186 if let Some ( ( _, old_py_str) ) = entry. replace ( ( hash, py_str. clone ( ) . unbind ( ) ) ) {
157187 // micro-optimization: bind the old entry before dropping it so that PyO3 can
158188 // fast-path the drop (Bound::drop is faster than Py::drop)
@@ -199,8 +229,14 @@ impl PyStringCache {
199229 }
200230}
201231
202- pub fn pystring_fast_new < ' py > ( py : Python < ' py > , s : & str , ascii_only : bool ) -> Bound < ' py , PyString > {
232+ /// Create a new Python `str` from a string slice, with a fast path for ASCII strings
233+ ///
234+ /// # Safety
235+ ///
236+ /// `ascii_only` must only be set to `true` if the string is guaranteed to be ASCII only.
237+ unsafe fn pystring_fast_new_maybe_ascii < ' py > ( py : Python < ' py > , s : & str , ascii_only : bool ) -> Bound < ' py , PyString > {
203238 if ascii_only {
239+ // SAFETY: caller upholds invariant
204240 unsafe { pystring_ascii_new ( py, s) }
205241 } else {
206242 PyString :: new ( py, s)
@@ -209,22 +245,24 @@ pub fn pystring_fast_new<'py>(py: Python<'py>, s: &str, ascii_only: bool) -> Bou
209245
210246/// Faster creation of PyString from an ASCII string, inspired by
211247/// https://github.com/ijl/orjson/blob/3.10.0/src/str/create.rs#L41
212- #[ cfg( not( any( PyPy , GraalPy ) ) ) ]
213- unsafe fn pystring_ascii_new < ' py > ( py : Python < ' py > , s : & str ) -> Bound < ' py , PyString > {
214- // disabled on everything except tier-1 platforms because of a crash in the built wheels from CI,
215- // see https://github.com/pydantic/jiter/pull/175
216-
217- let ptr = pyo3:: ffi:: PyUnicode_New ( s. len ( ) as isize , 127 ) ;
218- // see https://github.com/pydantic/jiter/pull/72#discussion_r1545485907
219- debug_assert_eq ! ( pyo3:: ffi:: PyUnicode_KIND ( ptr) , pyo3:: ffi:: PyUnicode_1BYTE_KIND ) ;
220- let data_ptr = pyo3:: ffi:: PyUnicode_DATA ( ptr) . cast ( ) ;
221- core:: ptr:: copy_nonoverlapping ( s. as_ptr ( ) , data_ptr, s. len ( ) ) ;
222- core:: ptr:: write ( data_ptr. add ( s. len ( ) ) , 0 ) ;
223- Bound :: from_owned_ptr ( py, ptr) . downcast_into_unchecked ( )
224- }
225-
226- // unoptimized version (albeit not that much slower) on other platforms
227- #[ cfg( any( PyPy , GraalPy ) ) ]
228- unsafe fn pystring_ascii_new < ' py > ( py : Python < ' py > , s : & str ) -> Bound < ' py , PyString > {
229- PyString :: new ( py, s)
248+ ///
249+ /// # Safety
250+ ///
251+ /// `s` must be ASCII only
252+ pub unsafe fn pystring_ascii_new < ' py > ( py : Python < ' py > , s : & str ) -> Bound < ' py , PyString > {
253+ #[ cfg( not( any( PyPy , GraalPy , Py_LIMITED_API ) ) ) ]
254+ {
255+ let ptr = pyo3:: ffi:: PyUnicode_New ( s. len ( ) as isize , 127 ) ;
256+ // see https://github.com/pydantic/jiter/pull/72#discussion_r1545485907
257+ debug_assert_eq ! ( pyo3:: ffi:: PyUnicode_KIND ( ptr) , pyo3:: ffi:: PyUnicode_1BYTE_KIND ) ;
258+ let data_ptr = pyo3:: ffi:: PyUnicode_DATA ( ptr) . cast ( ) ;
259+ core:: ptr:: copy_nonoverlapping ( s. as_ptr ( ) , data_ptr, s. len ( ) ) ;
260+ core:: ptr:: write ( data_ptr. add ( s. len ( ) ) , 0 ) ;
261+ Bound :: from_owned_ptr ( py, ptr) . downcast_into_unchecked ( )
262+ }
263+
264+ #[ cfg( any( PyPy , GraalPy , Py_LIMITED_API ) ) ]
265+ {
266+ PyString :: new ( py, s)
267+ }
230268}
0 commit comments