1+ use std:: borrow:: Cow ;
2+
13use pyo3:: prelude:: * ;
24use pyo3:: sync:: GILOnceCell ;
35use pyo3:: { intern, Py , PyAny , Python } ;
@@ -61,17 +63,19 @@ fn strip_underscores(s: &str) -> Option<String> {
6163 // Double consecutive underscores are also not valid
6264 // If there are no underscores at all, no need to replace anything
6365 if s. starts_with ( '_' ) || s. ends_with ( '_' ) || !s. contains ( '_' ) || s. contains ( "__" ) {
64- // no underscores to strip
65- return None ;
66+ // no underscores to strip, or underscores in the wrong place
67+ None
68+ } else {
69+ Some ( s. replace ( '_' , "" ) )
6670 }
67- Some ( s. replace ( '_' , "" ) )
6871}
6972
7073/// parse a string as an int
74+ /// max length of the input is 4300 which is checked by jiter, see
75+ /// https://docs.python.org/3/whatsnew/3.11.html#other-cpython-implementation-changes and
76+ /// https://github.com/python/cpython/issues/95778 for more info on that length bound
7177pub fn str_as_int < ' py > ( input : & ( impl Input < ' py > + ?Sized ) , str : & str ) -> ValResult < EitherInt < ' py > > {
72- let str = str. trim ( ) ;
73-
74- // we have to call `NumberInt::try_from` directly first so we fail fast if the string is too long
78+ // we can't move `NumberInt::try_from` into its own function since we need to fail fast if the string is too long
7579 match NumberInt :: try_from ( str. as_bytes ( ) ) {
7680 Ok ( NumberInt :: Int ( i) ) => return Ok ( EitherInt :: I64 ( i) ) ,
7781 Ok ( NumberInt :: BigInt ( i) ) => return Ok ( EitherInt :: BigInt ( i) ) ,
@@ -82,10 +86,12 @@ pub fn str_as_int<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValResu
8286 }
8387 }
8488
85- if let Some ( str_stripped) = strip_decimal_zeros ( str) {
86- _parse_str ( input, str_stripped)
87- } else if let Some ( str_stripped) = strip_underscores ( str) {
88- _parse_str ( input, & str_stripped)
89+ if let Some ( cleaned_str) = clean_int_str ( str) {
90+ match NumberInt :: try_from ( cleaned_str. as_ref ( ) . as_bytes ( ) ) {
91+ Ok ( NumberInt :: Int ( i) ) => Ok ( EitherInt :: I64 ( i) ) ,
92+ Ok ( NumberInt :: BigInt ( i) ) => Ok ( EitherInt :: BigInt ( i) ) ,
93+ Err ( _) => Err ( ValError :: new ( ErrorTypeDefaults :: IntParsing , input) ) ,
94+ }
8995 } else {
9096 Err ( ValError :: new ( ErrorTypeDefaults :: IntParsing , input) )
9197 }
@@ -102,30 +108,32 @@ pub fn str_as_float<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValRe
102108 }
103109}
104110
105- /// parse a string as an int, `input` is required here to get lifetimes to match up
106- /// max length of the input is 4300 which is checked by jiter, see
107- /// https://docs.python.org/3/whatsnew/3.11.html#other-cpython-implementation-changes and
108- /// https://github.com/python/cpython/issues/95778 for more info in that length bound
109- fn _parse_str < ' py > ( input : & ( impl Input < ' py > + ?Sized ) , str : & str ) -> ValResult < EitherInt < ' py > > {
110- match NumberInt :: try_from ( str. as_bytes ( ) ) {
111- Ok ( jiter:: NumberInt :: Int ( i) ) => Ok ( EitherInt :: I64 ( i) ) ,
112- Ok ( jiter:: NumberInt :: BigInt ( i) ) => Ok ( EitherInt :: BigInt ( i) ) ,
113- Err ( e) => match e. error_type {
114- JsonErrorType :: NumberOutOfRange => Err ( ValError :: new ( ErrorTypeDefaults :: IntParsingSize , input) ) ,
115- _ => Err ( ValError :: new ( ErrorTypeDefaults :: IntParsing , input) ) ,
116- } ,
117- }
118- }
111+ fn clean_int_str ( mut s : & str ) -> Option < Cow < str > > {
112+ let len_before = s. len ( ) ;
113+
114+ // strip leading and trailing whitespace
115+ s = s. trim ( ) ;
119116
120- /// we don't want to parse as f64 then call `float_as_int` as it can loose precision for large ints, therefore
121- /// we strip `.0+` manually instead, then parse as i64
122- fn strip_decimal_zeros ( s : & str ) -> Option < & str > {
117+ // strip leading zeros
118+ s = s. trim_start_matches ( '0' ) ;
119+
120+ // we don't want to parse as f64 then call `float_as_int` as it can lose precision for large ints, therefore
121+ // we strip `.0+` manually instead
123122 if let Some ( i) = s. find ( '.' ) {
124123 if s[ i + 1 ..] . chars ( ) . all ( |c| c == '0' ) {
125- return Some ( & s[ ..i] ) ;
124+ s = & s[ ..i] ;
125+ }
126+ }
127+
128+ // remove underscores
129+ if let Some ( str_stripped) = strip_underscores ( s) {
130+ Some ( str_stripped. into ( ) )
131+ } else {
132+ match len_before == s. len ( ) {
133+ true => None ,
134+ false => Some ( s. into ( ) ) ,
126135 }
127136 }
128- None
129137}
130138
131139pub fn float_as_int < ' py > ( input : & ( impl Input < ' py > + ?Sized ) , float : f64 ) -> ValResult < EitherInt < ' py > > {
0 commit comments