1
+ use std:: borrow:: Cow ;
2
+
1
3
use pyo3:: prelude:: * ;
2
4
use pyo3:: sync:: GILOnceCell ;
3
5
use pyo3:: { intern, Py , PyAny , Python } ;
@@ -61,17 +63,19 @@ fn strip_underscores(s: &str) -> Option<String> {
61
63
// Double consecutive underscores are also not valid
62
64
// If there are no underscores at all, no need to replace anything
63
65
if s. starts_with ( '_' ) || s. ends_with ( '_' ) || !s. contains ( '_' ) || s. contains ( "__" ) {
64
- // no underscores to strip
65
- return None ;
66
+ // no underscores to strip, or underscores in the wrong place
67
+ None
68
+ } else {
69
+ Some ( s. replace ( '_' , "" ) )
66
70
}
67
- Some ( s. replace ( '_' , "" ) )
68
71
}
69
72
70
73
/// parse a string as an int
74
+ /// max length of the input is 4300 which is checked by jiter, see
75
+ /// https://docs.python.org/3/whatsnew/3.11.html#other-cpython-implementation-changes and
76
+ /// https://github.com/python/cpython/issues/95778 for more info on that length bound
71
77
pub fn str_as_int < ' py > ( input : & ( impl Input < ' py > + ?Sized ) , str : & str ) -> ValResult < EitherInt < ' py > > {
72
- let str = str. trim ( ) ;
73
-
74
- // we have to call `NumberInt::try_from` directly first so we fail fast if the string is too long
78
+ // we call `NumberInt::try_from` directly here rather than moving it into its own function, so that we fail fast if the string is too long
75
79
match NumberInt :: try_from ( str. as_bytes ( ) ) {
76
80
Ok ( NumberInt :: Int ( i) ) => return Ok ( EitherInt :: I64 ( i) ) ,
77
81
Ok ( NumberInt :: BigInt ( i) ) => return Ok ( EitherInt :: BigInt ( i) ) ,
@@ -82,10 +86,12 @@ pub fn str_as_int<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValResu
82
86
}
83
87
}
84
88
85
- if let Some ( str_stripped) = strip_decimal_zeros ( str) {
86
- _parse_str ( input, str_stripped)
87
- } else if let Some ( str_stripped) = strip_underscores ( str) {
88
- _parse_str ( input, & str_stripped)
89
+ if let Some ( cleaned_str) = clean_int_str ( str) {
90
+ match NumberInt :: try_from ( cleaned_str. as_ref ( ) . as_bytes ( ) ) {
91
+ Ok ( NumberInt :: Int ( i) ) => Ok ( EitherInt :: I64 ( i) ) ,
92
+ Ok ( NumberInt :: BigInt ( i) ) => Ok ( EitherInt :: BigInt ( i) ) ,
93
+ Err ( _) => Err ( ValError :: new ( ErrorTypeDefaults :: IntParsing , input) ) ,
94
+ }
89
95
} else {
90
96
Err ( ValError :: new ( ErrorTypeDefaults :: IntParsing , input) )
91
97
}
@@ -102,30 +108,32 @@ pub fn str_as_float<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValRe
102
108
}
103
109
}
104
110
105
- /// parse a string as an int, `input` is required here to get lifetimes to match up
106
- /// max length of the input is 4300 which is checked by jiter, see
107
- /// https://docs.python.org/3/whatsnew/3.11.html#other-cpython-implementation-changes and
108
- /// https://github.com/python/cpython/issues/95778 for more info in that length bound
109
- fn _parse_str < ' py > ( input : & ( impl Input < ' py > + ?Sized ) , str : & str ) -> ValResult < EitherInt < ' py > > {
110
- match NumberInt :: try_from ( str. as_bytes ( ) ) {
111
- Ok ( jiter:: NumberInt :: Int ( i) ) => Ok ( EitherInt :: I64 ( i) ) ,
112
- Ok ( jiter:: NumberInt :: BigInt ( i) ) => Ok ( EitherInt :: BigInt ( i) ) ,
113
- Err ( e) => match e. error_type {
114
- JsonErrorType :: NumberOutOfRange => Err ( ValError :: new ( ErrorTypeDefaults :: IntParsingSize , input) ) ,
115
- _ => Err ( ValError :: new ( ErrorTypeDefaults :: IntParsing , input) ) ,
116
- } ,
117
- }
118
- }
111
+ fn clean_int_str ( mut s : & str ) -> Option < Cow < str > > {
112
+ let len_before = s. len ( ) ;
113
+
114
+ // strip leading and trailing whitespace
115
+ s = s. trim ( ) ;
119
116
120
- /// we don't want to parse as f64 then call `float_as_int` as it can loose precision for large ints, therefore
121
- /// we strip `.0+` manually instead, then parse as i64
122
- fn strip_decimal_zeros ( s : & str ) -> Option < & str > {
117
+ // strip loading zeros
118
+ s = s. trim_start_matches ( '0' ) ;
119
+
120
+ // we don't want to parse as f64 then call `float_as_int` as it can lose precision for large ints, therefore
121
+ // we strip `.0+` manually instead
123
122
if let Some ( i) = s. find ( '.' ) {
124
123
if s[ i + 1 ..] . chars ( ) . all ( |c| c == '0' ) {
125
- return Some ( & s[ ..i] ) ;
124
+ s = & s[ ..i] ;
125
+ }
126
+ }
127
+
128
+ // remove underscores
129
+ if let Some ( str_stripped) = strip_underscores ( s) {
130
+ Some ( str_stripped. into ( ) )
131
+ } else {
132
+ match len_before == s. len ( ) {
133
+ true => None ,
134
+ false => Some ( s. into ( ) ) ,
126
135
}
127
136
}
128
- None
129
137
}
130
138
131
139
pub fn float_as_int < ' py > ( input : & ( impl Input < ' py > + ?Sized ) , float : f64 ) -> ValResult < EitherInt < ' py > > {
0 commit comments