Skip to content

Commit f636403

Browse files
authored
trim leading zeros before parsing str -> int (#1266)
1 parent e23d7eb commit f636403

File tree

2 files changed

+42
-29
lines changed

2 files changed

+42
-29
lines changed

src/input/shared.rs

Lines changed: 37 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::borrow::Cow;
2+
13
use pyo3::prelude::*;
24
use pyo3::sync::GILOnceCell;
35
use pyo3::{intern, Py, PyAny, Python};
@@ -61,17 +63,19 @@ fn strip_underscores(s: &str) -> Option<String> {
6163
// Double consecutive underscores are also not valid
6264
// If there are no underscores at all, no need to replace anything
6365
if s.starts_with('_') || s.ends_with('_') || !s.contains('_') || s.contains("__") {
64-
// no underscores to strip
65-
return None;
66+
// no underscores to strip, or underscores in the wrong place
67+
None
68+
} else {
69+
Some(s.replace('_', ""))
6670
}
67-
Some(s.replace('_', ""))
6871
}
6972

7073
/// parse a string as an int
74+
/// max length of the input is 4300 which is checked by jiter, see
75+
/// https://docs.python.org/3/whatsnew/3.11.html#other-cpython-implementation-changes and
76+
/// https://github.com/python/cpython/issues/95778 for more info on that length bound
7177
pub fn str_as_int<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValResult<EitherInt<'py>> {
72-
let str = str.trim();
73-
74-
// we have to call `NumberInt::try_from` directly first so we fail fast if the string is too long
78+
// we can't move `NumberInt::try_from` into its own function, as we need to fail fast if the string is too long
7579
match NumberInt::try_from(str.as_bytes()) {
7680
Ok(NumberInt::Int(i)) => return Ok(EitherInt::I64(i)),
7781
Ok(NumberInt::BigInt(i)) => return Ok(EitherInt::BigInt(i)),
@@ -82,10 +86,12 @@ pub fn str_as_int<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValResu
8286
}
8387
}
8488

85-
if let Some(str_stripped) = strip_decimal_zeros(str) {
86-
_parse_str(input, str_stripped)
87-
} else if let Some(str_stripped) = strip_underscores(str) {
88-
_parse_str(input, &str_stripped)
89+
if let Some(cleaned_str) = clean_int_str(str) {
90+
match NumberInt::try_from(cleaned_str.as_ref().as_bytes()) {
91+
Ok(NumberInt::Int(i)) => Ok(EitherInt::I64(i)),
92+
Ok(NumberInt::BigInt(i)) => Ok(EitherInt::BigInt(i)),
93+
Err(_) => Err(ValError::new(ErrorTypeDefaults::IntParsing, input)),
94+
}
8995
} else {
9096
Err(ValError::new(ErrorTypeDefaults::IntParsing, input))
9197
}
@@ -102,30 +108,32 @@ pub fn str_as_float<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValRe
102108
}
103109
}
104110

105-
/// parse a string as an int, `input` is required here to get lifetimes to match up
106-
/// max length of the input is 4300 which is checked by jiter, see
107-
/// https://docs.python.org/3/whatsnew/3.11.html#other-cpython-implementation-changes and
108-
/// https://github.com/python/cpython/issues/95778 for more info in that length bound
109-
fn _parse_str<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValResult<EitherInt<'py>> {
110-
match NumberInt::try_from(str.as_bytes()) {
111-
Ok(jiter::NumberInt::Int(i)) => Ok(EitherInt::I64(i)),
112-
Ok(jiter::NumberInt::BigInt(i)) => Ok(EitherInt::BigInt(i)),
113-
Err(e) => match e.error_type {
114-
JsonErrorType::NumberOutOfRange => Err(ValError::new(ErrorTypeDefaults::IntParsingSize, input)),
115-
_ => Err(ValError::new(ErrorTypeDefaults::IntParsing, input)),
116-
},
117-
}
118-
}
111+
fn clean_int_str(mut s: &str) -> Option<Cow<str>> {
112+
let len_before = s.len();
113+
114+
// strip leading and trailing whitespace
115+
s = s.trim();
119116

120-
/// we don't want to parse as f64 then call `float_as_int` as it can loose precision for large ints, therefore
121-
/// we strip `.0+` manually instead, then parse as i64
122-
fn strip_decimal_zeros(s: &str) -> Option<&str> {
117+
// strip leading zeros
118+
s = s.trim_start_matches('0');
119+
120+
// we don't want to parse as f64 then call `float_as_int` as it can lose precision for large ints, therefore
121+
// we strip `.0+` manually instead
123122
if let Some(i) = s.find('.') {
124123
if s[i + 1..].chars().all(|c| c == '0') {
125-
return Some(&s[..i]);
124+
s = &s[..i];
125+
}
126+
}
127+
128+
// remove underscores
129+
if let Some(str_stripped) = strip_underscores(s) {
130+
Some(str_stripped.into())
131+
} else {
132+
match len_before == s.len() {
133+
true => None,
134+
false => Some(s.into()),
126135
}
127136
}
128-
None
129137
}
130138

131139
pub fn float_as_int<'py>(input: &(impl Input<'py> + ?Sized), float: f64) -> ValResult<EitherInt<'py>> {

tests/validators/test_int.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@
2626
('42', 42),
2727
(42.0, 42),
2828
('42.0', 42),
29+
('042', 42),
30+
('4_2', 42),
31+
('4_2.0', 42),
32+
('04_2.0', 42),
33+
('000001', 1),
2934
('123456789.0', 123_456_789),
3035
('123456789123456.00001', Err('Input should be a valid integer, unable to parse string as an integer')),
3136
(int(1e10), int(1e10)),

0 commit comments

Comments
 (0)