support multiple zeros as an int (#1269)

samuelcolvin · web-flow · commit 4adf47f10cc3 · 2024-04-16T09:49:59.000-05:00
diff --git a/src/input/shared.rs b/src/input/shared.rs
@@ -115,7 +115,7 @@ fn clean_int_str(mut s: &str) -> Option<Cow<str>> {
     s = s.trim();
 
     // strip loading zeros
-    s = s.trim_start_matches('0');
+    s = strip_leading_zeros(s)?;
 
     // we don't want to parse as f64 then call `float_as_int` as it can lose precision for large ints, therefore
     // we strip `.0+` manually instead
@@ -137,6 +137,37 @@ fn clean_int_str(mut s: &str) -> Option<Cow<str>> {
     }
 }
 
+/// strip leading zeros from a string, we can't simply use `s.trim_start_matches('0')`, because:
+/// - we need to keep one zero if the string is only zeros e.g. `000` -> `0`
+/// - we need to keep one zero if the string is a float which is an exact int e.g. `00.0` -> `0.0`
+/// - underscores within leading zeros should also be stripped e.g. `0_000` -> `0`, but not `_000`
+/// - a string starting with a sign is returned unchanged (e.g. `-42.0`), as `trim_start_matches('0')` did
+fn strip_leading_zeros(s: &str) -> Option<&str> {
+    let mut char_iter = s.char_indices();
+    match char_iter.next() {
+        // if we get a leading zero we continue
+        Some((_, '0')) => (),
+        // a non-zero digit or a sign means there are no leading zeros to strip, return the whole string
+        Some((_, '1'..='9' | '-' | '+')) => return Some(s),
+        // anything else (including an empty string) is invalid, we return None
+        _ => return None,
+    };
+    for (i, c) in char_iter {
+        match c {
+            // more leading zeros, or an underscore between them - we're still "within the number"
+            '0' | '_' => (),
+            // any other digit we return the rest of the string
+            '1'..='9' => return Some(&s[i..]),
+            // on a dot keep the preceding character (ASCII `0` or `_`, so `i - 1` is a char boundary)
+            '.' => return Some(&s[(i - 1)..]),
+            // anything else is invalid, we return None
+            _ => return None,
+        }
+    }
+    // only zeros/underscores: return the last character; a trailing `_` is returned as-is and fails later
+    Some(&s[s.len() - 1..])
+}
+
 pub fn float_as_int<'py>(input: &(impl Input<'py> + ?Sized), float: f64) -> ValResult<EitherInt<'py>> {
     if float.is_infinite() || float.is_nan() {
         Err(ValError::new(ErrorTypeDefaults::FiniteNumber, input))
diff --git a/tests/validators/test_int.py b/tests/validators/test_int.py
@@ -20,22 +20,37 @@
         (True, 1),
         (0, 0),
         ('0', 0),
+        ('00', 0),
+        ('000', 0),
+        ('0_000', 0),
         (1, 1),
         ('  1  ', 1),
         (42, 42),
         ('42', 42),
         (42.0, 42),
+        ('0.0', 0),
+        ('00.0', 0),
+        ('00.00', 0),
         ('42.0', 42),
         ('42.00', 42),
         ('042', 42),
+        ('01', 1),
+        ('09', 9),
+        ('00_', Err('Input should be a valid integer, unable to parse string as an integer')),
+        # ':' is the ASCII character immediately after '9', so it must be rejected rather than treated as a digit
+        ('0:', Err('Input should be a valid integer, unable to parse string as an integer')),
         ('4_2', 42),
+        ('0_42', 42),
         ('4_2.0', 42),
         ('04_2.0', 42),
         ('  04_2.0 ', 42),
-        # because zeros are striped before underscores this is not allowed
-        ('  0_42.0 ', Err('Input should be a valid integer, unable to parse string as an integer')),
+        ('  0_42.0 ', 42),
+        ('  _042.0 ', Err('Input should be a valid integer, unable to parse string as an integer')),
+        ('42_', Err('Input should be a valid integer, unable to parse string as an integer')),
+        ('42_.0', Err('Input should be a valid integer, unable to parse string as an integer')),
         ('000001', 1),
         ('123456789.0', 123_456_789),
+        (' ', Err('Input should be a valid integer, unable to parse string as an integer')),
         ('1.', Err('Input should be a valid integer, unable to parse string as an integer')),
         ('42.', Err('Input should be a valid integer, unable to parse string as an integer')),
         ('123456789123456.00001', Err('Input should be a valid integer, unable to parse string as an integer')),