Skip to content

Commit e73b2d1

Browse files
Adopt jiter 0.2.0 (#1250)
Co-authored-by: David Hewitt <[email protected]>
1 parent 1c4baac commit e73b2d1

File tree

6 files changed

+46
-45
lines changed

6 files changed

+46
-45
lines changed

Cargo.lock

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ base64 = "0.21.7"
4444
num-bigint = "0.4.4"
4545
python3-dll-a = "0.2.7"
4646
uuid = "1.7.0"
47-
jiter = { version = "0.1.1", features = ["python"] }
47+
jiter = { version = "0.2.1", features = ["python"] }
4848

4949
[lib]
5050
name = "_pydantic_core"

src/input/return_enums.rs

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use serde::{ser::Error, Serialize, Serializer};
1919
use crate::errors::{
2020
py_err_string, ErrorType, ErrorTypeDefaults, InputValue, ToErrorValue, ValError, ValLineError, ValResult,
2121
};
22-
use crate::tools::{extract_i64, py_err};
22+
use crate::tools::{extract_i64, new_py_string, py_err};
2323
use crate::validators::{CombinedValidator, Exactness, ValidationState, Validator};
2424

2525
use super::{py_error_on_minusone, BorrowInput, Input};
@@ -437,13 +437,7 @@ impl<'a> EitherString<'a> {
437437

438438
pub fn as_py_string(&'a self, py: Python<'a>, cache_str: StringCacheMode) -> Bound<'a, PyString> {
439439
match self {
440-
Self::Cow(cow) => {
441-
if matches!(cache_str, StringCacheMode::All) {
442-
jiter::cached_py_string(py, cow.as_ref())
443-
} else {
444-
PyString::new_bound(py, cow.as_ref())
445-
}
446-
}
440+
Self::Cow(cow) => new_py_string(py, cow.as_ref(), cache_str),
447441
Self::Py(py_string) => py_string.clone(),
448442
}
449443
}

src/input/shared.rs

Lines changed: 26 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use pyo3::prelude::*;
22
use pyo3::sync::GILOnceCell;
33
use pyo3::{intern, Py, PyAny, Python};
44

5-
use num_bigint::BigInt;
5+
use jiter::{JsonErrorType, NumberInt};
66

77
use crate::errors::{ErrorTypeDefaults, ValError, ValResult};
88

@@ -68,29 +68,24 @@ fn strip_underscores(s: &str) -> Option<String> {
6868
}
6969

7070
/// parse a string as an int
71-
///
72-
/// max length of the input is 4300, see
73-
/// https://docs.python.org/3/whatsnew/3.11.html#other-cpython-implementation-changes and
74-
/// https://github.com/python/cpython/issues/95778 for more info in that length bound
7571
pub fn str_as_int<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValResult<EitherInt<'py>> {
7672
let str = str.trim();
77-
let len = str.len();
78-
if len > 4300 {
79-
Err(ValError::new(ErrorTypeDefaults::IntParsingSize, input))
80-
} else if let Some(int) = _parse_str(input, str, len) {
81-
Ok(int)
82-
} else if let Some(str_stripped) = strip_decimal_zeros(str) {
83-
if let Some(int) = _parse_str(input, str_stripped, len) {
84-
Ok(int)
85-
} else {
86-
Err(ValError::new(ErrorTypeDefaults::IntParsing, input))
73+
74+
// we have to call `NumberInt::try_from` directly first so we fail fast if the string is too long
75+
match NumberInt::try_from(str.as_bytes()) {
76+
Ok(NumberInt::Int(i)) => return Ok(EitherInt::I64(i)),
77+
Ok(NumberInt::BigInt(i)) => return Ok(EitherInt::BigInt(i)),
78+
Err(e) => {
79+
if e.error_type == JsonErrorType::NumberOutOfRange {
80+
return Err(ValError::new(ErrorTypeDefaults::IntParsingSize, input));
81+
}
8782
}
83+
}
84+
85+
if let Some(str_stripped) = strip_decimal_zeros(str) {
86+
_parse_str(input, str_stripped)
8887
} else if let Some(str_stripped) = strip_underscores(str) {
89-
if let Some(int) = _parse_str(input, &str_stripped, len) {
90-
Ok(int)
91-
} else {
92-
Err(ValError::new(ErrorTypeDefaults::IntParsing, input))
93-
}
88+
_parse_str(input, &str_stripped)
9489
} else {
9590
Err(ValError::new(ErrorTypeDefaults::IntParsing, input))
9691
}
@@ -108,16 +103,18 @@ pub fn str_as_float<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValRe
108103
}
109104

110105
/// parse a string as an int, `input` is required here to get lifetimes to match up
111-
///
112-
fn _parse_str<'py>(_input: &(impl Input<'py> + ?Sized), str: &str, len: usize) -> Option<EitherInt<'py>> {
113-
if len < 19 {
114-
if let Ok(i) = str.parse::<i64>() {
115-
return Some(EitherInt::I64(i));
116-
}
117-
} else if let Ok(i) = str.parse::<BigInt>() {
118-
return Some(EitherInt::BigInt(i));
106+
/// max length of the input is 4300 which is checked by jiter, see
107+
/// https://docs.python.org/3/whatsnew/3.11.html#other-cpython-implementation-changes and
108+
/// https://github.com/python/cpython/issues/95778 for more info on that length bound
109+
fn _parse_str<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValResult<EitherInt<'py>> {
110+
match NumberInt::try_from(str.as_bytes()) {
111+
Ok(jiter::NumberInt::Int(i)) => Ok(EitherInt::I64(i)),
112+
Ok(jiter::NumberInt::BigInt(i)) => Ok(EitherInt::BigInt(i)),
113+
Err(e) => match e.error_type {
114+
JsonErrorType::NumberOutOfRange => Err(ValError::new(ErrorTypeDefaults::IntParsingSize, input)),
115+
_ => Err(ValError::new(ErrorTypeDefaults::IntParsing, input)),
116+
},
119117
}
120-
None
121118
}
122119

123120
/// we don't want to parse as f64 then call `float_as_int` as it can lose precision for large ints, therefore

src/tools.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ use pyo3::prelude::*;
55
use pyo3::types::{PyDict, PyString};
66
use pyo3::{ffi, intern, FromPyObject};
77

8+
use jiter::{cached_py_string, pystring_fast_new, StringCacheMode};
9+
810
pub trait SchemaDict<'py> {
911
fn get_as<T>(&self, key: &Bound<'_, PyString>) -> PyResult<Option<T>>
1012
where
@@ -143,3 +145,13 @@ pub fn extract_i64(v: &Bound<'_, PyAny>) -> Option<i64> {
143145
None
144146
}
145147
}
148+
149+
pub(crate) fn new_py_string<'py>(py: Python<'py>, s: &str, cache_str: StringCacheMode) -> Bound<'py, PyString> {
150+
// we could use `bytecount::num_chars(s.as_bytes()) == s.len()` as orjson does, but it doesn't appear to be faster
151+
let ascii_only = false;
152+
if matches!(cache_str, StringCacheMode::All) {
153+
cached_py_string(py, s, ascii_only)
154+
} else {
155+
pystring_fast_new(py, s, ascii_only)
156+
}
157+
}

src/validators/validation_state.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use pyo3::types::PyString;
44
use jiter::StringCacheMode;
55

66
use crate::recursion_guard::{ContainsRecursionState, RecursionState};
7+
use crate::tools::new_py_string;
78

89
use super::Extra;
910

@@ -72,11 +73,7 @@ impl<'a, 'py> ValidationState<'a, 'py> {
7273
}
7374

7475
pub fn maybe_cached_str(&self, py: Python<'py>, s: &str) -> Bound<'py, PyString> {
75-
if matches!(self.extra.cache_str, StringCacheMode::All) {
76-
jiter::cached_py_string(py, s)
77-
} else {
78-
PyString::new_bound(py, s)
79-
}
76+
new_py_string(py, s, self.extra.cache_str)
8077
}
8178
}
8279

0 commit comments

Comments
 (0)