Skip to content

Commit c7daf16

Browse files
support newest jiter behaviour (#1092)
Co-authored-by: David Hewitt <[email protected]>
1 parent 5b63e7a commit c7daf16

File tree

12 files changed

+105
-113
lines changed

12 files changed

+105
-113
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ base64 = "0.21.5"
4343
num-bigint = "0.4.4"
4444
python3-dll-a = "0.2.7"
4545
uuid = "1.5.0"
46-
jiter = {version = "0.0.4", features = ["python"]}
47-
#jiter = {path = "../jiter", features = ["python"]}
46+
jiter = {version = "0.0.5", features = ["python"]}
4847

4948
[lib]
5049
name = "_pydantic_core"

python/pydantic_core/_pydantic_core.pyi

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,7 @@ def to_json(
385385
JSON bytes.
386386
"""
387387

388-
def from_json(data: str | bytes | bytearray, *, allow_inf_nan: bool = True) -> Any:
388+
def from_json(data: str | bytes | bytearray, *, allow_inf_nan: bool = True, cache_strings: bool = True) -> Any:
389389
"""
390390
Deserialize JSON data to a Python object.
391391
@@ -394,6 +394,8 @@ def from_json(data: str | bytes | bytearray, *, allow_inf_nan: bool = True) -> A
394394
Arguments:
395395
data: The JSON data to deserialize.
396396
allow_inf_nan: Whether to allow `Infinity`, `-Infinity` and `NaN` values as `json.loads()` does by default.
397+
cache_strings: Whether to cache strings to avoid constructing new Python objects;
398+
this should significantly improve performance while slightly increasing memory usage.
397399
398400
Raises:
399401
ValueError: If deserialization fails.

src/input/input_abstract.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@ use pyo3::exceptions::PyValueError;
44
use pyo3::types::{PyDict, PyType};
55
use pyo3::{intern, prelude::*};
66

7-
use jiter::JsonValue;
8-
97
use crate::errors::{AsLocItem, ErrorTypeDefaults, InputValue, ValError, ValResult};
108
use crate::tools::py_err;
119
use crate::{PyMultiHostUrl, PyUrl};
@@ -89,8 +87,6 @@ pub trait Input<'a>: fmt::Debug + ToPyObject + AsLocItem + Sized {
8987

9088
fn validate_dataclass_args(&'a self, dataclass_name: &str) -> ValResult<GenericArguments<'a>>;
9189

92-
fn parse_json(&'a self) -> ValResult<JsonValue>;
93-
9490
fn validate_str(
9591
&'a self,
9692
strict: bool,

src/input/input_json.rs

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use super::datetime::{
1414
float_as_time, int_as_datetime, int_as_duration, int_as_time, EitherDate, EitherDateTime, EitherTime,
1515
};
1616
use super::return_enums::ValidationMatch;
17-
use super::shared::{float_as_int, int_as_bool, map_json_err, str_as_bool, str_as_float, str_as_int};
17+
use super::shared::{float_as_int, int_as_bool, str_as_bool, str_as_float, str_as_int};
1818
use super::{
1919
BorrowInput, EitherBytes, EitherFloat, EitherInt, EitherString, EitherTimedelta, GenericArguments, GenericIterable,
2020
GenericIterator, GenericMapping, Input, JsonArgs,
@@ -84,13 +84,6 @@ impl<'a> Input<'a> for JsonValue {
8484
}
8585
}
8686

87-
fn parse_json(&'a self) -> ValResult<JsonValue> {
88-
match self {
89-
JsonValue::Str(s) => JsonValue::parse(s.as_bytes(), true).map_err(|e| map_json_err(self, e)),
90-
_ => Err(ValError::new(ErrorTypeDefaults::JsonType, self)),
91-
}
92-
}
93-
9487
fn exact_str(&'a self) -> ValResult<EitherString<'a>> {
9588
match self {
9689
JsonValue::Str(s) => Ok(s.as_str().into()),
@@ -367,10 +360,6 @@ impl<'a> Input<'a> for String {
367360
))
368361
}
369362

370-
fn parse_json(&'a self) -> ValResult<JsonValue> {
371-
JsonValue::parse(self.as_bytes(), true).map_err(|e| map_json_err(self, e))
372-
}
373-
374363
fn validate_str(
375364
&'a self,
376365
_strict: bool,

src/input/input_python.rs

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ use pyo3::types::{
1010
use pyo3::types::{PyDictItems, PyDictKeys, PyDictValues};
1111
use pyo3::{intern, PyTypeInfo};
1212

13-
use jiter::JsonValue;
1413
use speedate::MicrosecondsPrecisionOverflowBehavior;
1514

1615
use crate::errors::{AsLocItem, ErrorType, ErrorTypeDefaults, InputValue, LocItem, ValError, ValResult};
@@ -26,8 +25,7 @@ use super::datetime::{
2625
};
2726
use super::return_enums::ValidationMatch;
2827
use super::shared::{
29-
decimal_as_int, float_as_int, get_enum_meta_object, int_as_bool, map_json_err, str_as_bool, str_as_float,
30-
str_as_int,
28+
decimal_as_int, float_as_int, get_enum_meta_object, int_as_bool, str_as_bool, str_as_float, str_as_int,
3129
};
3230
use super::{
3331
py_string_str, BorrowInput, EitherBytes, EitherFloat, EitherInt, EitherString, EitherTimedelta, GenericArguments,
@@ -195,22 +193,6 @@ impl<'a> Input<'a> for PyAny {
195193
}
196194
}
197195

198-
fn parse_json(&'a self) -> ValResult<JsonValue> {
199-
let bytes = if let Ok(py_bytes) = self.downcast::<PyBytes>() {
200-
py_bytes.as_bytes()
201-
} else if let Ok(py_str) = self.downcast::<PyString>() {
202-
let str = py_string_str(py_str)?;
203-
str.as_bytes()
204-
} else if let Ok(py_byte_array) = self.downcast::<PyByteArray>() {
205-
// Safety: from_slice does not run arbitrary Python code and the GIL is held so the
206-
// bytes array will not be mutated while `JsonValue::parse` is reading it
207-
unsafe { py_byte_array.as_bytes() }
208-
} else {
209-
return Err(ValError::new(ErrorTypeDefaults::JsonType, self));
210-
};
211-
JsonValue::parse(bytes, true).map_err(|e| map_json_err(self, e))
212-
}
213-
214196
fn validate_str(
215197
&'a self,
216198
strict: bool,

src/input/input_string.rs

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
use pyo3::prelude::*;
22
use pyo3::types::{PyDict, PyString};
33

4-
use jiter::JsonValue;
54
use speedate::MicrosecondsPrecisionOverflowBehavior;
65

76
use crate::errors::{AsLocItem, ErrorTypeDefaults, InputValue, LocItem, ValError, ValResult};
@@ -12,7 +11,7 @@ use crate::validators::decimal::create_decimal;
1211
use super::datetime::{
1312
bytes_as_date, bytes_as_datetime, bytes_as_time, bytes_as_timedelta, EitherDate, EitherDateTime, EitherTime,
1413
};
15-
use super::shared::{map_json_err, str_as_bool, str_as_float};
14+
use super::shared::{str_as_bool, str_as_float};
1615
use super::{
1716
BorrowInput, EitherBytes, EitherFloat, EitherInt, EitherString, EitherTimedelta, GenericArguments, GenericIterable,
1817
GenericIterator, GenericMapping, Input, ValidationMatch,
@@ -86,16 +85,6 @@ impl<'a> Input<'a> for StringMapping<'a> {
8685
}
8786
}
8887

89-
fn parse_json(&'a self) -> ValResult<JsonValue> {
90-
match self {
91-
Self::String(s) => {
92-
let str = py_string_str(s)?;
93-
JsonValue::parse(str.as_bytes(), true).map_err(|e| map_json_err(self, e))
94-
}
95-
Self::Mapping(_) => Err(ValError::new(ErrorTypeDefaults::JsonType, self)),
96-
}
97-
}
98-
9988
fn validate_str(
10089
&'a self,
10190
_strict: bool,

src/input/shared.rs

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
use pyo3::sync::GILOnceCell;
22
use pyo3::{intern, Py, PyAny, Python, ToPyObject};
33

4-
use jiter::JsonValueError;
54
use num_bigint::BigInt;
65

7-
use crate::errors::{ErrorType, ErrorTypeDefaults, ValError, ValResult};
6+
use crate::errors::{ErrorTypeDefaults, ValError, ValResult};
87

98
use super::{EitherFloat, EitherInt, Input};
109
static ENUM_META_OBJECT: GILOnceCell<Py<PyAny>> = GILOnceCell::new();
@@ -20,16 +19,6 @@ pub fn get_enum_meta_object(py: Python) -> Py<PyAny> {
2019
.clone()
2120
}
2221

23-
pub fn map_json_err<'a>(input: &'a impl Input<'a>, error: JsonValueError) -> ValError {
24-
ValError::new(
25-
ErrorType::JsonInvalid {
26-
error: error.to_string(),
27-
context: None,
28-
},
29-
input,
30-
)
31-
}
32-
3322
pub fn str_as_bool<'a>(input: &'a impl Input<'a>, str: &str) -> ValResult<bool> {
3423
if str == "0"
3524
|| str.eq_ignore_ascii_case("f")

src/lib.rs

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ extern crate core;
55
use std::sync::OnceLock;
66

77
use pyo3::exceptions::PyTypeError;
8-
use pyo3::types::{PyByteArray, PyBytes, PyString};
98
use pyo3::{prelude::*, sync::GILOnceCell};
109

1110
// parse this first to get access to the contained macro
@@ -37,17 +36,16 @@ pub use serializers::{
3736
};
3837
pub use validators::{validate_core_schema, PySome, SchemaValidator};
3938

40-
#[pyfunction(signature = (data, *, allow_inf_nan=true))]
41-
pub fn from_json(py: Python, data: &PyAny, allow_inf_nan: bool) -> PyResult<PyObject> {
42-
if let Ok(py_bytes) = data.downcast::<PyBytes>() {
43-
jiter::python_parse(py, py_bytes.as_bytes(), allow_inf_nan)
44-
} else if let Ok(py_str) = data.downcast::<PyString>() {
45-
jiter::python_parse(py, py_str.to_str()?.as_bytes(), allow_inf_nan)
46-
} else if let Ok(py_byte_array) = data.downcast::<PyByteArray>() {
47-
jiter::python_parse(py, &py_byte_array.to_vec(), allow_inf_nan)
48-
} else {
49-
Err(PyTypeError::new_err("Expected bytes, bytearray or str"))
50-
}
39+
use crate::input::Input;
40+
41+
#[pyfunction(signature = (data, *, allow_inf_nan=true, cache_strings=true))]
42+
pub fn from_json(py: Python, data: &PyAny, allow_inf_nan: bool, cache_strings: bool) -> PyResult<PyObject> {
43+
let v_match = data
44+
.validate_bytes(false)
45+
.map_err(|_| PyTypeError::new_err("Expected bytes, bytearray or str"))?;
46+
let json_either_bytes = v_match.into_inner();
47+
let json_bytes = json_either_bytes.as_slice();
48+
jiter::python_parse(py, json_bytes, allow_inf_nan, cache_strings).map_err(|e| jiter::map_json_error(json_bytes, &e))
5149
}
5250

5351
pub fn get_pydantic_core_version() -> &'static str {

src/validators/json.rs

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@ use pyo3::intern;
22
use pyo3::prelude::*;
33
use pyo3::types::PyDict;
44

5-
use crate::errors::ValResult;
6-
use crate::input::Input;
5+
use jiter::JsonValue;
6+
7+
use crate::errors::{ErrorType, ErrorTypeDefaults, ValError, ValLineError, ValResult};
8+
use crate::input::{EitherBytes, Input, ValidationMatch};
79
use crate::tools::SchemaDict;
810

911
use super::{build_validator, BuildValidator, CombinedValidator, DefinitionsBuilder, ValidationState, Validator};
@@ -50,17 +52,52 @@ impl Validator for JsonValidator {
5052
input: &'data impl Input<'data>,
5153
state: &mut ValidationState,
5254
) -> ValResult<PyObject> {
53-
let json_value = input.parse_json()?;
55+
let v_match = validate_json_bytes(input)?;
56+
let json_either_bytes = v_match.unpack(state);
57+
let json_bytes = json_either_bytes.as_slice();
5458
match self.validator {
55-
Some(ref validator) => match validator.validate(py, &json_value, state) {
56-
Ok(v) => Ok(v),
57-
Err(err) => Err(err),
58-
},
59-
None => Ok(json_value.to_object(py)),
59+
Some(ref validator) => {
60+
let json_value = JsonValue::parse(json_bytes, true).map_err(|e| map_json_err(input, e, json_bytes))?;
61+
validator.validate(py, &json_value, state)
62+
}
63+
None => {
64+
let obj =
65+
jiter::python_parse(py, json_bytes, true, true).map_err(|e| map_json_err(input, e, json_bytes))?;
66+
Ok(obj)
67+
}
6068
}
6169
}
6270

6371
fn get_name(&self) -> &str {
6472
&self.name
6573
}
6674
}
75+
76+
pub fn validate_json_bytes<'data>(input: &'data impl Input<'data>) -> ValResult<ValidationMatch<EitherBytes<'data>>> {
77+
match input.validate_bytes(false) {
78+
Ok(v_match) => Ok(v_match),
79+
Err(ValError::LineErrors(e)) => Err(ValError::LineErrors(
80+
e.into_iter().map(map_bytes_error).collect::<Vec<_>>(),
81+
)),
82+
Err(e) => Err(e),
83+
}
84+
}
85+
86+
fn map_bytes_error(line_error: ValLineError) -> ValLineError {
87+
match line_error.error_type {
88+
ErrorType::BytesType { .. } => {
89+
ValLineError::new_custom_input(ErrorTypeDefaults::JsonType, line_error.input_value)
90+
}
91+
_ => line_error,
92+
}
93+
}
94+
95+
pub fn map_json_err<'a>(input: &'a impl Input<'a>, error: jiter::JsonError, json_bytes: &[u8]) -> ValError {
96+
ValError::new(
97+
ErrorType::JsonInvalid {
98+
error: error.description(json_bytes),
99+
context: None,
100+
},
101+
input,
102+
)
103+
}

0 commit comments

Comments
 (0)