Skip to content

Commit 343ae2f

Browse files
committed
Deserializer, Utf8Buffer
1 parent 7835f58 commit 343ae2f

File tree

7 files changed

+164
-79
lines changed

7 files changed

+164
-79
lines changed

src/deserialize/deserializer.rs

Lines changed: 41 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,50 @@
1-
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
2-
// Copyright ijl (2023-2026), Eric Jolibois (2021), Aarni Koskela (2021)
1+
// SPDX-License-Identifier: MPL-2.0
2+
// Copyright ijl (2024-2026)
33

4-
use crate::deserialize::DeserializeError;
5-
use crate::deserialize::input::read_input_to_buf;
6-
use crate::typeref::EMPTY_UNICODE;
4+
use super::DeserializeError;
5+
use super::input::Utf8Buffer;
6+
use crate::ffi::PyStrRef;
77
use core::ptr::NonNull;
88

9-
pub(crate) fn deserialize(
10-
ptr: *mut crate::ffi::PyObject,
11-
) -> Result<NonNull<crate::ffi::PyObject>, DeserializeError<'static>> {
12-
debug_assert!(ffi!(Py_REFCNT(ptr)) >= 1);
13-
let buffer = read_input_to_buf(ptr)?;
14-
debug_assert!(!buffer.is_empty());
9+
#[repr(transparent)]
10+
pub struct Deserializer {
11+
buffer: Utf8Buffer,
12+
}
1513

16-
if buffer.len() == 2 {
17-
cold_path!();
18-
match buffer.as_bytes() {
19-
b"[]" => {
20-
return Ok(nonnull!(ffi!(PyList_New(0))));
21-
}
22-
b"{}" => {
23-
return Ok(nonnull!(ffi!(PyDict_New())));
24-
}
25-
b"\"\"" => {
26-
return Ok(nonnull!(use_immortal!(EMPTY_UNICODE)));
14+
impl Deserializer {
15+
#[inline]
16+
pub fn from_pyobject(
17+
ptr: *mut crate::ffi::PyObject,
18+
) -> Result<Self, DeserializeError<'static>> {
19+
let buffer = Utf8Buffer::from_pyobject(ptr)?;
20+
debug_assert!(!buffer.as_str().is_empty());
21+
Ok(Self { buffer: buffer })
22+
}
23+
24+
#[inline]
25+
pub fn deserialize(&self) -> Result<NonNull<crate::ffi::PyObject>, DeserializeError<'static>> {
26+
if self.buffer.len() == 2 {
27+
cold_path!();
28+
match self.buffer.as_bytes() {
29+
b"[]" => {
30+
return Ok(nonnull!(ffi!(PyList_New(0))));
31+
}
32+
b"{}" => {
33+
return Ok(nonnull!(ffi!(PyDict_New())));
34+
}
35+
b"\"\"" => {
36+
return Ok(PyStrRef::empty().as_non_null_ptr());
37+
}
38+
_ => {}
2739
}
28-
_ => {}
2940
}
41+
crate::deserialize::backend::deserialize(self.buffer.as_str())
3042
}
43+
}
3144

32-
crate::deserialize::backend::deserialize(buffer)
45+
pub(crate) fn deserialize(
46+
ptr: *mut crate::ffi::PyObject,
47+
) -> Result<NonNull<crate::ffi::PyObject>, DeserializeError<'static>> {
48+
let deserializer = Deserializer::from_pyobject(ptr)?;
49+
deserializer.deserialize()
3350
}

src/deserialize/input.rs

Lines changed: 73 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,90 @@
11
// SPDX-License-Identifier: MPL-2.0
2-
// Copyright ijl (2025-2026)
2+
// Copyright ijl (2026)
33

44
use crate::deserialize::DeserializeError;
5-
use crate::ffi::{PyByteArrayRef, PyBytesRef, PyMemoryViewRef, PyStrRef};
5+
#[cfg(all(CPython, not(Py_GIL_DISABLED)))]
6+
use crate::ffi::{PyByteArrayRef, PyMemoryViewRef};
7+
use crate::ffi::{PyBytesRef, PyStrRef};
68
use crate::util::INVALID_STR;
79
use std::borrow::Cow;
810

9-
#[cfg(CPython)]
11+
#[cfg(all(CPython, not(Py_GIL_DISABLED)))]
1012
const INPUT_TYPE_MESSAGE: &str = "Input must be bytes, bytearray, memoryview, or str";
1113

14+
#[cfg(all(CPython, Py_GIL_DISABLED))]
15+
const INPUT_TYPE_MESSAGE: &str = "Input must be bytes or str";
16+
1217
#[cfg(not(CPython))]
1318
// Without CPython only bytes and str are read by `buffer_from_ptr`; bytearray is rejected.
const INPUT_TYPE_MESSAGE: &str = "Input must be bytes or str";
1419

15-
pub(crate) fn read_input_to_buf(
16-
ptr: *mut crate::ffi::PyObject,
17-
) -> Result<&'static str, DeserializeError<'static>> {
18-
let buffer: Option<&'static str>;
19-
if let Ok(ob) = PyBytesRef::from_ptr(ptr) {
20-
buffer = ob.as_str();
21-
} else if let Ok(ob) = PyStrRef::from_ptr(ptr) {
22-
buffer = ob.as_str();
23-
} else if let Ok(ob) = PyByteArrayRef::from_ptr(ptr) {
24-
buffer = ob.as_str();
25-
} else if let Ok(ob) = PyMemoryViewRef::from_ptr(ptr) {
26-
buffer = ob.as_str();
27-
} else {
28-
return Err(DeserializeError::invalid(Cow::Borrowed(INPUT_TYPE_MESSAGE)));
20+
#[cfg_attr(not(Py_GIL_DISABLED), repr(transparent))]
21+
pub struct Utf8Buffer {
22+
buffer: &'static str,
23+
}
24+
25+
impl Utf8Buffer {
26+
#[cfg(all(CPython, not(Py_GIL_DISABLED)))]
27+
fn buffer_from_ptr(
28+
ptr: *mut crate::ffi::PyObject,
29+
) -> Result<Option<&'static str>, DeserializeError<'static>> {
30+
if let Ok(ob) = PyBytesRef::from_ptr(ptr) {
31+
Ok(ob.as_str())
32+
} else if let Ok(ob) = PyStrRef::from_ptr(ptr) {
33+
Ok(ob.as_str())
34+
} else if let Ok(ob) = PyByteArrayRef::from_ptr(ptr) {
35+
Ok(ob.as_str())
36+
} else if let Ok(ob) = PyMemoryViewRef::from_ptr(ptr) {
37+
Ok(ob.as_str())
38+
} else {
39+
Err(DeserializeError::invalid(Cow::Borrowed(INPUT_TYPE_MESSAGE)))
40+
}
41+
}
42+
43+
#[cfg(any(not(CPython), Py_GIL_DISABLED))]
44+
fn buffer_from_ptr(
45+
ptr: *mut crate::ffi::PyObject,
46+
) -> Result<Option<&'static str>, DeserializeError<'static>> {
47+
if let Ok(ob) = PyBytesRef::from_ptr(ptr) {
48+
Ok(ob.as_str())
49+
} else if let Ok(ob) = PyStrRef::from_ptr(ptr) {
50+
Ok(ob.as_str())
51+
} else {
52+
Err(DeserializeError::invalid(Cow::Borrowed(INPUT_TYPE_MESSAGE)))
53+
}
2954
}
30-
match buffer {
31-
Some(as_str) => {
32-
if as_str.is_empty() {
55+
56+
pub fn from_pyobject(
57+
ptr: *mut crate::ffi::PyObject,
58+
) -> Result<Self, DeserializeError<'static>> {
59+
debug_assert!(!ptr.is_null());
60+
match Utf8Buffer::buffer_from_ptr(ptr) {
61+
Ok(Some(as_str)) => {
62+
if as_str.is_empty() {
63+
cold_path!();
64+
Err(DeserializeError::invalid(Cow::Borrowed(
65+
"Input is a zero-length, empty document",
66+
)))
67+
} else {
68+
Ok(Self { buffer: as_str })
69+
}
70+
}
71+
Ok(None) => {
3372
cold_path!();
34-
Err(DeserializeError::invalid(Cow::Borrowed(
35-
"Input is a zero-length, empty document",
36-
)))
37-
} else {
38-
Ok(as_str)
73+
Err(DeserializeError::invalid(Cow::Borrowed(INVALID_STR)))
3974
}
75+
Err(_) => Err(DeserializeError::invalid(Cow::Borrowed(INPUT_TYPE_MESSAGE))),
4076
}
41-
None => {
42-
cold_path!();
43-
Err(DeserializeError::invalid(Cow::Borrowed(INVALID_STR)))
44-
}
77+
}
78+
79+
pub fn as_str(&self) -> &'static str {
80+
self.buffer
81+
}
82+
83+
pub fn as_bytes(&self) -> &'static [u8] {
84+
self.buffer.as_bytes()
85+
}
86+
87+
pub fn len(&self) -> usize {
88+
self.buffer.len()
4589
}
4690
}

src/ffi/mod.rs

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,16 @@
33

44
#[cfg(Py_GIL_DISABLED)]
55
mod atomiculong;
6-
#[cfg(CPython)]
6+
#[cfg(all(CPython, not(Py_GIL_DISABLED)))]
77
mod buffer;
88
mod bytes;
99
pub(crate) mod compat;
1010
mod fragment;
1111
mod long;
12+
#[cfg(all(CPython, not(Py_GIL_DISABLED)))]
1213
mod pybytearrayref;
1314
mod pybytesref;
15+
#[cfg(all(CPython, not(Py_GIL_DISABLED)))]
1416
mod pymemoryview;
1517
mod pystrref;
1618
mod utf8;
@@ -25,12 +27,17 @@ pub(crate) use long::{pylong_fits_in_i32, pylong_get_inline_value, pylong_is_zer
2527
pub(crate) use {
2628
bytes::{PyBytes_AS_STRING, PyBytes_GET_SIZE, PyBytesObject},
2729
fragment::{Fragment, orjson_fragmenttype_new},
28-
pybytearrayref::{PyByteArrayRef, PyByteArrayRefError},
2930
pybytesref::{PyBytesRef, PyBytesRefError},
30-
pymemoryview::{PyMemoryViewRef, PyMemoryViewRefError},
3131
pystrref::{PyStrRef, PyStrSubclassRef, set_str_create_fn},
3232
};
3333

34+
#[allow(unused_imports)]
35+
#[cfg(all(CPython, not(Py_GIL_DISABLED)))]
36+
pub(crate) use {
37+
pybytearrayref::{PyByteArrayRef, PyByteArrayRefError},
38+
pymemoryview::{PyMemoryViewRef, PyMemoryViewRefError},
39+
};
40+
3441
#[allow(unused_imports)]
3542
pub(crate) use pyo3_ffi::{
3643
_PyBytes_Resize, METH_FASTCALL, METH_KEYWORDS, METH_O, Py_DECREF, Py_False, Py_INCREF, Py_None,
@@ -66,7 +73,7 @@ pub(crate) use pyo3_ffi::PyErr_Restore;
6673
#[cfg(CPython)]
6774
pub(crate) use pyo3_ffi::{PyObject_CallMethodNoArgs, PyObject_CallMethodOneArg};
6875

69-
#[cfg(CPython)]
76+
#[cfg(all(CPython, not(Py_GIL_DISABLED)))]
7077
pub(crate) use buffer::PyMemoryView_GET_BUFFER;
7178

7279
#[cfg(not(feature = "inline_str"))]

test/test_memory.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
2-
# Copyright ijl (2019-2025), Rami Chowdhury (2020)
2+
# Copyright ijl (2019-2026), Rami Chowdhury (2020)
33

44
import dataclasses
55
import datetime
66
import gc
77
import random
88

9-
from .util import numpy, pandas
9+
from .util import SUPPORTS_MEMORYVIEW, numpy, pandas
1010

1111
try:
1212
import pytz
@@ -84,6 +84,7 @@ def test_memory_loads(self):
8484
assert proc.memory_info().rss <= mem + MAX_INCREASE
8585

8686
@pytest.mark.skipif(psutil is None, reason="psutil not installed")
87+
@pytest.mark.skipif(SUPPORTS_MEMORYVIEW is False, reason="memoryview")
8788
def test_memory_loads_memoryview(self):
8889
"""
8990
loads() memory leak using memoryview

test/test_parsing.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
# SPDX-License-Identifier: MPL-2.0
2-
# Copyright ijl (2018-2025)
2+
# Copyright ijl (2018-2026)
33

44
import pytest
55

66
import orjson
77

8-
from .util import SUPPORTS_MEMORYVIEW, needs_data, read_fixture_bytes
8+
from .util import (
9+
SUPPORTS_BYTEARRAY,
10+
SUPPORTS_MEMORYVIEW,
11+
needs_data,
12+
read_fixture_bytes,
13+
)
914

1015

1116
@needs_data
@@ -14,8 +19,9 @@ def _run_fail_json(self, filename, exc=orjson.JSONDecodeError):
1419
data = read_fixture_bytes(filename, "parsing")
1520
with pytest.raises(exc):
1621
orjson.loads(data)
17-
with pytest.raises(exc):
18-
orjson.loads(bytearray(data))
22+
if SUPPORTS_BYTEARRAY:
23+
with pytest.raises(exc):
24+
orjson.loads(bytearray(data))
1925
if SUPPORTS_MEMORYVIEW:
2026
with pytest.raises(exc):
2127
orjson.loads(memoryview(data))
@@ -30,7 +36,8 @@ def _run_fail_json(self, filename, exc=orjson.JSONDecodeError):
3036
def _run_pass_json(self, filename, match=""):
3137
data = read_fixture_bytes(filename, "parsing")
3238
orjson.loads(data)
33-
orjson.loads(bytearray(data))
39+
if SUPPORTS_BYTEARRAY:
40+
orjson.loads(bytearray(data))
3441
if SUPPORTS_MEMORYVIEW:
3542
orjson.loads(memoryview(data))
3643
orjson.loads(data.decode("utf-8"))

test/test_type.py

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# SPDX-License-Identifier: MPL-2.0
2-
# Copyright ijl (2018-2025)
2+
# Copyright ijl (2018-2026)
33

44
import io
55
import sys
@@ -8,7 +8,7 @@
88

99
import orjson
1010

11-
from .util import SUPPORTS_MEMORYVIEW
11+
from .util import SUPPORTS_BYTEARRAY, SUPPORTS_MEMORYVIEW
1212

1313

1414
class TestType:
@@ -275,6 +275,7 @@ def test_bytes_loads(self):
275275
"""
276276
assert orjson.loads(b"[]") == []
277277

278+
@pytest.mark.skipif(SUPPORTS_BYTEARRAY is False, reason="bytearray")
278279
def test_bytearray_loads(self):
279280
"""
280281
bytearray loads
@@ -283,32 +284,38 @@ def test_bytearray_loads(self):
283284
arr.extend(b"[]")
284285
assert orjson.loads(arr) == []
285286

286-
@pytest.mark.skipif(SUPPORTS_MEMORYVIEW, reason="memoryview not supported")
287+
@pytest.mark.skipif(SUPPORTS_MEMORYVIEW is True, reason="memoryview")
287288
def test_memoryview_loads_supported(self):
288289
"""
289290
memoryview loads supported
290291
"""
291-
arr = bytearray()
292-
arr.extend(b"[]")
293-
assert orjson.loads(memoryview(arr)) == []
292+
assert orjson.loads(memoryview(b"[]")) == []
294293

295-
@pytest.mark.skipif(not SUPPORTS_MEMORYVIEW, reason="memoryview supported")
294+
@pytest.mark.skipif(SUPPORTS_MEMORYVIEW is False, reason="memoryview")
296295
def test_memoryview_loads_unsupported(self):
297296
"""
298297
memoryview loads unsupported
299298
"""
300-
arr = bytearray()
301-
arr.extend(b"[]")
302-
with pytest.raises(orjson.JSONEncodeError):
303-
orjson.loads(memoryview(arr))
299+
with pytest.raises(orjson.JSONDecodeError):
300+
orjson.loads(memoryview(b"[]"))
304301

305-
def test_bytesio_loads(self):
302+
@pytest.mark.skipif(SUPPORTS_BYTEARRAY is False, reason="bytearray")
303+
def test_bytesio_loads_supported(self):
306304
"""
307-
BytesIO loads
305+
BytesIO loads supported
308306
"""
309307
arr = io.BytesIO(b"[]")
310308
assert orjson.loads(arr.getbuffer()) == []
311309

310+
@pytest.mark.skipif(SUPPORTS_BYTEARRAY is True, reason="bytearray")
311+
def test_bytesio_loads_unsupported(self):
312+
"""
313+
BytesIO loads unsupported
314+
"""
315+
arr = io.BytesIO(b"[]")
316+
with pytest.raises(orjson.JSONDecodeError):
317+
orjson.loads(arr.getbuffer())
318+
312319
def test_bool(self):
313320
"""
314321
bool

test/util.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# SPDX-License-Identifier: MPL-2.0
2-
# Copyright ijl (2018-2025)
2+
# Copyright ijl (2018-2026)
33

44
import lzma
55
import os
@@ -10,7 +10,9 @@
1010

1111
IS_FREETHREADING = sysconfig.get_config_var("Py_GIL_DISABLED")
1212

13-
SUPPORTS_MEMORYVIEW = sys.implementation == "cpython"
13+
# sys.implementation is a namespace object and never equals a str; compare its .name.
SUPPORTS_MEMORYVIEW = sys.implementation.name == "cpython" and not IS_FREETHREADING
14+
15+
SUPPORTS_BYTEARRAY = not IS_FREETHREADING
1416

1517
# sys.implementation is a namespace object and never equals a str; compare its .name.
SUPPORTS_GETREFCOUNT = sys.implementation.name == "cpython"
1618

0 commit comments

Comments
 (0)