Skip to content

Commit b09b969

Browse files
feat: Make num-bigint optional (#130)
1 parent 754395c commit b09b969

File tree

4 files changed

+97
-64
lines changed

4 files changed

+97
-64
lines changed

crates/jiter-python/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ repository = {workspace = true}
1111

1212
[dependencies]
1313
pyo3 = { workspace = true, features = ["num-bigint"] }
14-
jiter = { path = "../jiter", features = ["python"] }
14+
jiter = { path = "../jiter", features = ["python", "num-bigint"] }
1515

1616
[features]
1717
# must be enabled when building with `cargo build`, maturin enables this automatically

crates/jiter/Cargo.toml

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,37 @@
22
name = "jiter"
33
description = "Fast Iterable JSON parser"
44
readme = "../../README.md"
5-
version = {workspace = true}
6-
edition = {workspace = true}
7-
authors = {workspace = true}
8-
license = {workspace = true}
9-
keywords = {workspace = true}
10-
categories = {workspace = true}
11-
homepage = {workspace = true}
12-
repository = {workspace = true}
5+
version = { workspace = true }
6+
edition = { workspace = true }
7+
authors = { workspace = true }
8+
license = { workspace = true }
9+
keywords = { workspace = true }
10+
categories = { workspace = true }
11+
homepage = { workspace = true }
12+
repository = { workspace = true }
1313

1414
[dependencies]
15-
num-bigint = "0.4.4"
15+
num-bigint = { version = "0.4.4", optional = true }
1616
num-traits = "0.2.16"
1717
ahash = "0.8.0"
1818
smallvec = "1.11.0"
19-
pyo3 = { workspace = true, optional = true, features = ["num-bigint"] }
20-
lexical-parse-float = { version = "0.8.5", features = ["format"] }
19+
pyo3 = { workspace = true, optional = true }
20+
lexical-parse-float = { version = "0.8.5", features = ["format"] }
2121
bitvec = "1.0.1"
2222

2323
[features]
24+
default = ["num-bigint"]
2425
python = ["dep:pyo3", "dep:pyo3-build-config"]
26+
num-bigint = ["dep:num-bigint", "pyo3?/num-bigint"] # `?`: only forward to pyo3 when `python` has already enabled it, so default builds don't pull in pyo3
2527

2628
[dev-dependencies]
2729
bencher = "0.1.5"
2830
paste = "1.0.7"
29-
serde_json = {version = "1.0.87", features = ["preserve_order", "arbitrary_precision", "float_roundtrip"]}
31+
serde_json = { version = "1.0.87", features = [
32+
"preserve_order",
33+
"arbitrary_precision",
34+
"float_roundtrip",
35+
] }
3036
serde = "1.0.147"
3137
pyo3 = { workspace = true, features = ["auto-initialize"] }
3238
codspeed-bencher-compat = "2.7.1"
@@ -71,5 +77,5 @@ doc_markdown = "allow"
7177
implicit_clone = "allow"
7278
iter_without_into_iter = "allow"
7379
return_self_not_must_use = "allow"
74-
inline_always = "allow" # TODO remove?
75-
match_same_arms = "allow" # TODO remove?
80+
inline_always = "allow" # TODO remove?
81+
match_same_arms = "allow" # TODO remove?

crates/jiter/src/number_decoder.rs

Lines changed: 71 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
#[cfg(feature = "num-bigint")]
12
use num_bigint::BigInt;
3+
#[cfg(feature = "num-bigint")]
24
use num_traits::cast::ToPrimitive;
5+
36
use std::ops::Range;
47

58
use lexical_parse_float::{format as lexical_format, FromLexicalWithOptions, Options as ParseFloatOptions};
@@ -16,13 +19,15 @@ pub trait AbstractNumberDecoder {
1619
#[derive(Debug, Clone, PartialEq)]
1720
pub enum NumberInt {
1821
Int(i64),
22+
#[cfg(feature = "num-bigint")]
1923
BigInt(BigInt),
2024
}
2125

2226
impl From<NumberInt> for f64 {
2327
fn from(num: NumberInt) -> Self {
2428
match num {
2529
NumberInt::Int(int) => int as f64,
30+
#[cfg(feature = "num-bigint")]
2631
NumberInt::BigInt(big_int) => big_int.to_f64().unwrap_or(f64::NAN),
2732
}
2833
}
@@ -118,6 +123,7 @@ impl pyo3::ToPyObject for NumberAny {
118123
fn to_object(&self, py: pyo3::Python<'_>) -> pyo3::PyObject {
119124
match self {
120125
Self::Int(NumberInt::Int(int)) => int.to_object(py),
126+
#[cfg(feature = "num-bigint")]
121127
Self::Int(NumberInt::BigInt(big_int)) => big_int.to_object(py),
122128
Self::Float(float) => float.to_object(py),
123129
}
@@ -220,8 +226,8 @@ impl IntParse {
220226
index += 1;
221227
let (chunk, new_index) = IntChunk::parse_small(data, index, first_value);
222228

223-
let mut big_value: BigInt = match chunk {
224-
IntChunk::Ongoing(value) => value.into(),
229+
let ongoing: u64 = match chunk {
230+
IntChunk::Ongoing(value) => value,
225231
IntChunk::Done(value) => {
226232
let mut value_i64 = value as i64;
227233
if !positive {
@@ -231,62 +237,76 @@ impl IntParse {
231237
}
232238
IntChunk::Float => return Ok((Self::Float, new_index)),
233239
};
234-
index = new_index;
235240

236-
// number is too big for i64, we need ot use a big int
237-
loop {
238-
let (chunk, new_index) = IntChunk::parse_big(data, index);
239-
if (new_index - start) > 4300 {
240-
return json_err!(NumberOutOfRange, start + 4301);
241-
}
242-
match chunk {
243-
IntChunk::Ongoing(value) => {
244-
big_value *= ONGOING_CHUNK_MULTIPLIER;
245-
big_value += value;
246-
index = new_index;
241+
// number is too big for i64, we need to use a BigInt,
242+
// or error out if num-bigint is not enabled
243+
244+
#[cfg(not(feature = "num-bigint"))]
245+
{
246+
// silence unused variable warning
247+
let _ = (ongoing, start);
248+
return json_err!(NumberOutOfRange, index);
249+
}
250+
251+
#[cfg(feature = "num-bigint")]
252+
{
253+
#[cfg(target_arch = "aarch64")]
254+
// on aarch64 we use 128-bit registers - 16 bytes
255+
const ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(16);
256+
#[cfg(not(target_arch = "aarch64"))]
257+
// decode_int_chunk_fallback - we parse 18 bytes when the number is ongoing
258+
const ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(18);
259+
260+
const POW_10: [u64; 18] = [
261+
10u64.pow(0),
262+
10u64.pow(1),
263+
10u64.pow(2),
264+
10u64.pow(3),
265+
10u64.pow(4),
266+
10u64.pow(5),
267+
10u64.pow(6),
268+
10u64.pow(7),
269+
10u64.pow(8),
270+
10u64.pow(9),
271+
10u64.pow(10),
272+
10u64.pow(11),
273+
10u64.pow(12),
274+
10u64.pow(13),
275+
10u64.pow(14),
276+
10u64.pow(15),
277+
10u64.pow(16),
278+
10u64.pow(17),
279+
];
280+
281+
let mut big_value: BigInt = ongoing.into();
282+
index = new_index;
283+
284+
loop {
285+
let (chunk, new_index) = IntChunk::parse_big(data, index);
286+
if (new_index - start) > 4300 {
287+
return json_err!(NumberOutOfRange, start + 4301);
247288
}
248-
IntChunk::Done(value) => {
249-
big_value *= POW_10[new_index - index];
250-
big_value += value;
251-
if !positive {
252-
big_value = -big_value;
289+
match chunk {
290+
IntChunk::Ongoing(value) => {
291+
big_value *= ONGOING_CHUNK_MULTIPLIER;
292+
big_value += value;
293+
index = new_index;
294+
}
295+
IntChunk::Done(value) => {
296+
big_value *= POW_10[new_index - index];
297+
big_value += value;
298+
if !positive {
299+
big_value = -big_value;
300+
}
301+
return Ok((Self::Int(NumberInt::BigInt(big_value)), new_index));
253302
}
254-
return Ok((Self::Int(NumberInt::BigInt(big_value)), new_index));
303+
IntChunk::Float => return Ok((Self::Float, new_index)),
255304
}
256-
IntChunk::Float => return Ok((Self::Float, new_index)),
257305
}
258306
}
259307
}
260308
}
261309

262-
static POW_10: [u64; 18] = [
263-
10u64.pow(0),
264-
10u64.pow(1),
265-
10u64.pow(2),
266-
10u64.pow(3),
267-
10u64.pow(4),
268-
10u64.pow(5),
269-
10u64.pow(6),
270-
10u64.pow(7),
271-
10u64.pow(8),
272-
10u64.pow(9),
273-
10u64.pow(10),
274-
10u64.pow(11),
275-
10u64.pow(12),
276-
10u64.pow(13),
277-
10u64.pow(14),
278-
10u64.pow(15),
279-
10u64.pow(16),
280-
10u64.pow(17),
281-
];
282-
283-
#[cfg(target_arch = "aarch64")]
284-
// in aarch64 we use a 128 bit registers - 16 bytes
285-
static ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(16);
286-
#[cfg(not(target_arch = "aarch64"))]
287-
// decode_int_chunk_fallback - we parse 18 bytes when the number is ongoing
288-
static ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(18);
289-
290310
pub(crate) enum IntChunk {
291311
Ongoing(u64),
292312
Done(u64),
@@ -362,6 +382,8 @@ pub(crate) static INT_CHAR_MAP: [bool; 256] = {
362382

363383
pub struct NumberRange {
364384
pub range: Range<usize>,
385+
// in some cfg configurations, this field is never read.
386+
#[allow(dead_code)]
365387
pub is_int: bool,
366388
}
367389

crates/jiter/src/value.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::borrow::Cow;
22
use std::sync::Arc;
33

4+
#[cfg(feature = "num-bigint")]
45
use num_bigint::BigInt;
56
use smallvec::SmallVec;
67

@@ -16,6 +17,7 @@ pub enum JsonValue<'s> {
1617
Null,
1718
Bool(bool),
1819
Int(i64),
20+
#[cfg(feature = "num-bigint")]
1921
BigInt(BigInt),
2022
Float(f64),
2123
Str(Cow<'s, str>),
@@ -34,6 +36,7 @@ impl pyo3::ToPyObject for JsonValue<'_> {
3436
Self::Null => py.None().to_object(py),
3537
Self::Bool(b) => b.to_object(py),
3638
Self::Int(i) => i.to_object(py),
39+
#[cfg(feature = "num-bigint")]
3740
Self::BigInt(b) => b.to_object(py),
3841
Self::Float(f) => f.to_object(py),
3942
Self::Str(s) => s.to_object(py),
@@ -78,6 +81,7 @@ fn value_static(v: JsonValue<'_>) -> JsonValue<'static> {
7881
JsonValue::Null => JsonValue::Null,
7982
JsonValue::Bool(b) => JsonValue::Bool(b),
8083
JsonValue::Int(i) => JsonValue::Int(i),
84+
#[cfg(feature = "num-bigint")]
8185
JsonValue::BigInt(b) => JsonValue::BigInt(b),
8286
JsonValue::Float(f) => JsonValue::Float(f),
8387
JsonValue::Str(s) => JsonValue::Str(s.into_owned().into()),
@@ -200,6 +204,7 @@ fn take_value<'j, 's>(
200204
let n = parser.consume_number::<NumberAny>(peek.into_inner(), allow_inf_nan);
201205
match n {
202206
Ok(NumberAny::Int(NumberInt::Int(int))) => Ok(JsonValue::Int(int)),
207+
#[cfg(feature = "num-bigint")]
203208
Ok(NumberAny::Int(NumberInt::BigInt(big_int))) => Ok(JsonValue::BigInt(big_int)),
204209
Ok(NumberAny::Float(float)) => Ok(JsonValue::Float(float)),
205210
Err(e) => {

0 commit comments

Comments
 (0)