feat: Make num-bigint optional (#130)

fasterthanlime · web-flow · commit b09b969044c8 · 2024-09-12T11:54:04.000+01:00
diff --git a/crates/jiter-python/Cargo.toml b/crates/jiter-python/Cargo.toml
@@ -11,7 +11,7 @@ repository = {workspace = true}
 
 [dependencies]
 pyo3 = { workspace = true, features = ["num-bigint"] }
-jiter = { path = "../jiter", features = ["python"] }
+jiter = { path = "../jiter", features = ["python", "num-bigint"] }
 
 [features]
 # must be enabled when building with `cargo build`, maturin enables this automatically
diff --git a/crates/jiter/Cargo.toml b/crates/jiter/Cargo.toml
@@ -2,31 +2,37 @@
 name = "jiter"
 description = "Fast Iterable JSON parser"
 readme = "../../README.md"
-version = {workspace = true}
-edition = {workspace = true}
-authors = {workspace = true}
-license = {workspace = true}
-keywords = {workspace = true}
-categories = {workspace = true}
-homepage = {workspace = true}
-repository = {workspace = true}
+version = { workspace = true }
+edition = { workspace = true }
+authors = { workspace = true }
+license = { workspace = true }
+keywords = { workspace = true }
+categories = { workspace = true }
+homepage = { workspace = true }
+repository = { workspace = true }
 
 [dependencies]
-num-bigint = "0.4.4"
+num-bigint = { version = "0.4.4", optional = true }
 num-traits = "0.2.16"
 ahash = "0.8.0"
 smallvec = "1.11.0"
-pyo3 = { workspace = true, optional = true, features = ["num-bigint"] }
-lexical-parse-float = { version = "0.8.5", features =  ["format"] }
+pyo3 = { workspace = true, optional = true }
+lexical-parse-float = { version = "0.8.5", features = ["format"] }
 bitvec = "1.0.1"
 
 [features]
+default = ["num-bigint"]
 python = ["dep:pyo3", "dep:pyo3-build-config"]
+num-bigint = ["dep:num-bigint", "pyo3?/num-bigint"] # `?` = only forward to pyo3 when it is already enabled
 
 [dev-dependencies]
 bencher = "0.1.5"
 paste = "1.0.7"
-serde_json = {version = "1.0.87", features = ["preserve_order", "arbitrary_precision", "float_roundtrip"]}
+serde_json = { version = "1.0.87", features = [
+    "preserve_order",
+    "arbitrary_precision",
+    "float_roundtrip",
+] }
 serde = "1.0.147"
 pyo3 = { workspace = true, features = ["auto-initialize"] }
 codspeed-bencher-compat = "2.7.1"
@@ -71,5 +77,5 @@ doc_markdown = "allow"
 implicit_clone = "allow"
 iter_without_into_iter = "allow"
 return_self_not_must_use = "allow"
-inline_always = "allow"  # TODO remove?
-match_same_arms = "allow"  # TODO remove?
+inline_always = "allow"                      # TODO remove?
+match_same_arms = "allow"                    # TODO remove?
diff --git a/crates/jiter/src/number_decoder.rs b/crates/jiter/src/number_decoder.rs
@@ -1,5 +1,8 @@
+#[cfg(feature = "num-bigint")]
 use num_bigint::BigInt;
+#[cfg(feature = "num-bigint")]
 use num_traits::cast::ToPrimitive;
+
 use std::ops::Range;
 
 use lexical_parse_float::{format as lexical_format, FromLexicalWithOptions, Options as ParseFloatOptions};
@@ -16,13 +19,15 @@ pub trait AbstractNumberDecoder {
 #[derive(Debug, Clone, PartialEq)]
 pub enum NumberInt {
     Int(i64),
+    #[cfg(feature = "num-bigint")]
     BigInt(BigInt),
 }
 
 impl From<NumberInt> for f64 {
     fn from(num: NumberInt) -> Self {
         match num {
             NumberInt::Int(int) => int as f64,
+            #[cfg(feature = "num-bigint")]
             NumberInt::BigInt(big_int) => big_int.to_f64().unwrap_or(f64::NAN),
         }
     }
@@ -118,6 +123,7 @@ impl pyo3::ToPyObject for NumberAny {
     fn to_object(&self, py: pyo3::Python<'_>) -> pyo3::PyObject {
         match self {
             Self::Int(NumberInt::Int(int)) => int.to_object(py),
+            #[cfg(feature = "num-bigint")]
             Self::Int(NumberInt::BigInt(big_int)) => big_int.to_object(py),
             Self::Float(float) => float.to_object(py),
         }
@@ -220,8 +226,8 @@ impl IntParse {
         index += 1;
         let (chunk, new_index) = IntChunk::parse_small(data, index, first_value);
 
-        let mut big_value: BigInt = match chunk {
-            IntChunk::Ongoing(value) => value.into(),
+        let ongoing: u64 = match chunk {
+            IntChunk::Ongoing(value) => value,
             IntChunk::Done(value) => {
                 let mut value_i64 = value as i64;
                 if !positive {
@@ -231,62 +237,76 @@ impl IntParse {
             }
             IntChunk::Float => return Ok((Self::Float, new_index)),
         };
-        index = new_index;
 
-        // number is too big for i64, we need ot use a big int
-        loop {
-            let (chunk, new_index) = IntChunk::parse_big(data, index);
-            if (new_index - start) > 4300 {
-                return json_err!(NumberOutOfRange, start + 4301);
-            }
-            match chunk {
-                IntChunk::Ongoing(value) => {
-                    big_value *= ONGOING_CHUNK_MULTIPLIER;
-                    big_value += value;
-                    index = new_index;
+        // number is too big for i64, we need to use a BigInt,
+        // or error out if num-bigint is not enabled
+
+        #[cfg(not(feature = "num-bigint"))]
+        {
+            // silence unused variable warning
+            let _ = (ongoing, start);
+            return json_err!(NumberOutOfRange, index);
+        }
+
+        #[cfg(feature = "num-bigint")]
+        {
+            #[cfg(target_arch = "aarch64")]
+            // on aarch64 we use 128-bit registers - 16 bytes
+            const ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(16);
+            #[cfg(not(target_arch = "aarch64"))]
+            // decode_int_chunk_fallback - we parse 18 bytes when the number is ongoing
+            const ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(18);
+
+            const POW_10: [u64; 18] = [
+                10u64.pow(0),
+                10u64.pow(1),
+                10u64.pow(2),
+                10u64.pow(3),
+                10u64.pow(4),
+                10u64.pow(5),
+                10u64.pow(6),
+                10u64.pow(7),
+                10u64.pow(8),
+                10u64.pow(9),
+                10u64.pow(10),
+                10u64.pow(11),
+                10u64.pow(12),
+                10u64.pow(13),
+                10u64.pow(14),
+                10u64.pow(15),
+                10u64.pow(16),
+                10u64.pow(17),
+            ];
+
+            let mut big_value: BigInt = ongoing.into();
+            index = new_index;
+
+            loop {
+                let (chunk, new_index) = IntChunk::parse_big(data, index);
+                if (new_index - start) > 4300 {
+                    return json_err!(NumberOutOfRange, start + 4301);
                 }
-                IntChunk::Done(value) => {
-                    big_value *= POW_10[new_index - index];
-                    big_value += value;
-                    if !positive {
-                        big_value = -big_value;
+                match chunk {
+                    IntChunk::Ongoing(value) => {
+                        big_value *= ONGOING_CHUNK_MULTIPLIER;
+                        big_value += value;
+                        index = new_index;
+                    }
+                    IntChunk::Done(value) => {
+                        big_value *= POW_10[new_index - index];
+                        big_value += value;
+                        if !positive {
+                            big_value = -big_value;
+                        }
+                        return Ok((Self::Int(NumberInt::BigInt(big_value)), new_index));
                     }
-                    return Ok((Self::Int(NumberInt::BigInt(big_value)), new_index));
+                    IntChunk::Float => return Ok((Self::Float, new_index)),
                 }
-                IntChunk::Float => return Ok((Self::Float, new_index)),
             }
         }
     }
 }
 
-static POW_10: [u64; 18] = [
-    10u64.pow(0),
-    10u64.pow(1),
-    10u64.pow(2),
-    10u64.pow(3),
-    10u64.pow(4),
-    10u64.pow(5),
-    10u64.pow(6),
-    10u64.pow(7),
-    10u64.pow(8),
-    10u64.pow(9),
-    10u64.pow(10),
-    10u64.pow(11),
-    10u64.pow(12),
-    10u64.pow(13),
-    10u64.pow(14),
-    10u64.pow(15),
-    10u64.pow(16),
-    10u64.pow(17),
-];
-
-#[cfg(target_arch = "aarch64")]
-// in aarch64 we use a 128 bit registers - 16 bytes
-static ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(16);
-#[cfg(not(target_arch = "aarch64"))]
-// decode_int_chunk_fallback - we parse 18 bytes when the number is ongoing
-static ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(18);
-
 pub(crate) enum IntChunk {
     Ongoing(u64),
     Done(u64),
@@ -362,6 +382,8 @@ pub(crate) static INT_CHAR_MAP: [bool; 256] = {
 
 pub struct NumberRange {
     pub range: Range<usize>,
+    // in some cfg configurations, this field is never read.
+    #[allow(dead_code)]
     pub is_int: bool,
 }
 
diff --git a/crates/jiter/src/value.rs b/crates/jiter/src/value.rs
@@ -1,6 +1,7 @@
 use std::borrow::Cow;
 use std::sync::Arc;
 
+#[cfg(feature = "num-bigint")]
 use num_bigint::BigInt;
 use smallvec::SmallVec;
 
@@ -16,6 +17,7 @@ pub enum JsonValue<'s> {
     Null,
     Bool(bool),
     Int(i64),
+    #[cfg(feature = "num-bigint")]
     BigInt(BigInt),
     Float(f64),
     Str(Cow<'s, str>),
@@ -34,6 +36,7 @@ impl pyo3::ToPyObject for JsonValue<'_> {
             Self::Null => py.None().to_object(py),
             Self::Bool(b) => b.to_object(py),
             Self::Int(i) => i.to_object(py),
+            #[cfg(feature = "num-bigint")]
             Self::BigInt(b) => b.to_object(py),
             Self::Float(f) => f.to_object(py),
             Self::Str(s) => s.to_object(py),
@@ -78,6 +81,7 @@ fn value_static(v: JsonValue<'_>) -> JsonValue<'static> {
         JsonValue::Null => JsonValue::Null,
         JsonValue::Bool(b) => JsonValue::Bool(b),
         JsonValue::Int(i) => JsonValue::Int(i),
+        #[cfg(feature = "num-bigint")]
         JsonValue::BigInt(b) => JsonValue::BigInt(b),
         JsonValue::Float(f) => JsonValue::Float(f),
         JsonValue::Str(s) => JsonValue::Str(s.into_owned().into()),
@@ -200,6 +204,7 @@ fn take_value<'j, 's>(
             let n = parser.consume_number::<NumberAny>(peek.into_inner(), allow_inf_nan);
             match n {
                 Ok(NumberAny::Int(NumberInt::Int(int))) => Ok(JsonValue::Int(int)),
+                #[cfg(feature = "num-bigint")]
                 Ok(NumberAny::Int(NumberInt::BigInt(big_int))) => Ok(JsonValue::BigInt(big_int)),
                 Ok(NumberAny::Float(float)) => Ok(JsonValue::Float(float)),
                 Err(e) => {