Skip to content

Commit e3a95a8

Browse files
committed
support bigint
1 parent a297310 commit e3a95a8

File tree

12 files changed

+107
-88
lines changed

12 files changed

+107
-88
lines changed

Cargo.toml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,13 @@ debug = true
3232
[workspace.dependencies]
3333
jiter = { path = "crates/jiter", version = "0.5.0" }
3434
batson = { path = "crates/batson", version = "0.5.0" }
35-
pyo3 = { version = "0.22.0" }
36-
pyo3-build-config = { version = "0.22.0" }
3735
bencher = "0.1.5"
38-
paste = "1.0.7"
3936
codspeed-bencher-compat = "2.7.1"
37+
num-bigint = "0.4.4"
38+
num-traits = "0.2.16"
39+
paste = "1.0.7"
40+
pyo3 = { version = "0.22.0" }
41+
pyo3-build-config = { version = "0.22.0" }
4042
smallvec = "2.0.0-alpha.7"
4143
serde = "1.0.210"
4244
serde_json = "1.0.128"

crates/batson/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ repository = {workspace = true}
1414
[dependencies]
1515
bytemuck = { version = "1.17.1", features = ["aarch64_simd", "derive", "align_offset"] }
1616
jiter = { workspace = true }
17-
num = "0.4.3"
17+
num-bigint = { workspace = true }
1818
serde = { workspace = true }
1919
serde_json = { workspace = true }
2020
simdutf8 = { version = "0.1.4", features = ["aarch64_neon"] }

crates/batson/src/array.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ impl<'b> HetArray<'b> {
5454
}
5555
}
5656

57-
pub fn to_json(&self, d: &mut Decoder<'b>) -> DecodeResult<JsonArray<'b>> {
57+
pub fn to_value(&self, d: &mut Decoder<'b>) -> DecodeResult<JsonArray<'b>> {
5858
(0..self.len())
5959
.map(|_| d.take_value())
6060
.collect::<DecodeResult<SmallVec<_, 8>>>()
@@ -98,7 +98,7 @@ pub(crate) fn header_array_to_json<'b>(d: &mut Decoder<'b>, length: Length) -> D
9898
let length = length.decode(d)?;
9999
d.take_slice(length)?
100100
.iter()
101-
.map(|b| Header::decode(*b, d).map(|h| h.as_value(d)))
101+
.map(|b| Header::decode(*b, d).map(|h| h.header_as_value(d)))
102102
.collect::<DecodeResult<_>>()
103103
.map(Arc::new)
104104
}
@@ -296,7 +296,7 @@ impl PackedArray {
296296
}
297297
}
298298
}
299-
JsonValue::BigInt(b) => todo!("BigInt {b:?}"),
299+
JsonValue::BigInt(_) => return None,
300300
JsonValue::Float(f) => {
301301
u8_only = None;
302302
i64_only = None;
@@ -373,7 +373,7 @@ mod test {
373373
};
374374

375375
assert_eq!(offsets, &[0, 1, 3]);
376-
let decode_array = het_array.to_json(&mut decoder).unwrap();
376+
let decode_array = het_array.to_value(&mut decoder).unwrap();
377377
assert_arrays_eq!(decode_array, array);
378378
}
379379

@@ -391,7 +391,7 @@ mod test {
391391

392392
let het_array = HetArray::decode_header(&mut decoder, 0.into()).unwrap();
393393
assert_eq!(het_array.len(), 0);
394-
let decode_array = het_array.to_json(&mut decoder).unwrap();
394+
let decode_array = het_array.to_value(&mut decoder).unwrap();
395395
assert_arrays_eq!(decode_array, array);
396396
}
397397

@@ -500,7 +500,7 @@ mod test {
500500
let mut d = decoder.clone();
501501
assert!(!het_array.get(&mut d, 200));
502502

503-
let decode_array = het_array.to_json(&mut decoder).unwrap();
503+
let decode_array = het_array.to_value(&mut decoder).unwrap();
504504
assert_arrays_eq!(decode_array, array);
505505
}
506506

crates/batson/src/decoder.rs

Lines changed: 25 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1+
use jiter::JsonValue;
2+
use num_bigint::{BigInt, Sign};
13
use std::fmt;
24
use std::mem::{align_of, size_of};
35

4-
use jiter::JsonValue;
5-
66
use crate::array::{
77
header_array_to_json, header_array_write_to_json, i64_array_slice, i64_array_to_json, u8_array_slice,
88
u8_array_to_json, HetArray,
@@ -56,16 +56,16 @@ impl<'b> Decoder<'b> {
5656
Header::Null => Ok(JsonValue::Null),
5757
Header::Bool(b) => Ok(JsonValue::Bool(b)),
5858
Header::Int(n) => n.decode_i64(self).map(JsonValue::Int),
59-
Header::IntBig(i) => todo!("decoding for bigint {i:?}"),
59+
Header::IntBig(s, l) => self.take_big_int(s, l).map(JsonValue::BigInt),
6060
Header::Float(n) => n.decode_f64(self).map(JsonValue::Float),
61-
Header::Str(l) => self.decode_str(l).map(|s| JsonValue::Str(s.into())),
61+
Header::Str(l) => self.take_str_len(l).map(|s| JsonValue::Str(s.into())),
6262
Header::Object(length) => {
6363
let obj = Object::decode_header(self, length)?;
64-
obj.to_json(self).map(JsonValue::Object)
64+
obj.to_value(self).map(JsonValue::Object)
6565
}
6666
Header::HetArray(length) => {
6767
let het = HetArray::decode_header(self, length)?;
68-
het.to_json(self).map(JsonValue::Array)
68+
het.to_value(self).map(JsonValue::Array)
6969
}
7070
Header::U8Array(length) => u8_array_to_json(self, length).map(JsonValue::Array),
7171
Header::HeaderArray(length) => header_array_to_json(self, length).map(JsonValue::Array),
@@ -81,13 +81,16 @@ impl<'b> Decoder<'b> {
8181
let i = n.decode_i64(self)?;
8282
writer.write_value(i)?;
8383
}
84-
Header::IntBig(i) => todo!("decoding for bigint {i:?}"),
84+
Header::IntBig(s, l) => {
85+
let int = self.take_big_int(s, l)?;
86+
writer.write_value(int)?;
87+
}
8588
Header::Float(n) => {
8689
let f = n.decode_f64(self)?;
8790
writer.write_value(f)?;
8891
}
8992
Header::Str(l) => {
90-
let s = self.decode_str(l)?;
93+
let s = self.take_str_len(l)?;
9194
writer.write_value(s)?;
9295
}
9396
Header::Object(length) => {
@@ -130,32 +133,23 @@ impl<'b> Decoder<'b> {
130133
Ok(t)
131134
}
132135

133-
pub fn decode_str(&mut self, length: Length) -> DecodeResult<&'b str> {
136+
fn take_str_len(&mut self, length: Length) -> DecodeResult<&'b str> {
134137
let len = length.decode(self)?;
135-
if len == 0 {
136-
Ok("")
137-
} else {
138-
self.take_str(len)
139-
}
138+
self.take_str(len)
140139
}
141140

142-
pub fn decode_bytes(&mut self, length: Length) -> DecodeResult<&'b [u8]> {
143-
let len = length.decode(self)?;
144-
if len == 0 {
145-
Ok(b"")
141+
pub fn take_str(&mut self, length: usize) -> DecodeResult<&'b str> {
142+
if length == 0 {
143+
Ok("")
146144
} else {
147-
self.take_slice(len)
145+
let end = self.index + length;
146+
let slice = self.bytes.get(self.index..end).ok_or_else(|| self.eof())?;
147+
let s = simdutf8::basic::from_utf8(slice).map_err(|e| DecodeError::from_utf8_error(self.index, e))?;
148+
self.index = end;
149+
Ok(s)
148150
}
149151
}
150152

151-
pub fn take_str(&mut self, length: usize) -> DecodeResult<&'b str> {
152-
let end = self.index + length;
153-
let slice = self.bytes.get(self.index..end).ok_or_else(|| self.eof())?;
154-
let s = simdutf8::basic::from_utf8(slice).map_err(|e| DecodeError::from_utf8_error(self.index, e))?;
155-
self.index = end;
156-
Ok(s)
157-
}
158-
159153
pub fn take_u8(&mut self) -> DecodeResult<u8> {
160154
self.next().ok_or_else(|| self.eof())
161155
}
@@ -187,9 +181,10 @@ impl<'b> Decoder<'b> {
187181
Ok(i64::from_le_bytes(slice.try_into().unwrap()))
188182
}
189183

190-
pub fn take_f32(&mut self) -> DecodeResult<f32> {
191-
let slice = self.take_slice(4)?;
192-
Ok(f32::from_le_bytes(slice.try_into().unwrap()))
184+
pub fn take_big_int(&mut self, sign: Sign, length: Length) -> DecodeResult<BigInt> {
185+
let size = length.decode(self)?;
186+
let slice = self.take_slice(size)?;
187+
Ok(BigInt::from_bytes_le(sign, slice))
193188
}
194189

195190
pub fn take_f64(&mut self) -> DecodeResult<f64> {

crates/batson/src/encoder.rs

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
use std::mem::align_of;
2-
31
use jiter::{JsonArray, JsonObject, JsonValue};
2+
use num_bigint::{BigInt, Sign};
3+
use std::mem::align_of;
44

55
use crate::array::encode_array;
66
use crate::errors::{EncodeError, EncodeResult};
@@ -35,7 +35,7 @@ impl Encoder {
3535
JsonValue::Null => self.encode_null(),
3636
JsonValue::Bool(b) => self.encode_bool(*b),
3737
JsonValue::Int(int) => self.encode_i64(*int),
38-
JsonValue::BigInt(_) => todo!("encoding BigInt"),
38+
JsonValue::BigInt(big_int) => self.encode_big_int(big_int)?,
3939
JsonValue::Float(f) => self.encode_f64(*f),
4040
JsonValue::Str(s) => self.encode_str(s.as_ref())?,
4141
JsonValue::Array(array) => self.encode_array(array)?,
@@ -99,15 +99,19 @@ impl Encoder {
9999
}
100100
}
101101

102-
pub fn encode_str(&mut self, s: &str) -> EncodeResult<()> {
103-
self.encode_length(Category::Str, s.len())?;
104-
self.extend(s.as_bytes());
102+
pub fn encode_big_int(&mut self, int: &BigInt) -> EncodeResult<()> {
103+
let (sign, bytes) = int.to_bytes_le();
104+
match sign {
105+
Sign::Minus => self.encode_length(Category::BigIntNeg, bytes.len())?,
106+
_ => self.encode_length(Category::BigIntPos, bytes.len())?,
107+
}
108+
self.extend(&bytes);
105109
Ok(())
106110
}
107111

108-
pub fn encode_bytes(&mut self, b: &[u8]) -> EncodeResult<()> {
109-
self.encode_length(Category::U8Array, b.len())?;
110-
self.extend(b);
112+
pub fn encode_str(&mut self, s: &str) -> EncodeResult<()> {
113+
self.encode_length(Category::Str, s.len())?;
114+
self.extend(s.as_bytes());
111115
Ok(())
112116
}
113117

crates/batson/src/header.rs

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,20 @@
1+
use std::sync::Arc;
2+
3+
use jiter::{JsonValue, LazyIndexMap};
4+
use num_bigint::Sign;
5+
use smallvec::smallvec;
6+
17
use crate::decoder::Decoder;
28
use crate::errors::{DecodeErrorType, DecodeResult};
39
use crate::json_writer::JsonWriter;
410
use crate::ToJsonResult;
5-
use jiter::{JsonValue, LazyIndexMap};
6-
use smallvec::smallvec;
7-
use std::sync::Arc;
811

912
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
1013
pub(crate) enum Header {
1114
Null,
1215
Bool(bool),
1316
Int(NumberHint),
14-
IntBig(Length),
17+
IntBig(Sign, Length),
1518
Float(NumberHint),
1619
Str(Length),
1720
Object(Length),
@@ -31,7 +34,8 @@ impl Header {
3134
match cat {
3235
Category::Primitive => Primitive::from_u8(right, d).map(Primitive::header_value),
3336
Category::Int => NumberHint::from_u8(right, d).map(Self::Int),
34-
Category::BigInt => Length::from_u8(right, d).map(Self::IntBig),
37+
Category::BigIntPos => Length::from_u8(right, d).map(|l| Self::IntBig(Sign::Plus, l)),
38+
Category::BigIntNeg => Length::from_u8(right, d).map(|l| Self::IntBig(Sign::Minus, l)),
3539
Category::Float => NumberHint::from_u8(right, d).map(Self::Float),
3640
Category::Str => Length::from_u8(right, d).map(Self::Str),
3741
Category::Object => Length::from_u8(right, d).map(Self::Object),
@@ -43,12 +47,12 @@ impl Header {
4347
}
4448

4549
/// TODO: `'static` should be acceptable as the return lifetime here; it is unclear why it is not
46-
pub fn as_value<'b>(self, _: &Decoder<'b>) -> JsonValue<'b> {
50+
pub fn header_as_value<'b>(self, _: &Decoder<'b>) -> JsonValue<'b> {
4751
match self {
4852
Header::Null => JsonValue::Null,
4953
Header::Bool(b) => JsonValue::Bool(b),
5054
Header::Int(n) => JsonValue::Int(n.decode_i64_header()),
51-
Header::IntBig(_) => todo!(),
55+
Header::IntBig(..) => unreachable!("Big ints are not supported as header only values"),
5256
Header::Float(n) => JsonValue::Float(n.decode_f64_header()),
5357
Header::Str(_) => JsonValue::Str("".into()),
5458
Header::Object(_) => JsonValue::Object(Arc::new(LazyIndexMap::default())),
@@ -61,7 +65,7 @@ impl Header {
6165
Header::Null => writer.write_null(),
6266
Header::Bool(b) => writer.write_value(b)?,
6367
Header::Int(n) => writer.write_value(n.decode_i64_header())?,
64-
Header::IntBig(_) => todo!(),
68+
Header::IntBig(..) => return Err("Big ints are not supported as header only values".into()),
6569
Header::Float(n) => writer.write_value(n.decode_f64_header())?,
6670
// TODO check the
6771
Header::Str(len) => {
@@ -111,16 +115,17 @@ macro_rules! impl_from_u8 {
111115
pub(crate) enum Category {
112116
Primitive = 0,
113117
Int = 1,
114-
BigInt = 2,
115-
Float = 3,
116-
Str = 4,
117-
Object = 5,
118-
HeaderArray = 6,
119-
U8Array = 7,
120-
I64Array = 8,
121-
HetArray = 9,
118+
BigIntPos = 2,
119+
BigIntNeg = 3,
120+
Float = 4,
121+
Str = 5,
122+
Object = 6,
123+
HeaderArray = 7,
124+
U8Array = 8,
125+
I64Array = 9,
126+
HetArray = 10,
122127
}
123-
impl_from_u8!(Category, 9);
128+
impl_from_u8!(Category, 10);
124129

125130
impl Category {
126131
pub fn encode_with(self, right: u8) -> u8 {
@@ -208,12 +213,11 @@ impl NumberHint {
208213

209214
pub fn decode_f64(self, d: &mut Decoder) -> DecodeResult<f64> {
210215
match self {
211-
// f8 doesn't exist
212-
NumberHint::Size8 => Err(d.error(DecodeErrorType::HeaderInvalid {
216+
// f8 doesn't exist, and currently we don't use f32 anywhere
217+
NumberHint::Size8 | NumberHint::Size32 => Err(d.error(DecodeErrorType::HeaderInvalid {
213218
value: self as u8,
214219
ty: "f64",
215220
})),
216-
NumberHint::Size32 => d.take_f32().map(f64::from),
217221
NumberHint::Size64 => d.take_f64(),
218222
// TODO check this has same performance as inline match
219223
_ => Ok(self.decode_f64_header()),

crates/batson/src/json_writer.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use num_bigint::BigInt;
12
use serde::ser::Serializer as _;
23
use serde_json::ser::Serializer;
34

@@ -116,3 +117,10 @@ impl WriteJson for f64 {
116117
ser.serialize_f64(*self).map_err(Into::into)
117118
}
118119
}
120+
121+
impl WriteJson for BigInt {
122+
fn write_json(&self, writer: &mut JsonWriter) -> ToJsonResult<()> {
123+
writer.vec.extend_from_slice(self.to_str_radix(10).as_bytes());
124+
Ok(())
125+
}
126+
}

crates/batson/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#![allow(dead_code)]
21
mod array;
32
mod decoder;
43
mod encoder;

0 commit comments

Comments
 (0)