Skip to content

Commit bada50d

Browse files
committed
add get_batson
1 parent 9da84a2 commit bada50d

File tree

7 files changed

+175
-1
lines changed

7 files changed

+175
-1
lines changed

crates/batson/src/array.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,16 @@ impl<'b> HetArray<'b> {
7474
writer.end_array();
7575
Ok(())
7676
}
77+
78+
pub fn move_to_end(&self, d: &mut Decoder<'b>) -> DecodeResult<()> {
79+
d.index += match &self.offsets {
80+
HetArrayOffsets::U8(v) => v.last().copied().unwrap() as usize,
81+
HetArrayOffsets::U16(v) => v.last().copied().unwrap() as usize,
82+
HetArrayOffsets::U32(v) => v.last().copied().unwrap() as usize,
83+
};
84+
let header = d.take_header()?;
85+
d.move_to_end(header)
86+
}
7787
}
7888

7989
fn take_slice_as<'b, T: bytemuck::Pod>(d: &mut Decoder<'b>, length: Length) -> DecodeResult<&'b [T]> {

crates/batson/src/decoder.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,38 @@ impl<'b> Decoder<'b> {
3939
Self { bytes, index: 0 }
4040
}
4141

42+
pub fn get_range(&self, start: usize, end: usize) -> DecodeResult<&'b [u8]> {
43+
self.bytes
44+
.get(start..end)
45+
.ok_or_else(|| self.error(DecodeErrorType::EOF))
46+
}
47+
48+
/// Advance the decoder past the data that follows `header`, so that `self.index` ends up at the end of the value
49+
pub fn move_to_end(&mut self, header: Header) -> DecodeResult<()> {
50+
match header {
51+
Header::Null | Header::Bool(_) => (),
52+
Header::Int(n) | Header::Float(n) => {
53+
self.index += n.data_length();
54+
}
55+
Header::Object(l) => {
56+
let obj = Object::decode_header(self, l)?;
57+
obj.move_to_end(self)?;
58+
}
59+
Header::I64Array(l) => {
60+
let length = l.decode(self)?;
61+
self.index += length * size_of::<i64>();
62+
}
63+
Header::HetArray(l) => {
64+
let het = HetArray::decode_header(self, l)?;
65+
het.move_to_end(self)?;
66+
}
67+
Header::IntBig(_, l) | Header::Str(l) | Header::HeaderArray(l) | Header::U8Array(l) => {
68+
self.index += l.decode(self)?;
69+
}
70+
};
71+
Ok(())
72+
}
73+
4274
pub fn take_header(&mut self) -> DecodeResult<Header> {
4375
let byte = self.next().ok_or_else(|| self.eof())?;
4476
Header::decode(byte, self)

crates/batson/src/encoder.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@ impl Encoder {
1717
Self { data: Vec::new() }
1818
}
1919

20+
pub fn with_capacity(capacity: usize) -> Self {
21+
Self {
22+
data: Vec::with_capacity(capacity),
23+
}
24+
}
25+
2026
pub fn align<T>(&mut self) {
2127
let align = align_of::<T>();
2228
// same calculation as in `Decoder::align`

crates/batson/src/get.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
#![allow(clippy::module_name_repetitions)]
2+
23
use crate::array::{header_array_get, i64_array_get, u8_array_get, HetArray};
34
use crate::decoder::Decoder;
5+
use crate::encoder::Encoder;
46
use crate::errors::{DecodeError, DecodeResult};
57
use crate::header::Header;
68
use crate::object::Object;
9+
use std::borrow::Cow;
710

811
#[derive(Debug)]
912
pub enum BatsonPath<'s> {
@@ -35,6 +38,14 @@ pub fn get_int(bytes: &[u8], path: &[BatsonPath]) -> DecodeResult<Option<i64>> {
3538
get_try_into(bytes, path)
3639
}
3740

41+
pub fn get_batson<'b>(bytes: &'b [u8], path: &[BatsonPath]) -> DecodeResult<Option<Cow<'b, [u8]>>> {
42+
if let Some(v) = GetValue::get(bytes, path)? {
43+
v.into_batson().map(Some)
44+
} else {
45+
Ok(None)
46+
}
47+
}
48+
3849
pub fn contains(bytes: &[u8], path: &[BatsonPath]) -> DecodeResult<bool> {
3950
GetValue::get(bytes, path).map(|v| v.is_some())
4051
}
@@ -137,6 +148,27 @@ impl<'b> GetValue<'b> {
137148
_ => Ok(None),
138149
}
139150
}
151+
152+
fn into_batson(self) -> DecodeResult<Cow<'b, [u8]>> {
153+
match self {
154+
Self::Header(mut decoder, header) => {
155+
let start = decoder.index - 1;
156+
decoder.move_to_end(header)?;
157+
let end = decoder.index;
158+
decoder.get_range(start, end).map(Cow::Borrowed)
159+
}
160+
Self::U8(int) => {
161+
let mut encoder = Encoder::with_capacity(2);
162+
encoder.encode_i64(int.into());
163+
Ok(Cow::Owned(encoder.into()))
164+
}
165+
Self::I64(int) => {
166+
let mut encoder = Encoder::with_capacity(9);
167+
encoder.encode_i64(int);
168+
Ok(Cow::Owned(encoder.into()))
169+
}
170+
}
171+
}
140172
}
141173

142174
impl From<GetValue<'_>> for Option<bool> {

crates/batson/src/header.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,16 @@ impl NumberHint {
275275
_ => None,
276276
}
277277
}
278+
279+
/// Number of bytes of data that follow the header: 1, 4 or 8 for `Size8`, `Size32` and `Size64`, otherwise 0
280+
pub fn data_length(self) -> usize {
281+
match self {
282+
Self::Size8 => 1,
283+
Self::Size32 => 4,
284+
Self::Size64 => 8,
285+
_ => 0,
286+
}
287+
}
278288
}
279289

280290
/// String, object, and array lengths

crates/batson/src/object.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,15 @@ impl<'b> Object<'b> {
5050
ObjectChoice::U32(o) => o.write_json(d, writer),
5151
}
5252
}
53+
54+
/// Move the decoder `d` to the end of this object's data by delegating to the sized object variant
55+
pub fn move_to_end(self, d: &mut Decoder<'b>) -> DecodeResult<()> {
56+
match self.0 {
57+
ObjectChoice::U8(o) => o.move_to_end(d),
58+
ObjectChoice::U16(o) => o.move_to_end(d),
59+
ObjectChoice::U32(o) => o.move_to_end(d),
60+
}
61+
}
5362
}
5463

5564
#[derive(Debug)]
@@ -136,6 +145,14 @@ impl<'b, S: SuperHeaderItem> ObjectSized<'b, S> {
136145
None => Err(d.error(DecodeErrorType::ObjectBodyIndexInvalid)),
137146
}
138147
}
148+
149+
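/// Move `d` to the end of the last value: advance by the last entry's offset and key length, then skip the value found there (panics if `super_header` is empty)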
150+
pub fn move_to_end(self, d: &mut Decoder<'b>) -> DecodeResult<()> {
151+
let h = self.super_header.last().unwrap();
152+
d.index += h.offset() + h.key_length();
153+
let header = d.take_header()?;
154+
d.move_to_end(header)
155+
}
139156
}
140157

141158
trait SuperHeaderItem: fmt::Debug + Copy + Clone + Pod + Zeroable + Eq + PartialEq {

crates/batson/tests/main.rs

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::sync::Arc;
66
use jiter::{JsonValue, LazyIndexMap};
77
use smallvec::smallvec;
88

9-
use batson::get::{contains, get_bool, get_int, get_length, get_str};
9+
use batson::get::{contains, get_batson, get_bool, get_int, get_length, get_str};
1010
use batson::{batson_to_json_string, compare_json_values, decode_to_json_value, encode_from_json};
1111

1212
#[test]
@@ -187,6 +187,73 @@ fn test_get_length() {
187187
assert_eq!(get_length(&bytes, &["foo".into(), 1.into()]).unwrap().unwrap(), 2);
188188
}
189189

190+
#[test]
191+
fn test_get_batson() {
192+
let bytes = json_to_batson(br#"{"foo": [null, {"a": 1, "b": 22}, 4294967299]}"#);
193+
194+
assert_eq!(get_batson(&bytes, &[]).unwrap().unwrap(), bytes);
195+
196+
let null_bytes = get_batson(&bytes, &["foo".into(), 0.into()]).unwrap().unwrap();
197+
assert_eq!(null_bytes, [0u8].as_ref());
198+
assert_eq!(batson_to_json_string(&null_bytes).unwrap(), "null");
199+
200+
let foo_bytes = get_batson(&bytes, &["foo".into()]).unwrap().unwrap();
201+
assert_eq!(
202+
batson_to_json_string(&foo_bytes).unwrap(),
203+
r#"[null,{"a":1,"b":22},4294967299]"#
204+
);
205+
206+
let missing = get_batson(&bytes, &["bar".into()]).unwrap();
207+
assert!(missing.is_none());
208+
209+
let missing = get_batson(&bytes, &["foo".into(), "bar".into()]).unwrap();
210+
assert!(missing.is_none());
211+
212+
let obj_bytes = get_batson(&bytes, &["foo".into(), 1.into()]).unwrap().unwrap();
213+
assert_eq!(batson_to_json_string(&obj_bytes).unwrap(), r#"{"a":1,"b":22}"#);
214+
215+
let a_bytes = get_batson(&bytes, &["foo".into(), 1.into(), "a".into()])
216+
.unwrap()
217+
.unwrap();
218+
assert_eq!(batson_to_json_string(&a_bytes).unwrap(), "1");
219+
220+
let b_bytes = get_batson(&bytes, &["foo".into(), 1.into(), "b".into()])
221+
.unwrap()
222+
.unwrap();
223+
assert_eq!(batson_to_json_string(&b_bytes).unwrap(), "22");
224+
225+
let int_bytes = get_batson(&bytes, &["foo".into(), 2.into()]).unwrap().unwrap();
226+
assert_eq!(batson_to_json_string(&int_bytes).unwrap(), "4294967299");
227+
}
228+
229+
#[test]
230+
fn test_get_batson_u8array() {
231+
let bytes = json_to_batson(br#"[1, 2, 0, 255, 128]"#);
232+
233+
assert_eq!(get_batson(&bytes, &[]).unwrap().unwrap(), bytes);
234+
235+
let zeroth_bytes = get_batson(&bytes, &[0.into()]).unwrap().unwrap();
236+
assert_eq!(batson_to_json_string(&zeroth_bytes).unwrap(), "1");
237+
238+
let first_bytes = get_batson(&bytes, &[1.into()]).unwrap().unwrap();
239+
assert_eq!(batson_to_json_string(&first_bytes).unwrap(), "2");
240+
241+
let second_bytes = get_batson(&bytes, &[2.into()]).unwrap().unwrap();
242+
assert_eq!(batson_to_json_string(&second_bytes).unwrap(), "0");
243+
244+
let third_bytes = get_batson(&bytes, &[3.into()]).unwrap().unwrap();
245+
assert_eq!(batson_to_json_string(&third_bytes).unwrap(), "255");
246+
247+
let fourth_bytes = get_batson(&bytes, &[4.into()]).unwrap().unwrap();
248+
assert_eq!(batson_to_json_string(&fourth_bytes).unwrap(), "128");
249+
250+
let missing = get_batson(&bytes, &[5.into()]).unwrap();
251+
assert!(missing.is_none());
252+
253+
let missing = get_batson(&bytes, &[4.into(), 0.into()]).unwrap();
254+
assert!(missing.is_none());
255+
}
256+
190257
#[test]
191258
fn test_to_json() {
192259
let bytes = json_to_batson(br" [true, 123] ");

0 commit comments

Comments
 (0)