1- use std:: mem:: size_of;
2- use std:: sync:: Arc ;
3-
1+ use bytemuck:: NoUninit ;
42use jiter:: { JsonArray , JsonValue } ;
53use smallvec:: SmallVec ;
4+ use std:: mem:: size_of;
5+ use std:: sync:: Arc ;
66
77use crate :: decoder:: Decoder ;
88use crate :: encoder:: Encoder ;
99use crate :: errors:: { DecodeResult , EncodeResult , ToJsonResult } ;
1010use crate :: header:: { Category , Header , Length , NumberHint , Primitive } ;
1111use crate :: json_writer:: JsonWriter ;
12+ use crate :: object:: minimum_value_size_estimate;
13+ use crate :: EncodeError ;
1214
// Refuse to build for big-endian targets; the message below states the reason.
#[cfg(target_endian = "big")]
compile_error!(
    "big-endian architectures are not yet supported as we use `bytemuck` for zero-copy header decoding."
);
1517
1618/// Batson heterogeneous array representation
1719#[ derive( Debug ) ]
1820pub ( crate ) struct HetArray < ' b > {
19- offsets : & ' b [ u32 ] ,
21+ offsets : HetArrayOffsets < ' b > ,
2022}
2123
2224impl < ' b > HetArray < ' b > {
2325 pub fn decode_header ( d : & mut Decoder < ' b > , length : Length ) -> DecodeResult < Self > {
24- if matches ! ( length , Length :: Empty ) {
25- Ok ( Self { offsets : & [ ] } )
26- } else {
27- let length = length . decode ( d ) ? ;
28- let positions = d . take_slice_as ( length) ?;
29- Ok ( Self { offsets : positions } )
30- }
26+ let offsets = match length {
27+ Length :: Empty => HetArrayOffsets :: U8 ( & [ ] ) ,
28+ Length :: U32 => HetArrayOffsets :: U32 ( take_slice_as ( d , length ) ? ) ,
29+ Length :: U16 => HetArrayOffsets :: U16 ( take_slice_as ( d , length ) ? ) ,
30+ _ => HetArrayOffsets :: U8 ( take_slice_as ( d , length) ?) ,
31+ } ;
32+ Ok ( Self { offsets } )
3133 }
3234
3335 pub fn len ( & self ) -> usize {
34- self . offsets . len ( )
36+ match self . offsets {
37+ HetArrayOffsets :: U8 ( v) => v. len ( ) ,
38+ HetArrayOffsets :: U16 ( v) => v. len ( ) ,
39+ HetArrayOffsets :: U32 ( v) => v. len ( ) ,
40+ }
3541 }
3642
3743 pub fn get ( & self , d : & mut Decoder < ' b > , index : usize ) -> bool {
38- if let Some ( offset) = self . offsets . get ( index) {
39- d. index += * offset as usize ;
44+ let opt_offset = match & self . offsets {
45+ HetArrayOffsets :: U8 ( v) => v. get ( index) . map ( |& o| o as usize ) ,
46+ HetArrayOffsets :: U16 ( v) => v. get ( index) . map ( |& o| o as usize ) ,
47+ HetArrayOffsets :: U32 ( v) => v. get ( index) . map ( |& o| o as usize ) ,
48+ } ;
49+ if let Some ( offset) = opt_offset {
50+ d. index += offset;
4051 true
4152 } else {
4253 false
4354 }
4455 }
4556
4657 pub fn to_json ( & self , d : & mut Decoder < ' b > ) -> DecodeResult < JsonArray < ' b > > {
47- self . offsets
48- . iter ( )
58+ ( 0 ..self . len ( ) )
4959 . map ( |_| d. take_value ( ) )
5060 . collect :: < DecodeResult < SmallVec < _ , 8 > > > ( )
5161 . map ( Arc :: new)
@@ -66,6 +76,18 @@ impl<'b> HetArray<'b> {
6676 }
6777}
6878
79+ fn take_slice_as < ' b , T : bytemuck:: Pod > ( d : & mut Decoder < ' b > , length : Length ) -> DecodeResult < & ' b [ T ] > {
80+ let length = length. decode ( d) ?;
81+ d. take_slice_as ( length)
82+ }
83+
/// Offset table of a heterogeneous array, stored at one of three integer widths.
///
/// Every variant borrows its table from the encoded input, so the enum is just
/// a tagged shared reference; it derives `Clone` and `Copy` so it can be
/// passed and matched by value.
#[derive(Debug, Clone, Copy)]
enum HetArrayOffsets<'b> {
    /// Offsets that each fit in a single byte.
    U8(&'b [u8]),
    /// Offsets stored as 16-bit integers.
    U16(&'b [u16]),
    /// Offsets stored as 32-bit integers.
    U32(&'b [u32]),
}
90+
6991pub ( crate ) fn header_array_get ( d : & mut Decoder , length : Length , index : usize ) -> DecodeResult < Option < Header > > {
7092 u8_array_get ( d, length, index) ?
7193 . map ( |b| Header :: decode ( b, d) )
@@ -161,22 +183,58 @@ pub(crate) fn encode_array(encoder: &mut Encoder, array: &JsonArray) -> EncodeRe
161183 }
162184 Ok ( ( ) )
163185 } else {
164- encoder. encode_length ( Category :: HetArray , array. len ( ) ) ?;
186+ let min_size = minimum_array_size_estimate ( array) ;
187+ let encoder_position = encoder. position ( ) ;
165188
166- let mut offsets: Vec < u32 > = Vec :: with_capacity ( array. len ( ) ) ;
167- encoder. align :: < u32 > ( ) ;
168- let positions_start = encoder. ring_fence ( array. len ( ) * size_of :: < u32 > ( ) ) ;
189+ if min_size <= u8:: MAX as usize {
190+ encoder. encode_length ( Category :: HetArray , array. len ( ) ) ?;
191+ if encode_array_sized :: < u8 > ( encoder, array) ? {
192+ return Ok ( ( ) ) ;
193+ }
194+ encoder. reset_position ( encoder_position) ;
195+ }
169196
170- let offset_start = encoder. position ( ) ;
171- for value in array. iter ( ) {
172- offsets. push ( ( encoder. position ( ) - offset_start) as u32 ) ;
173- encoder. encode_value ( value) ?;
197+ if min_size <= u16:: MAX as usize {
198+ encoder. encode_len_u16 ( Category :: HetArray , u16:: try_from ( array. len ( ) ) . unwrap ( ) ) ;
199+ if encode_array_sized :: < u16 > ( encoder, array) ? {
200+ return Ok ( ( ) ) ;
201+ }
202+ encoder. reset_position ( encoder_position) ;
203+ }
204+
205+ encoder. encode_len_u32 ( Category :: HetArray , array. len ( ) ) ?;
206+ if encode_array_sized :: < u32 > ( encoder, array) ? {
207+ Ok ( ( ) )
208+ } else {
209+ Err ( EncodeError :: ArrayTooLarge )
174210 }
175- encoder. set_range ( positions_start, bytemuck:: cast_slice ( & offsets) ) ;
176- Ok ( ( ) )
177211 }
178212}
179213
214+ fn encode_array_sized < T : TryFrom < usize > + NoUninit > ( encoder : & mut Encoder , array : & JsonArray ) -> EncodeResult < bool > {
215+ let mut offsets: Vec < T > = Vec :: with_capacity ( array. len ( ) ) ;
216+ encoder. align :: < T > ( ) ;
217+ let positions_start = encoder. ring_fence ( array. len ( ) * size_of :: < T > ( ) ) ;
218+
219+ let offset_start = encoder. position ( ) ;
220+ for value in array. iter ( ) {
221+ let Ok ( offset) = T :: try_from ( encoder. position ( ) - offset_start) else {
222+ return Ok ( false ) ;
223+ } ;
224+ offsets. push ( offset) ;
225+ encoder. encode_value ( value) ?;
226+ }
227+ encoder. set_range ( positions_start, bytemuck:: cast_slice ( & offsets) ) ;
228+ Ok ( true )
229+ }
230+
231+ /// Estimate the minimize amount of space needed to encode the object.
232+ ///
233+ /// This is NOT recursive, instead it makes very optimistic guesses about how long arrays and objects might be.
234+ fn minimum_array_size_estimate ( array : & JsonArray ) -> usize {
235+ array. iter ( ) . map ( minimum_value_size_estimate) . sum ( )
236+ }
237+
180238#[ derive( Debug ) ]
181239enum PackedArray {
182240 Header ( Vec < u8 > ) ,
@@ -295,6 +353,9 @@ mod test {
295353 #[ test]
296354 fn array_round_trip ( ) {
297355 let array = Arc :: new ( smallvec ! [ JsonValue :: Null , JsonValue :: Int ( 123 ) , JsonValue :: Bool ( false ) , ] ) ;
356+ let min_size = minimum_array_size_estimate ( & array) ;
357+ assert_eq ! ( min_size, 4 ) ;
358+
298359 let mut encoder = Encoder :: new ( ) ;
299360 encoder. encode_array ( & array) . unwrap ( ) ;
300361 let bytes: Vec < u8 > = encoder. into ( ) ;
@@ -305,7 +366,13 @@ mod test {
305366
306367 let het_array = HetArray :: decode_header ( & mut decoder, 3 . into ( ) ) . unwrap ( ) ;
307368 assert_eq ! ( het_array. len( ) , 3 ) ;
308- assert_eq ! ( het_array. offsets, & [ 0 , 1 , 3 ] ) ;
369+
370+ let offsets = match het_array. offsets {
371+ HetArrayOffsets :: U8 ( v) => v,
372+ _ => panic ! ( "expected u8 offsets" ) ,
373+ } ;
374+
375+ assert_eq ! ( offsets, & [ 0 , 1 , 3 ] ) ;
309376 let decode_array = het_array. to_json ( & mut decoder) . unwrap ( ) ;
310377 assert_arrays_eq ! ( decode_array, array) ;
311378 }
@@ -388,4 +455,89 @@ mod test {
388455 let i64_array = i64_array_to_json ( & mut decoder, 5 . into ( ) ) . unwrap ( ) ;
389456 assert_arrays_eq ! ( i64_array, array) ;
390457 }
458+
/// Round-trips a 200-element array that the encoder is expected to store
/// with 16-bit offsets, and checks random access at the boundaries.
#[test]
fn test_u16_array() {
    // 100 booleans followed by 100 large integers.
    let values: Vec<_> = std::iter::repeat(JsonValue::Bool(true))
        .take(100)
        .chain(std::iter::repeat(JsonValue::Int(i64::MAX)).take(100))
        .collect();
    let array = Arc::new(values.into());

    let bytes: Vec<u8> = {
        let mut encoder = Encoder::new();
        encoder.encode_array(&array).unwrap();
        encoder.into()
    };

    let mut decoder = Decoder::new(&bytes);
    assert_eq!(decoder.take_header().unwrap(), Header::HetArray(Length::U16));

    let het_array = HetArray::decode_header(&mut decoder, Length::U16).unwrap();
    assert_eq!(het_array.len(), 200);

    let HetArrayOffsets::U16(offsets) = het_array.offsets else {
        panic!("expected U16 offsets");
    };
    assert_eq!(offsets.len(), 200);
    assert_eq!(offsets[0], 0);
    assert_eq!(offsets[1], 1);

    // First and last element of each half of the array.
    let probes = [
        (0, JsonValue::Bool(true)),
        (99, JsonValue::Bool(true)),
        (100, JsonValue::Int(i64::MAX)),
        (199, JsonValue::Int(i64::MAX)),
    ];
    for (index, expected) in &probes {
        let mut cursor = decoder.clone();
        assert!(het_array.get(&mut cursor, *index));
        assert!(compare_json_values(&cursor.take_value().unwrap(), expected));
    }

    // One past the end must be rejected.
    let mut cursor = decoder.clone();
    assert!(!het_array.get(&mut cursor, 200));

    let decode_array = het_array.to_json(&mut decoder).unwrap();
    assert_arrays_eq!(decode_array, array);
}
506+
/// Encodes an array whose first element is long enough that the second
/// element's offset no longer fits in 16 bits, and checks that 32-bit
/// offsets are used and that both elements can be looked up.
#[test]
fn test_u32_array() {
    let big = JsonValue::Str("a".repeat(u16::MAX as usize).into());
    let small = JsonValue::Int(42);
    let array = Arc::new(smallvec![big.clone(), small.clone()]);

    let bytes: Vec<u8> = {
        let mut encoder = Encoder::new();
        encoder.encode_array(&array).unwrap();
        encoder.into()
    };

    let mut decoder = Decoder::new(&bytes);
    assert_eq!(decoder.take_header().unwrap(), Header::HetArray(Length::U32));

    let het_array = HetArray::decode_header(&mut decoder, Length::U32).unwrap();
    assert_eq!(het_array.len(), 2);

    let HetArrayOffsets::U32(offsets) = het_array.offsets else {
        panic!("expected U32 offsets");
    };
    // 65538 exceeds u16::MAX, which is what forces the wider table.
    assert_eq!(offsets, [0, 65538]);

    for (index, expected) in [(0, &big), (1, &small)] {
        let mut cursor = decoder.clone();
        assert!(het_array.get(&mut cursor, index));
        assert!(compare_json_values(&cursor.take_value().unwrap(), expected));
    }
}
391543}
0 commit comments