1515// specific language governing permissions and limitations
1616// under the License.
1717use crate :: decoder:: { VariantBasicType , VariantPrimitiveType } ;
18- use crate :: { ShortString , Variant } ;
19- use std:: collections:: HashMap ;
18+ use crate :: { ShortString , Variant , VariantDecimal16 , VariantDecimal4 , VariantDecimal8 } ;
19+ use std:: collections:: BTreeMap ;
2020
2121const BASIC_TYPE_BITS : u8 = 2 ;
2222const UNIX_EPOCH_DATE : chrono:: NaiveDate = chrono:: NaiveDate :: from_ymd_opt ( 1970 , 1 , 1 ) . unwrap ( ) ;
@@ -166,15 +166,15 @@ fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: usize, header_size: usi
166166///
167167pub struct VariantBuilder {
168168 buffer : Vec < u8 > ,
169- dict : HashMap < String , u32 > ,
169+ dict : BTreeMap < String , u32 > ,
170170 dict_keys : Vec < String > ,
171171}
172172
173173impl VariantBuilder {
174174 pub fn new ( ) -> Self {
175175 Self {
176176 buffer : Vec :: new ( ) ,
177- dict : HashMap :: new ( ) ,
177+ dict : BTreeMap :: new ( ) ,
178178 dict_keys : Vec :: new ( ) ,
179179 }
180180 }
@@ -296,7 +296,7 @@ impl VariantBuilder {
296296
297297 /// Add key to dictionary, return its ID
298298 fn add_key ( & mut self , key : & str ) -> u32 {
299- use std:: collections:: hash_map :: Entry ;
299+ use std:: collections:: btree_map :: Entry ;
300300 match self . dict . entry ( key. to_string ( ) ) {
301301 Entry :: Occupied ( entry) => * entry. get ( ) ,
302302 Entry :: Vacant ( entry) => {
@@ -384,9 +384,15 @@ impl VariantBuilder {
384384 Variant :: Date ( v) => self . append_date ( v) ,
385385 Variant :: TimestampMicros ( v) => self . append_timestamp_micros ( v) ,
386386 Variant :: TimestampNtzMicros ( v) => self . append_timestamp_ntz_micros ( v) ,
387- Variant :: Decimal4 { integer, scale } => self . append_decimal4 ( integer, scale) ,
388- Variant :: Decimal8 { integer, scale } => self . append_decimal8 ( integer, scale) ,
389- Variant :: Decimal16 { integer, scale } => self . append_decimal16 ( integer, scale) ,
387+ Variant :: Decimal4 ( VariantDecimal4 { integer, scale } ) => {
388+ self . append_decimal4 ( integer, scale)
389+ }
390+ Variant :: Decimal8 ( VariantDecimal8 { integer, scale } ) => {
391+ self . append_decimal8 ( integer, scale)
392+ }
393+ Variant :: Decimal16 ( VariantDecimal16 { integer, scale } ) => {
394+ self . append_decimal16 ( integer, scale)
395+ }
390396 Variant :: Float ( v) => self . append_float ( v) ,
391397 Variant :: Double ( v) => self . append_double ( v) ,
392398 Variant :: Binary ( v) => self . append_binary ( v) ,
@@ -482,7 +488,7 @@ impl<'a> ListBuilder<'a> {
482488pub struct ObjectBuilder < ' a > {
483489 parent : & ' a mut VariantBuilder ,
484490 start_pos : usize ,
485- fields : Vec < ( u32 , usize ) > , // (field_id, offset)
491+ fields : BTreeMap < u32 , usize > , // (field_id, offset)
486492}
487493
488494impl < ' a > ObjectBuilder < ' a > {
@@ -491,7 +497,7 @@ impl<'a> ObjectBuilder<'a> {
491497 Self {
492498 parent,
493499 start_pos,
494- fields : Vec :: new ( ) ,
500+ fields : BTreeMap :: new ( ) ,
495501 }
496502 }
497503
@@ -500,25 +506,27 @@ impl<'a> ObjectBuilder<'a> {
500506 let id = self . parent . add_key ( key) ;
501507 let field_start = self . parent . offset ( ) - self . start_pos ;
502508 self . parent . append_value ( value) ;
503- self . fields . push ( ( id, field_start) ) ;
509+ let res = self . fields . insert ( id, field_start) ;
510+ debug_assert ! ( res. is_none( ) ) ;
504511 }
505512
506513 /// Finalize object with sorted fields
507- pub fn finish ( mut self ) {
508- // Sort fields by key name
509- self . fields . sort_by ( |a, b| {
510- let key_a = & self . parent . dict_keys [ a. 0 as usize ] ;
511- let key_b = & self . parent . dict_keys [ b. 0 as usize ] ;
512- key_a. cmp ( key_b)
513- } ) ;
514-
514+ pub fn finish ( self ) {
515515 let data_size = self . parent . offset ( ) - self . start_pos ;
516516 let num_fields = self . fields . len ( ) ;
517517 let is_large = num_fields > u8:: MAX as usize ;
518518 let size_bytes = if is_large { 4 } else { 1 } ;
519519
520- let max_id = self . fields . iter ( ) . map ( |& ( id, _) | id) . max ( ) . unwrap_or ( 0 ) ;
521- let id_size = int_size ( max_id as usize ) ;
520+ let field_ids_by_sorted_field_name = self
521+ . parent
522+ . dict
523+ . iter ( )
524+ . filter_map ( |( _, id) | self . fields . contains_key ( id) . then_some ( * id) )
525+ . collect :: < Vec < _ > > ( ) ;
526+
527+ let max_id = self . fields . keys ( ) . last ( ) . copied ( ) . unwrap_or ( 0 ) as usize ;
528+
529+ let id_size = int_size ( max_id) ;
522530 let offset_size = int_size ( data_size) ;
523531
524532 let header_size = 1
@@ -542,17 +550,18 @@ impl<'a> ObjectBuilder<'a> {
542550 }
543551
544552 // Write field IDs (sorted order)
545- for & ( id , _ ) in & self . fields {
553+ for id in & field_ids_by_sorted_field_name {
546554 write_offset (
547555 & mut self . parent . buffer [ pos..pos + id_size as usize ] ,
548- id as usize ,
556+ * id as usize ,
549557 id_size,
550558 ) ;
551559 pos += id_size as usize ;
552560 }
553561
554562 // Write field offsets
555- for & ( _, offset) in & self . fields {
563+ for id in & field_ids_by_sorted_field_name {
564+ let & offset = self . fields . get ( id) . unwrap ( ) ;
556565 write_offset (
557566 & mut self . parent . buffer [ pos..pos + offset_size as usize ] ,
558567 offset,
@@ -749,6 +758,77 @@ mod tests {
749758 assert_eq ! ( field_ids, vec![ 1 , 2 , 0 ] ) ;
750759 }
751760
761+ #[ test]
762+ fn test_object_and_metadata_ordering ( ) {
763+ let mut builder = VariantBuilder :: new ( ) ;
764+
765+ let mut obj = builder. new_object ( ) ;
766+
767+ obj. append_value ( "zebra" , "stripes" ) ; // ID = 0
768+ obj. append_value ( "apple" , "red" ) ; // ID = 1
769+
770+ {
771+ // fields_map is ordered by insertion order (field id)
772+ let fields_map = obj. fields . keys ( ) . copied ( ) . collect :: < Vec < _ > > ( ) ;
773+ assert_eq ! ( fields_map, vec![ 0 , 1 ] ) ;
774+
775+ // dict is ordered by field names
776+ // NOTE: when we support nested objects, we'll want to perform a filter by fields_map field ids
777+ let dict_metadata = obj
778+ . parent
779+ . dict
780+ . iter ( )
781+ . map ( |( f, i) | ( f. as_str ( ) , * i) )
782+ . collect :: < Vec < _ > > ( ) ;
783+
784+ assert_eq ! ( dict_metadata, vec![ ( "apple" , 1 ) , ( "zebra" , 0 ) ] ) ;
785+
786+ // dict_keys is ordered by insertion order (field id)
787+ let dict_keys = obj
788+ . parent
789+ . dict_keys
790+ . iter ( )
791+ . map ( |k| k. as_str ( ) )
792+ . collect :: < Vec < _ > > ( ) ;
793+ assert_eq ! ( dict_keys, vec![ "zebra" , "apple" ] ) ;
794+ }
795+
796+ obj. append_value ( "banana" , "yellow" ) ; // ID = 2
797+
798+ {
799+ // fields_map is ordered by insertion order (field id)
800+ let fields_map = obj. fields . keys ( ) . copied ( ) . collect :: < Vec < _ > > ( ) ;
801+ assert_eq ! ( fields_map, vec![ 0 , 1 , 2 ] ) ;
802+
803+ // dict is ordered by field names
804+ // NOTE: when we support nested objects, we'll want to perform a filter by fields_map field ids
805+ let dict_metadata = obj
806+ . parent
807+ . dict
808+ . iter ( )
809+ . map ( |( f, i) | ( f. as_str ( ) , * i) )
810+ . collect :: < Vec < _ > > ( ) ;
811+
812+ assert_eq ! (
813+ dict_metadata,
814+ vec![ ( "apple" , 1 ) , ( "banana" , 2 ) , ( "zebra" , 0 ) ]
815+ ) ;
816+
817+ // dict_keys is ordered by insertion order (field id)
818+ let dict_keys = obj
819+ . parent
820+ . dict_keys
821+ . iter ( )
822+ . map ( |k| k. as_str ( ) )
823+ . collect :: < Vec < _ > > ( ) ;
824+ assert_eq ! ( dict_keys, vec![ "zebra" , "apple" , "banana" ] ) ;
825+ }
826+
827+ obj. finish ( ) ;
828+
829+ builder. finish ( ) ;
830+ }
831+
752832 #[ test]
753833 fn test_append_object ( ) {
754834 let ( object_metadata, object_value) = {
0 commit comments