@@ -583,7 +583,14 @@ fn quote_arrow_field_serializer(
583
583
}
584
584
}
585
585
586
- DataType :: Utf8 => {
586
+ DataType :: Binary | DataType :: Utf8 => {
587
+ let is_binary = datatype. to_logical_type ( ) == & DataType :: Binary ;
588
+ let as_bytes = if is_binary {
589
+ quote ! ( )
590
+ } else {
591
+ quote ! ( . as_bytes( ) )
592
+ } ;
593
+
587
594
// NOTE: We need values for all slots, regardless of what the validity says,
588
595
// hence `unwrap_or_default`.
589
596
let ( quoted_member_accessor, quoted_transparent_length) = if inner_is_arrow_transparent
@@ -623,7 +630,7 @@ fn quote_arrow_field_serializer(
623
630
624
631
let inner_data_and_offsets = if elements_are_nullable {
625
632
quote ! {
626
- let offsets = arrow:: buffer:: OffsetBuffer :: < i32 > :: from_lengths(
633
+ let offsets = arrow:: buffer:: OffsetBuffer :: from_lengths(
627
634
#data_src. iter( ) . map( |opt| opt. as_ref( ) #quoted_transparent_length . unwrap_or_default( ) )
628
635
) ;
629
636
@@ -636,13 +643,13 @@ fn quote_arrow_field_serializer(
636
643
// NOTE: Flattening to remove the guaranteed layer of nullability: we don't care
637
644
// about it while building the backing buffer since it's all offsets driven.
638
645
for data in #data_src. iter( ) . flatten( ) {
639
- buffer_builder. append_slice( data #quoted_member_accessor. as_bytes( ) ) ;
646
+ buffer_builder. append_slice( data #quoted_member_accessor # as_bytes) ;
640
647
}
641
648
let inner_data: arrow:: buffer:: Buffer = buffer_builder. finish( ) ;
642
649
}
643
650
} else {
644
651
quote ! {
645
- let offsets = arrow:: buffer:: OffsetBuffer :: < i32 > :: from_lengths(
652
+ let offsets = arrow:: buffer:: OffsetBuffer :: from_lengths(
646
653
#data_src. iter( ) #quoted_transparent_length
647
654
) ;
648
655
@@ -653,22 +660,29 @@ fn quote_arrow_field_serializer(
653
660
654
661
let mut buffer_builder = arrow:: array:: builder:: BufferBuilder :: <u8 >:: new( capacity) ;
655
662
for data in & #data_src {
656
- buffer_builder. append_slice( data #quoted_member_accessor. as_bytes( ) ) ;
663
+ buffer_builder. append_slice( data #quoted_member_accessor # as_bytes) ;
657
664
}
658
665
let inner_data: arrow:: buffer:: Buffer = buffer_builder. finish( ) ;
659
666
}
660
667
} ;
661
668
662
- quote ! { {
663
- #inner_data_and_offsets
664
-
665
- // Safety: we're building this from actual native strings, so no need to do the
666
- // whole utf8 validation _again_.
667
- // It would be nice to use quote_comment here and put this safety notice in the generated code,
668
- // but that seems to push us over some complexity limit causing rustfmt to fail.
669
- #[ allow( unsafe_code, clippy:: undocumented_unsafe_blocks) ]
670
- as_array_ref( unsafe { StringArray :: new_unchecked( offsets, inner_data, #validity_src) } )
671
- } }
669
+ if is_binary {
670
+ quote ! { {
671
+ #inner_data_and_offsets
672
+ as_array_ref( LargeBinaryArray :: new( offsets, inner_data, #validity_src) )
673
+ } }
674
+ } else {
675
+ quote ! { {
676
+ #inner_data_and_offsets
677
+
678
+ // Safety: we're building this from actual native strings, so no need to do the
679
+ // whole utf8 validation _again_.
680
+ // It would be nice to use quote_comment here and put this safety notice in the generated code,
681
+ // but that seems to push us over some complexity limit causing rustfmt to fail.
682
+ #[ allow( unsafe_code, clippy:: undocumented_unsafe_blocks) ]
683
+ as_array_ref( unsafe { StringArray :: new_unchecked( offsets, inner_data, #validity_src) } )
684
+ } }
685
+ }
672
686
}
673
687
674
688
DataType :: List ( inner_field) | DataType :: FixedSizeList ( inner_field, _) => {
@@ -919,6 +933,6 @@ fn quote_arrow_field_serializer(
919
933
} }
920
934
}
921
935
922
- _ => unimplemented ! ( "{datatype:#?}" ) ,
936
+ DataType :: Object { .. } => unimplemented ! ( "{datatype:#?}" ) ,
923
937
}
924
938
}
0 commit comments