@@ -5,70 +5,100 @@ use std::sync::Arc;
55
66use arrow_array:: ArrayRef as ArrowArrayRef ;
77use arrow_array:: StructArray ;
8+ use arrow_buffer:: NullBuffer ;
89use arrow_schema:: Fields ;
910use itertools:: Itertools ;
1011use vortex_error:: VortexResult ;
12+ use vortex_error:: vortex_bail;
1113use vortex_error:: vortex_ensure;
1214use vortex_session:: VortexSession ;
1315
1416use crate :: Array ;
1517use crate :: ArrayRef ;
18+ use crate :: arrays:: ScalarFnVTable ;
1619use crate :: arrays:: StructVTable ;
1720use crate :: arrow:: ArrowArrayExecutor ;
18- use crate :: arrow:: null_buffer :: to_null_buffer ;
21+ use crate :: arrow:: executor :: validity :: to_arrow_null_buffer ;
1922use crate :: builtins:: ArrayBuiltins ;
23+ use crate :: expr:: Pack ;
24+ use crate :: vtable:: ValidityHelper ;
2025
2126pub ( super ) fn to_arrow_struct (
2227 array : ArrayRef ,
2328 fields : & Fields ,
2429 session : & VortexSession ,
2530) -> VortexResult < ArrowArrayRef > {
2631 let len = array. len ( ) ;
27- let validity = array. validity_mask ( ) ;
2832
29- let mut field_arrays = Vec :: with_capacity ( fields. len ( ) ) ;
30-
31- match array. try_into :: < StructVTable > ( ) {
33+ // First, we attempt to short-circuit if the array is already a StructVTable:
34+ let array = match array. try_into :: < StructVTable > ( ) {
3235 Ok ( array) => {
33- // If the array is already a struct type, then we can convert each field.
34- for ( field, child) in fields. iter ( ) . zip_eq ( array. into_fields ( ) . into_iter ( ) ) {
35- let field_array = child. execute_arrow ( field. data_type ( ) , session) ?;
36- vortex_ensure ! (
37- field. is_nullable( ) || field_array. null_count( ) == 0 ,
38- "Cannot convert field '{}' to non-nullable Arrow field because it contains nulls" ,
39- field. name( )
40- ) ;
41- field_arrays. push ( field_array) ;
42- }
36+ let validity = to_arrow_null_buffer ( array. validity ( ) , array. len ( ) , session) ?;
37+ return create_from_fields ( fields, array. into_fields ( ) , validity, len, session) ;
4338 }
44- Err ( array) => {
45- // Otherwise, we have some options:
46- // 1. Use get_item expression to extract each field? This is a bit sad because get_item
47- // will perform the validity masking again.
48- // 2. Execute a full struct vector. But this may do unnecessary work on fields that may
49- // have a more direct conversion to the desired Arrow field type.
50- // 3. Something else?
51- //
52- // For now, we go with option 1. Although we really ought to figure out CSE for this.
53- for field in fields. iter ( ) {
54- let field_array = array
55- . get_item ( field. name ( ) . as_str ( ) ) ?
56- . execute_arrow ( field. data_type ( ) , session) ?;
57- vortex_ensure ! (
58- field. is_nullable( ) || field_array. null_count( ) == 0 ,
59- "Cannot convert field '{}' to non-nullable Arrow field because it contains nulls" ,
60- field. name( )
61- ) ;
62- field_arrays. push ( field_array) ;
63- }
39+ Err ( array) => array,
40+ } ;
41+
42+ // We can also short-circuit if the array is a `pack` scalar function:
43+ if let Some ( array) = array. as_opt :: < ScalarFnVTable > ( ) {
44+ if let Some ( _pack_options) = array. scalar_fn ( ) . as_opt :: < Pack > ( ) {
45+ return create_from_fields (
46+ fields,
47+ array. children ( ) . to_vec ( ) ,
48+ None , // Pack is never null,
49+ len,
50+ session,
51+ ) ;
6452 }
6553 }
6654
55+ // Otherwise, we have some options:
56+ // 1. Use get_item expression to extract each field? This is a bit sad because get_item
57+ // will perform the validity masking again.
58+ // 2. Execute a full struct vector. But this may do unnecessary work on fields that may
59+ // have a more direct conversion to the desired Arrow field type.
60+ // 3. Something else?
61+ //
62+ // For now, we go with option 1. Although we really ought to figure out CSE for this.
63+ let field_arrays = fields
64+ . iter ( )
65+ . map ( |f| array. get_item ( f. name ( ) . as_str ( ) ) )
66+ . try_collect ( ) ?;
67+
68+ if !array. all_valid ( ) {
69+ // TODO(ngates): we should grab the nullability using the is_not_null expression.
70+ vortex_bail ! (
71+ "Cannot convert nullable Struct array with nulls to Arrow\n {}" ,
72+ array. display_tree( )
73+ ) ;
74+ }
75+
76+ create_from_fields ( fields, field_arrays, None , len, session)
77+ }
78+
79+ fn create_from_fields (
80+ fields : & Fields ,
81+ vortex_fields : Vec < ArrayRef > ,
82+ null_buffer : Option < NullBuffer > ,
83+ len : usize ,
84+ session : & VortexSession ,
85+ ) -> VortexResult < ArrowArrayRef > {
86+ let mut arrow_fields = Vec :: with_capacity ( vortex_fields. len ( ) ) ;
87+ for ( field, vx_field) in fields. iter ( ) . zip ( vortex_fields. into_iter ( ) ) {
88+ let arrow_field = vx_field. execute_arrow ( field. data_type ( ) , session) ?;
89+ vortex_ensure ! (
90+ field. is_nullable( ) || arrow_field. null_count( ) == 0 ,
91+ "Cannot convert field '{}' to non-nullable Arrow field because it contains nulls" ,
92+ field. name( )
93+ ) ;
94+ arrow_fields. push ( arrow_field) ;
95+ }
96+
6797 Ok ( Arc :: new ( unsafe {
6898 StructArray :: new_unchecked_with_length (
6999 fields. clone ( ) ,
70- field_arrays . into ( ) ,
71- to_null_buffer ( validity ) ,
100+ arrow_fields . into ( ) ,
101+ null_buffer ,
72102 len,
73103 )
74104 } ) )
0 commit comments