@@ -21,12 +21,12 @@ use arrow_array::{
21
21
LargeListArray , LargeStringArray , ListArray , MapArray , StringArray , StructArray ,
22
22
Time64MicrosecondArray , TimestampMicrosecondArray , TimestampNanosecondArray ,
23
23
} ;
24
- use arrow_schema:: DataType ;
24
+ use arrow_schema:: { DataType , Schema as ArrowSchema } ;
25
25
use uuid:: Uuid ;
26
26
27
- use super :: get_field_id;
27
+ use super :: { get_field_id, schema_to_arrow_schema } ;
28
28
use crate :: spec:: {
29
- ListType , Literal , Map , MapType , NestedField , PartnerAccessor , PrimitiveType ,
29
+ ListType , Literal , Map , MapType , NestedField , PartnerAccessor , PrimitiveType , Schema ,
30
30
SchemaWithPartnerVisitor , Struct , StructType , visit_struct_with_partner,
31
31
} ;
32
32
use crate :: { Error , ErrorKind , Result } ;
@@ -426,7 +426,24 @@ impl SchemaWithPartnerVisitor<ArrayRef> for ArrowArrayToIcebergStructConverter {
426
426
}
427
427
428
428
/// Partner type representing accessing and walking arrow arrays alongside iceberg schema
429
- pub struct ArrowArrayAccessor ;
429
+ pub struct ArrowArrayAccessor {
430
+ arrow_schema : Option < ArrowSchema > ,
431
+ }
432
+
433
+ impl ArrowArrayAccessor {
434
+ /// Creates a new instance of ArrowArrayAccessor without arrow schema fallback
435
+ pub fn new ( ) -> Result < Self > {
436
+ Ok ( Self { arrow_schema : None } )
437
+ }
438
+
439
+ /// Creates a new instance of ArrowArrayAccessor with arrow schema converted from table schema
440
+ /// for field ID resolution fallback
441
+ pub fn new_with_table_schema ( table_schema : & Schema ) -> Result < Self > {
442
+ Ok ( Self {
443
+ arrow_schema : Some ( schema_to_arrow_schema ( table_schema) ?) ,
444
+ } )
445
+ }
446
+ }
430
447
431
448
impl PartnerAccessor < ArrayRef > for ArrowArrayAccessor {
432
449
fn struct_partner < ' a > ( & self , schema_partner : & ' a ArrayRef ) -> Result < & ' a ArrayRef > {
@@ -459,9 +476,13 @@ impl PartnerAccessor<ArrayRef> for ArrowArrayAccessor {
459
476
. fields ( )
460
477
. iter ( )
461
478
. position ( |arrow_field| {
462
- // match by ID if available, otherwise try matching by name
463
- get_field_id ( arrow_field)
464
- . map_or ( arrow_field. name ( ) == & field. name , |id| id == field. id )
479
+ get_field_id ( arrow_field) . map_or ( false , |id| id == field. id )
480
+ || self
481
+ . arrow_schema
482
+ . as_ref ( )
483
+ . and_then ( |schema| schema. field_with_name ( & field. name ) . ok ( ) )
484
+ . and_then ( |field_from_schema| get_field_id ( field_from_schema) . ok ( ) )
485
+ . map_or ( false , |id| id == field. id )
465
486
} )
466
487
. ok_or_else ( || {
467
488
Error :: new (
@@ -559,7 +580,7 @@ pub fn arrow_struct_to_literal(
559
580
ty,
560
581
struct_array,
561
582
& mut ArrowArrayToIcebergStructConverter ,
562
- & ArrowArrayAccessor ,
583
+ & ArrowArrayAccessor :: new ( ) ? ,
563
584
)
564
585
}
565
586
@@ -910,86 +931,50 @@ mod test {
910
931
}
911
932
912
933
#[ test]
913
- fn test_field_name_fallback_when_id_unavailable ( ) {
914
- // Create an Arrow struct array with fields that don't have field IDs in metadata
915
- let int32_array = Int32Array :: from ( vec ! [ Some ( 1 ) , Some ( 2 ) , None ] ) ;
916
- let string_array = StringArray :: from ( vec ! [ Some ( "hello" ) , Some ( "world" ) , None ] ) ;
934
+ fn test_field_id_fallback_with_arrow_schema ( ) {
935
+ // Create an Arrow struct array with a field that doesn't have field ID in metadata
936
+ let int32_array = Int32Array :: from ( vec ! [ Some ( 42 ) , Some ( 43 ) , None ] ) ;
917
937
918
- let struct_array =
919
- Arc :: new ( StructArray :: from ( vec ! [
920
- (
921
- // Field without field ID metadata - should fallback to name matching
922
- Arc :: new( Field :: new( "field_a" , DataType :: Int32 , true ) ) ,
923
- Arc :: new( int32_array) as ArrayRef ,
924
- ) ,
925
- (
926
- // Field with the correct field ID metadata
927
- Arc :: new( Field :: new( "field_b" , DataType :: Utf8 , true ) . with_metadata(
928
- HashMap :: from( [ ( PARQUET_FIELD_ID_META_KEY . to_string( ) , "2" . to_string( ) ) ] ) ,
929
- ) ) ,
930
- Arc :: new( string_array) as ArrayRef ,
931
- ) ,
932
- ] ) ) as ArrayRef ;
938
+ // Create the struct array with a field that has no field ID metadata
939
+ let struct_array = Arc :: new ( StructArray :: from ( vec ! [ (
940
+ Arc :: new( Field :: new( "field_a" , DataType :: Int32 , true ) ) , // No field ID metadata
941
+ Arc :: new( int32_array) as ArrayRef ,
942
+ ) ] ) ) as ArrayRef ;
933
943
934
- // Create Iceberg struct type with field IDs that don't match the Arrow metadata
935
- let iceberg_struct_type = StructType :: new ( vec ! [
936
- Arc :: new( NestedField :: optional(
937
- 1 , // Different ID than what's in Arrow metadata (or no metadata)
938
- "field_a" , // Same name as Arrow field
944
+ // Create an Iceberg schema with field ID
945
+ let iceberg_schema = Schema :: builder ( )
946
+ . with_schema_id ( 1 )
947
+ . with_fields ( vec ! [ Arc :: new( NestedField :: optional(
948
+ 100 , // Field ID that we'll look for
949
+ "field_a" ,
939
950
Type :: Primitive ( PrimitiveType :: Int ) ,
940
- ) ) ,
941
- Arc :: new( NestedField :: optional(
942
- 2 , // Same ID
943
- "field_b" , // Same name as Arrow field
944
- Type :: Primitive ( PrimitiveType :: String ) ,
945
- ) ) ,
946
- ] ) ;
947
-
948
- // This should succeed by falling back to field name matching
949
- let result = arrow_struct_to_literal ( & struct_array, & iceberg_struct_type) . unwrap ( ) ;
950
-
951
- assert_eq ! ( result, vec![
952
- Some ( Literal :: Struct ( Struct :: from_iter( vec![
953
- Some ( Literal :: int( 1 ) ) ,
954
- Some ( Literal :: string( "hello" . to_string( ) ) ) ,
955
- ] ) ) ) ,
956
- Some ( Literal :: Struct ( Struct :: from_iter( vec![
957
- Some ( Literal :: int( 2 ) ) ,
958
- Some ( Literal :: string( "world" . to_string( ) ) ) ,
959
- ] ) ) ) ,
960
- Some ( Literal :: Struct ( Struct :: from_iter( vec![ None , None , ] ) ) ) ,
961
- ] ) ;
962
- }
951
+ ) ) ] )
952
+ . build ( )
953
+ . unwrap ( ) ;
963
954
964
- #[ test]
965
- fn test_field_not_found_error ( ) {
966
- // Test that we get an appropriate error when neither field ID nor name matches
955
+ // Create an ArrowArrayAccessor with the table schema for fallback
956
+ let accessor = ArrowArrayAccessor :: new_with_table_schema ( & iceberg_schema) . unwrap ( ) ;
967
957
968
- let int32_array = Int32Array :: from ( vec ! [ Some ( 1 ) , Some ( 2 ) ] ) ;
958
+ // Create a nested field to look up
959
+ let field = NestedField :: optional ( 100 , "field_a" , Type :: Primitive ( PrimitiveType :: Int ) ) ;
969
960
970
- let struct_array = Arc :: new ( StructArray :: from ( vec ! [ (
971
- Arc :: new( Field :: new( "arrow_field_name" , DataType :: Int32 , true ) ) ,
972
- Arc :: new( int32_array) as ArrayRef ,
973
- ) ] ) ) as ArrayRef ;
961
+ // This should succeed by using the arrow_schema fallback
962
+ let result = accessor. field_partner ( & struct_array, & field) ;
974
963
975
- // Create Iceberg struct type with field that doesn't match by ID or name
976
- let iceberg_struct_type = StructType :: new ( vec ! [ Arc :: new( NestedField :: optional(
977
- 10 ,
978
- "different_field_name" , // Different name than Arrow field
979
- Type :: Primitive ( PrimitiveType :: Int ) ,
980
- ) ) ] ) ;
964
+ // Verify that the field was found
965
+ assert ! ( result. is_ok( ) ) ;
981
966
982
- // This should fail with an appropriate error message
983
- let result = arrow_struct_to_literal ( & struct_array, & iceberg_struct_type) ;
967
+ // Verify that the field has the expected value
968
+ let array_ref = result. unwrap ( ) ;
969
+ let int_array = array_ref. as_any ( ) . downcast_ref :: < Int32Array > ( ) . unwrap ( ) ;
970
+ assert_eq ! ( int_array. value( 0 ) , 42 ) ;
971
+ assert_eq ! ( int_array. value( 1 ) , 43 ) ;
972
+ assert ! ( int_array. is_null( 2 ) ) ;
984
973
974
+ // Now try with an accessor without arrow_schema - this should fail
975
+ let accessor_without_schema = ArrowArrayAccessor :: new ( ) . unwrap ( ) ;
976
+ let result = accessor_without_schema. field_partner ( & struct_array, & field) ;
985
977
assert ! ( result. is_err( ) ) ;
986
- let error = result. unwrap_err ( ) ;
987
- assert_eq ! ( error. kind( ) , ErrorKind :: DataInvalid ) ;
988
- assert ! (
989
- error. message( ) . contains(
990
- "Field with id=10 or name=different_field_name not found in struct array"
991
- )
992
- ) ;
993
978
}
994
979
995
980
#[ test]
0 commit comments