@@ -459,14 +459,27 @@ impl PartnerAccessor<ArrayRef> for ArrowArrayAccessor {
459
459
. fields ( )
460
460
. iter ( )
461
461
. position ( |arrow_field| {
462
- get_field_id ( arrow_field)
463
- . map ( |id| id == field. id )
464
- . unwrap_or ( false )
462
+ if let Ok ( arrow_field_id) = get_field_id ( arrow_field) {
463
+ arrow_field_id == field. id
464
+ } else {
465
+ // Fallback to name matching (for DataFusion compatibility)
466
+ arrow_field. name ( ) == & field. name
467
+ }
465
468
} )
466
469
. ok_or_else ( || {
467
470
Error :: new (
468
471
ErrorKind :: DataInvalid ,
469
- format ! ( "Field id {} not found in struct array" , field. id) ,
472
+ format ! (
473
+ "Field with name '{}' (id: {}) not found in struct array. Available fields: [{}]" ,
474
+ field. name,
475
+ field. id,
476
+ struct_array
477
+ . fields( )
478
+ . iter( )
479
+ . map( |f| f. name( ) . as_str( ) )
480
+ . collect:: <Vec <_>>( )
481
+ . join( ", " )
482
+ ) ,
470
483
)
471
484
} ) ?;
472
485
@@ -1226,4 +1239,69 @@ mod test {
1226
1239
] ) ) ) ,
1227
1240
] ) ;
1228
1241
}
1242
+
1243
+ #[ test]
1244
+ fn test_field_partner_with_datafusion_schema ( ) {
1245
+ use arrow_schema:: { DataType , Field as ArrowField , Schema as ArrowSchema } ;
1246
+
1247
+ use crate :: spec:: { NestedField , PrimitiveType , Type } ;
1248
+
1249
+ let id_field = "id" ;
1250
+ let score_field = "score" ;
1251
+
1252
+ // Build an Arrow schema in which "id" carries PARQUET:field_id metadata (value "1")
1253
+ // while "score" carries no PARQUET:field_id metadata at all (like a schema coming from DataFusion)
1254
+ let arrow_schema = ArrowSchema :: new ( vec ! [
1255
+ ArrowField :: new( id_field, DataType :: Int64 , false ) . with_metadata( HashMap :: from( [ (
1256
+ PARQUET_FIELD_ID_META_KEY . to_string( ) ,
1257
+ "1" . to_string( ) ,
1258
+ ) ] ) ) ,
1259
+ ArrowField :: new( score_field, DataType :: Float64 , true ) ,
1260
+ ] ) ;
1261
+
1262
+ // Create test data: three rows per column; the last score value is null
1263
+ let id_array = Arc :: new ( Int64Array :: from ( vec ! [ 1 , 2 , 3 ] ) ) as ArrayRef ;
1264
+ let score_array =
1265
+ Arc :: new ( Float64Array :: from ( vec ! [ Some ( 95.5 ) , Some ( 87.2 ) , None ] ) ) as ArrayRef ;
1266
+
1267
+ let struct_array = Arc :: new ( StructArray :: new (
1268
+ arrow_schema. fields ( ) . clone ( ) ,
1269
+ vec ! [ id_array, score_array] ,
1270
+ None ,
1271
+ ) ) as ArrayRef ;
1272
+
1273
+ // Create the corresponding Iceberg nested fields (ids 1 and 2); these shadow the name strings above
1274
+ let id_field = NestedField {
1275
+ id : 1 ,
1276
+ name : id_field. to_string ( ) ,
1277
+ required : true ,
1278
+ field_type : Box :: new ( Type :: Primitive ( PrimitiveType :: Long ) ) ,
1279
+ doc : None ,
1280
+ initial_default : None ,
1281
+ write_default : None ,
1282
+ } ;
1283
+
1284
+ let score_field = NestedField {
1285
+ id : 2 ,
1286
+ name : score_field. to_string ( ) ,
1287
+ required : false ,
1288
+ field_type : Box :: new ( Type :: Primitive ( PrimitiveType :: Double ) ) ,
1289
+ doc : None ,
1290
+ initial_default : None ,
1291
+ write_default : None ,
1292
+ } ;
1293
+
1294
+ let accessor = ArrowArrayAccessor ;
1295
+
1296
+ // Test field matching by field id: "id" has PARQUET:field_id metadata "1", equal to the Iceberg field id 1
1297
+ let id_partner = accessor. field_partner ( & struct_array, & id_field) . unwrap ( ) ;
1298
+ assert_eq ! ( id_partner. len( ) , 3 ) ;
1299
+ assert_eq ! ( id_partner. data_type( ) , & DataType :: Int64 ) ;
1300
+
1301
+ // Test field matching by name: "score" has no PARQUET:field_id metadata, so no id is available for it
1302
+ // and the lookup is expected to fall back to comparing the Arrow field name with the Iceberg field name
1303
+ let score_partner = accessor. field_partner ( & struct_array, & score_field) . unwrap ( ) ;
1304
+ assert_eq ! ( score_partner. len( ) , 3 ) ;
1305
+ assert_eq ! ( score_partner. data_type( ) , & DataType :: Float64 ) ;
1306
+ }
1229
1307
}
0 commit comments