@@ -1335,7 +1335,7 @@ mod test {
     /// - Iceberg spec: format/spec.md "Column Projection" section
     #[test]
     fn test_all_four_spec_rules() {
-        use crate::spec::{MappedField, NameMapping, Transform};
+        use crate::spec::Transform;
 
         // Iceberg schema with columns designed to exercise each spec rule
         let snapshot_schema = Arc::new(
@@ -1346,7 +1346,7 @@ mod test {
                     NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
                     // Rule #1: Identity-partitioned field - should use partition metadata
                     NestedField::required(2, "dept", Type::Primitive(PrimitiveType::String)).into(),
-                    // Rule #2: Field resolved by name mapping (no field ID in Parquet)
+                    // Rule #2: Field resolved by name mapping (ArrowReader already applied)
                     NestedField::required(3, "data", Type::Primitive(PrimitiveType::String)).into(),
                     // Rule #3: Field with initial_default
                     NestedField::optional(4, "category", Type::Primitive(PrimitiveType::String))
@@ -1373,20 +1373,14 @@ mod test {
         // Partition data: dept="engineering"
         let partition_data = Struct::from_iter(vec![Some(Literal::string("engineering"))]);
 
-        // Parquet schema: has id (with field_id=1) and data (without field ID)
+        // Parquet schema: simulates post-ArrowReader state where name mapping already applied
+        // Has id (field_id=1) and data (field_id=3, assigned by ArrowReader via name mapping)
         // Missing: dept (in partition), category (has default), notes (no default)
         let parquet_schema = Arc::new(ArrowSchema::new(vec![
             simple_field("id", DataType::Int32, false, "1"),
-            Field::new("data", DataType::Utf8, false), // No field ID - needs name mapping
+            simple_field("data", DataType::Utf8, false, "3"),
         ]));
 
-        // Name mapping: maps field ID 3 to "data" column
-        let name_mapping = Arc::new(NameMapping::new(vec![MappedField::new(
-            Some(3),
-            vec!["data".to_string()],
-            vec![],
-        )]));
-
         let projected_field_ids = [1, 2, 3, 4, 5]; // id, dept, data, category, notes
 
         let mut transformer =
@@ -1452,120 +1446,4 @@ mod test {
         assert!(notes_column.is_null(0));
         assert!(notes_column.is_null(1));
     }
-
-    /// Verifies field ID conflict detection for add_files imports.
-    ///
-    /// Why: add_files can import Parquet with conflicting field IDs (field_id=1->"name" in Parquet
-    /// vs field_id=1->"id" in Iceberg). Name-checking detects conflicts, treats fields as "not present",
-    /// allowing spec fallback to partition constants and name mapping.
-    ///
-    /// Reproduces: TestAddFilesProcedure.addDataPartitioned
-    #[test]
-    fn add_files_with_field_id_conflicts_like_java_test() {
-        use crate::spec::{MappedField, NameMapping, Struct, Transform};
-
-        // Iceberg schema (field IDs 1-4)
-        let snapshot_schema = Arc::new(
-            Schema::builder()
-                .with_schema_id(0)
-                .with_fields(vec![
-                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
-                    NestedField::required(2, "name", Type::Primitive(PrimitiveType::String)).into(),
-                    NestedField::required(3, "dept", Type::Primitive(PrimitiveType::String)).into(),
-                    NestedField::required(4, "subdept", Type::Primitive(PrimitiveType::String))
-                        .into(),
-                ])
-                .build()
-                .unwrap(),
-        );
-
-        let partition_spec = Arc::new(
-            crate::spec::PartitionSpec::builder(snapshot_schema.clone())
-                .with_spec_id(0)
-                .add_partition_field("id", "id", Transform::Identity)
-                .unwrap()
-                .build()
-                .unwrap(),
-        );
-
-        let partition_data = Struct::from_iter(vec![Some(Literal::int(1))]);
-
-        // Parquet schema with CONFLICTING field IDs (1-3 instead of 1-4)
-        let parquet_schema = Arc::new(ArrowSchema::new(vec![
-            simple_field("name", DataType::Utf8, false, "1"),
-            simple_field("dept", DataType::Utf8, false, "2"),
-            simple_field("subdept", DataType::Utf8, false, "3"),
-        ]));
-
-        // WHY name mapping: Resolves conflicts by mapping Iceberg field IDs to Parquet column names
-        let name_mapping = Arc::new(NameMapping::new(vec![
-            MappedField::new(Some(2), vec!["name".to_string()], vec![]),
-            MappedField::new(Some(3), vec!["dept".to_string()], vec![]),
-            MappedField::new(Some(4), vec!["subdept".to_string()], vec![]),
-        ]));
-
-        let projected_field_ids = [1, 2, 3, 4]; // id, name, dept, subdept
-
-        let mut transformer =
-            RecordBatchTransformerBuilder::new(snapshot_schema, &projected_field_ids)
-                .with_partition(Some(partition_spec), Some(partition_data))
-                .build();
-
-        // Parquet data (one row)
-        let parquet_batch = RecordBatch::try_new(parquet_schema, vec![
-            Arc::new(StringArray::from(vec!["John Doe"])),    // name column
-            Arc::new(StringArray::from(vec!["Engineering"])), // dept column
-            Arc::new(StringArray::from(vec!["Backend"])),     // subdept column
-        ])
-        .unwrap();
-
-        let result = transformer.process_record_batch(parquet_batch).unwrap();
-
-        assert_eq!(result.num_columns(), 4);
-        assert_eq!(result.num_rows(), 1);
-
-        // Verify: id from partition constant (conflict with Parquet field_id=1)
-        assert_eq!(
-            result
-                .column(0)
-                .as_any()
-                .downcast_ref::<Int32Array>()
-                .unwrap()
-                .value(0),
-            1
-        );
-
-        // Verify: name via name mapping (conflict with Parquet field_id=2)
-        assert_eq!(
-            result
-                .column(1)
-                .as_any()
-                .downcast_ref::<StringArray>()
-                .unwrap()
-                .value(0),
-            "John Doe"
-        );
-
-        // Verify: dept via name mapping (conflict with Parquet field_id=3)
-        assert_eq!(
-            result
-                .column(2)
-                .as_any()
-                .downcast_ref::<StringArray>()
-                .unwrap()
-                .value(0),
-            "Engineering"
-        );
-
-        // Verify: subdept via name mapping (not in Parquet by field ID)
-        assert_eq!(
-            result
-                .column(3)
-                .as_any()
-                .downcast_ref::<StringArray>()
-                .unwrap()
-                .value(0),
-            "Backend"
-        );
-    }
 }