Skip to content

Commit edf549c

Browse files
committed
add test that fails on main
1 parent ac1ec47 commit edf549c

File tree

1 file changed

+69
-0
lines changed

1 file changed

+69
-0
lines changed

crates/iceberg/src/arrow/record_batch_transformer.rs

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1606,4 +1606,73 @@ mod test {
16061606
assert_eq!(get_string_value(result.column(4).as_ref(), 0), "");
16071607
assert_eq!(get_string_value(result.column(4).as_ref(), 1), "");
16081608
}
1609+
1610+
/// Verifies that a null value in an identity-partitioned column is accepted and
/// surfaces as a null in every row of the projected column.
///
/// Mirrors TestPartitionValues.testNullPartitionValue() from iceberg-java, where the
/// written records carry a null in the partition column. Prior to the fix in #1922,
/// this would error with "Partition field X has null value for identity transform".
#[test]
fn null_identity_partition_value() {
    use crate::spec::{Struct, Transform};

    // Table schema: two optional columns, `id` (int, field id 1) and `data` (string, field id 2).
    let id_field = NestedField::optional(1, "id", Type::Primitive(PrimitiveType::Int));
    let data_field = NestedField::optional(2, "data", Type::Primitive(PrimitiveType::String));
    let table_schema = Arc::new(
        Schema::builder()
            .with_schema_id(0)
            .with_fields(vec![id_field.into(), data_field.into()])
            .build()
            .unwrap(),
    );

    // Identity partitioning on `data`.
    let spec = Arc::new(
        crate::spec::PartitionSpec::builder(Arc::clone(&table_schema))
            .with_spec_id(0)
            .add_partition_field("data", "data", Transform::Identity)
            .unwrap()
            .build()
            .unwrap(),
    );

    // The single partition value (for `data`) is null.
    let null_partition = Struct::from_iter(vec![None]);

    // The data file only carries `id`; `data` is absent from the file schema.
    let arrow_file_schema = Arc::new(ArrowSchema::new(vec![simple_field(
        "id",
        DataType::Int32,
        true,
        "1",
    )]));
    let ids = Int32Array::from(vec![1, 2, 3]);
    let input_batch = RecordBatch::try_new(arrow_file_schema, vec![Arc::new(ids)]).unwrap();

    // Project both table columns.
    let field_ids = [1, 2];
    let mut transformer = RecordBatchTransformerBuilder::new(table_schema, &field_ids)
        .with_partition(spec, null_partition)
        .expect("Should handle null partition values")
        .build();

    let output = transformer.process_record_batch(input_batch).unwrap();

    assert_eq!(output.num_columns(), 2);
    assert_eq!(output.num_rows(), 3);

    // `id` is passed through from the file unchanged.
    let ids_out = output
        .column(0)
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();
    assert_eq!(ids_out.values(), &[1, 2, 3]);

    // Partition column with null value should produce nulls in all three rows.
    let data_out = output.column(1);
    for row in 0..3 {
        assert!(data_out.is_null(row));
    }
}
16091678
}

0 commit comments

Comments
 (0)