Skip to content

Commit a667539

Browse files
authored
feat(reader): Add binary support to get_arrow_datum for equality deletes with binary type (#1848)
1 parent 46fa95c commit a667539

File tree

2 files changed

+25
-6
lines changed

2 files changed

+25
-6
lines changed

crates/iceberg/src/arrow/caching_delete_file_loader.rs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,9 @@ mod tests {
543543
use std::sync::Arc;
544544

545545
use arrow_array::cast::AsArray;
546-
use arrow_array::{ArrayRef, Int32Array, Int64Array, RecordBatch, StringArray, StructArray};
546+
use arrow_array::{
547+
ArrayRef, BinaryArray, Int32Array, Int64Array, RecordBatch, StringArray, StructArray,
548+
};
547549
use arrow_schema::{DataType, Field, Fields};
548550
use parquet::arrow::{ArrowWriter, PARQUET_FIELD_ID_META_KEY};
549551
use parquet::basic::Compression;
@@ -552,6 +554,8 @@ mod tests {
552554

553555
use super::*;
554556
use crate::arrow::delete_filter::tests::setup;
557+
use crate::scan::FileScanTaskDeleteFile;
558+
use crate::spec::{DataContentType, Schema};
555559

556560
#[tokio::test]
557561
async fn test_delete_file_loader_parse_equality_deletes() {
@@ -567,7 +571,7 @@ mod tests {
567571
.await
568572
.expect("could not get batch stream");
569573

570-
let eq_ids = HashSet::from_iter(vec![2, 3, 4, 6]);
574+
let eq_ids = HashSet::from_iter(vec![2, 3, 4, 6, 8]);
571575

572576
let parsed_eq_delete = CachingDeleteFileLoader::parse_equality_deletes_record_batch_stream(
573577
record_batch_stream,
@@ -577,7 +581,7 @@ mod tests {
577581
.expect("error parsing batch stream");
578582
println!("{parsed_eq_delete}");
579583

580-
let expected = "((((y != 1) OR (z != 100)) OR (a != \"HELP\")) OR (sa != 4)) AND ((((y != 2) OR (z IS NOT NULL)) OR (a IS NOT NULL)) OR (sa != 5))".to_string();
584+
let expected = "(((((y != 1) OR (z != 100)) OR (a != \"HELP\")) OR (sa != 4)) OR (b != 62696E6172795F64617461)) AND (((((y != 2) OR (z IS NOT NULL)) OR (a IS NOT NULL)) OR (sa != 5)) OR (b IS NOT NULL))".to_string();
581585

582586
assert_eq!(parsed_eq_delete.to_string(), expected);
583587
}
@@ -611,6 +615,9 @@ mod tests {
611615
),
612616
]));
613617

618+
let col_b_vals = vec![Some(&b"binary_data"[..]), None];
619+
let col_b = Arc::new(BinaryArray::from(col_b_vals)) as ArrayRef;
620+
614621
let equality_delete_schema = {
615622
let struct_field = DataType::Struct(Fields::from(vec![
616623
simple_field("sa", DataType::Int32, false, "6"),
@@ -628,12 +635,13 @@ mod tests {
628635
(PARQUET_FIELD_ID_META_KEY.to_string(), "4".to_string()),
629636
])),
630637
simple_field("s", struct_field, false, "5"),
638+
simple_field("b", DataType::Binary, true, "8"),
631639
];
632640
Arc::new(arrow_schema::Schema::new(fields))
633641
};
634642

635643
let equality_deletes_to_write = RecordBatch::try_new(equality_delete_schema.clone(), vec![
636-
col_y, col_z, col_a, col_s,
644+
col_y, col_z, col_a, col_s, col_b,
637645
])
638646
.unwrap();
639647

crates/iceberg/src/arrow/schema.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ use std::sync::Arc;
2222

2323
use arrow_array::types::{Decimal128Type, validate_decimal_precision_and_scale};
2424
use arrow_array::{
25-
BooleanArray, Date32Array, Datum as ArrowDatum, Decimal128Array, FixedSizeBinaryArray,
26-
Float32Array, Float64Array, Int32Array, Int64Array, Scalar, StringArray,
25+
BinaryArray, BooleanArray, Date32Array, Datum as ArrowDatum, Decimal128Array,
26+
FixedSizeBinaryArray, Float32Array, Float64Array, Int32Array, Int64Array, Scalar, StringArray,
2727
TimestampMicrosecondArray,
2828
};
2929
use arrow_schema::{DataType, Field, Fields, Schema as ArrowSchema, TimeUnit};
@@ -678,6 +678,9 @@ pub(crate) fn get_arrow_datum(datum: &Datum) -> Result<Arc<dyn ArrowDatum + Send
678678
(PrimitiveType::String, PrimitiveLiteral::String(value)) => {
679679
Ok(Arc::new(StringArray::new_scalar(value.as_str())))
680680
}
681+
(PrimitiveType::Binary, PrimitiveLiteral::Binary(value)) => {
682+
Ok(Arc::new(BinaryArray::new_scalar(value.as_slice())))
683+
}
681684
(PrimitiveType::Date, PrimitiveLiteral::Int(value)) => {
682685
Ok(Arc::new(Date32Array::new_scalar(*value)))
683686
}
@@ -1814,6 +1817,14 @@ mod tests {
18141817
assert!(is_scalar);
18151818
assert_eq!(array.value(0), "abc");
18161819
}
1820+
{
1821+
let datum = Datum::binary(vec![1, 2, 3, 4]);
1822+
let arrow_datum = get_arrow_datum(&datum).unwrap();
1823+
let (array, is_scalar) = arrow_datum.get();
1824+
let array = array.as_any().downcast_ref::<BinaryArray>().unwrap();
1825+
assert!(is_scalar);
1826+
assert_eq!(array.value(0), &[1, 2, 3, 4]);
1827+
}
18171828
{
18181829
let datum = Datum::date(42);
18191830
let arrow_datum = get_arrow_datum(&datum).unwrap();

0 commit comments

Comments
 (0)