Skip to content

Commit c20e340

Browse files
committed
Treat BinaryArray and LargeBinaryArray the same in chunk comparison
1 parent 88d2593 commit c20e340

File tree

2 files changed

+75
-1
lines changed

2 files changed

+75
-1
lines changed

crates/store/re_chunk/src/chunk.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use arrow::{
1212
use itertools::{Either, Itertools as _, izip};
1313
use nohash_hasher::IntMap;
1414

15-
use re_arrow_util::ArrowArrayDowncastRef as _;
15+
use re_arrow_util::{ArrowArrayDowncastRef as _, widen_binary_arrays};
1616
use re_byte_size::SizeBytes as _;
1717
use re_log_types::{
1818
AbsoluteTimeRange, EntityPath, NonMinI64, TimeInt, TimeType, Timeline, TimelineName,
@@ -101,6 +101,8 @@ impl ChunkComponents {
101101
let Some(right_array) = right.get(descr) else {
102102
anyhow::bail!("rhs is missing {descr:?}");
103103
};
104+
let left_array = widen_binary_arrays(left_array);
105+
let right_array = widen_binary_arrays(right_array);
104106
re_arrow_util::ensure_similar(&left_array.to_data(), &right_array.to_data())
105107
.with_context(|| format!("Component {descr:?}"))?;
106108
}

crates/utils/re_arrow_util/src/lib.rs

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,75 @@ pub use self::arrays::*;
99
pub use self::batches::*;
1010
pub use self::compare::*;
1111
pub use self::format_data_type::*;
12+
13+
// ----------------------------------------------------------------
14+
15+
use std::sync::Arc;
16+
17+
use arrow::{
18+
array::{Array as _, AsArray as _, ListArray},
19+
datatypes::{DataType, Field},
20+
};
21+
22+
/// Convert any `BinaryArray` to `LargeBinaryArray`, because we treat them logivally the same
23+
pub fn widen_binary_arrays(list_array: &ListArray) -> ListArray {
24+
let list_data_type = list_array.data_type();
25+
if let DataType::List(field) = list_data_type
26+
&& field.data_type() == &DataType::Binary
27+
{
28+
re_tracing::profile_function!();
29+
let large_binary_field = Field::new("item", DataType::LargeBinary, true);
30+
let target_type = DataType::List(Arc::new(large_binary_field));
31+
32+
#[expect(clippy::unwrap_used)]
33+
arrow::compute::kernels::cast::cast(list_array, &target_type)
34+
.unwrap()
35+
.as_list()
36+
.clone()
37+
} else {
38+
list_array.clone()
39+
}
40+
}
41+
42+
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::array::{BinaryBuilder, Int32Builder, ListBuilder};

    /// A `List<Binary>` must come out as `List<LargeBinary>` with identical validity and bytes.
    #[test]
    fn test_widen_list_binary() {
        // Create test data
        let mut list_builder = ListBuilder::new(BinaryBuilder::new());

        // First list: [b"hello", b"world"]
        list_builder.values().append_value(b"hello");
        list_builder.values().append_value(b"world");
        list_builder.append(true);

        // Second list: [b"rust", b"arrow"]
        list_builder.values().append_value(b"rust");
        list_builder.values().append_value(b"arrow");
        list_builder.append(true);

        // Third list: null
        list_builder.append_null();

        let original_list = list_builder.finish();

        // Widen to LargeBinaryArray
        let widened_list = widen_binary_arrays(&original_list);

        // Verify the list structure
        assert_eq!(widened_list.len(), 3);
        assert!(!widened_list.is_null(0));
        assert!(!widened_list.is_null(1));
        assert!(widened_list.is_null(2));

        // Check data type
        if let DataType::List(field) = widened_list.data_type() {
            assert_eq!(field.data_type(), &DataType::LargeBinary);
        } else {
            panic!("Expected List data type");
        }

        // Check that the payload bytes survived the cast
        let first_list = widened_list.value(0);
        let first_items = first_list.as_binary::<i64>();
        assert_eq!(first_items.len(), 2);
        assert_eq!(first_items.value(0), b"hello".as_slice());
        assert_eq!(first_items.value(1), b"world".as_slice());

        let second_list = widened_list.value(1);
        let second_items = second_list.as_binary::<i64>();
        assert_eq!(second_items.len(), 2);
        assert_eq!(second_items.value(0), b"rust".as_slice());
        assert_eq!(second_items.value(1), b"arrow".as_slice());
    }

    /// Lists whose items are not `Binary` must be returned unchanged.
    #[test]
    fn test_widen_leaves_other_lists_untouched() {
        let mut list_builder = ListBuilder::new(Int32Builder::new());

        // First list: [1, 2]
        list_builder.values().append_value(1);
        list_builder.values().append_value(2);
        list_builder.append(true);

        // Second list: null
        list_builder.append_null();

        let original_list = list_builder.finish();

        assert_eq!(widen_binary_arrays(&original_list), original_list);
    }
}

0 commit comments

Comments
 (0)