Skip to content

Commit 3ce50a3

Browse files
committed
Fix take on empty lists
It could previously replace empty lists with null. Updated tests were accidentally using null bitmasks intended for value arrays in the list arrays.
1 parent a876b59 commit 3ce50a3

File tree

1 file changed

+27
-13
lines changed

1 file changed

+27
-13
lines changed

arrow/src/compute/kernels/take.rs

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -770,21 +770,19 @@ where
770770
take_value_indices_from_list::<IndexType, OffsetType>(values, indices)?;
771771

772772
let taken = take_impl::<OffsetType>(values.values().as_ref(), &list_indices, None)?;
773-
// determine null count and null buffer, which are a function of `values` and `indices`
774-
let mut null_count = 0;
773+
// determine null buffer, which are a function of `values` and `indices`
775774
let num_bytes = bit_util::ceil(indices.len(), 8);
776775
let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, true);
777776
{
778777
let null_slice = null_buf.as_slice_mut();
779-
offsets[..].windows(2).enumerate().for_each(
780-
|(i, window): (usize, &[OffsetType::Native])| {
781-
if window[0] == window[1] {
782-
// offsets are equal, slot is null
783-
bit_util::unset_bit(null_slice, i);
784-
null_count += 1;
785-
}
786-
},
787-
);
778+
for i in 0..indices.len() {
779+
let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| {
780+
ArrowError::ComputeError("Cast to usize failed".to_string())
781+
})?;
782+
if !indices.is_valid(i) || values.is_null(index) {
783+
bit_util::unset_bit(null_slice, i);
784+
}
785+
}
788786
}
789787
let value_offsets = Buffer::from_slice_ref(&offsets);
790788
// create a new list with taken data and computed null information
@@ -1428,7 +1426,7 @@ mod tests {
14281426
let list_data = ArrayData::builder(list_data_type.clone())
14291427
.len(4)
14301428
.add_buffer(value_offsets)
1431-
.null_bit_buffer(Buffer::from([0b10111101, 0b00000000]))
1429+
.null_bit_buffer(Buffer::from([0b1111]))
14321430
.add_child_data(value_data)
14331431
.build();
14341432
let list_array = $list_array_type::from(list_data);
@@ -1501,7 +1499,7 @@ mod tests {
15011499
let list_data = ArrayData::builder(list_data_type.clone())
15021500
.len(4)
15031501
.add_buffer(value_offsets)
1504-
.null_bit_buffer(Buffer::from([0b01111101]))
1502+
.null_bit_buffer(Buffer::from([0b1011]))
15051503
.add_child_data(value_data)
15061504
.build();
15071505
let list_array = $list_array_type::from(list_data);
@@ -1766,6 +1764,22 @@ mod tests {
17661764
.unwrap();
17671765
}
17681766

1767+
#[test]
1768+
fn test_take_empty_list() {
1769+
let input = ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
1770+
Some(vec![]),
1771+
None,
1772+
Some(vec![]),
1773+
Some(vec![Some(123), None]),
1774+
Some(vec![]),
1775+
]);
1776+
let index = UInt32Array::from(vec![Some(0), Some(1), Some(2), Some(3), Some(4)]);
1777+
1778+
let output = take(&input, &index, None).unwrap();
1779+
let output = output.as_any().downcast_ref::<ListArray>().unwrap();
1780+
assert_eq!(output, &input);
1781+
}
1782+
17691783
#[test]
17701784
fn test_take_dict() {
17711785
let keys_builder = Int16Builder::new(8);

0 commit comments

Comments
 (0)