Skip to content

Commit a0808e7

Browse files
authored
Fix: revert List::take compute function (#5099)
Related: https://spiraldb.slack.com/archives/C07BV3GKAJ2/p1761219012611469 Reverts the `List` `take` compute function to again return a `ListArray` instead of converting to `ListView` and relying on the other `take` compute function. We will probably keep this as is until the performance of `ListView` improves / more people adopt `ListView` (Arrow and DF). Signed-off-by: Connor Tsui <[email protected]>
1 parent 28c5ce5 commit a0808e7

File tree

1 file changed

+49
-38
lines changed
  • vortex-array/src/arrays/list/compute

1 file changed

+49
-38
lines changed

vortex-array/src/arrays/list/compute/take.rs

Lines changed: 49 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,41 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use vortex_buffer::BitBufferMut;
5-
use vortex_dtype::{IntegerPType, Nullability};
5+
use vortex_dtype::{IntegerPType, Nullability, match_each_integer_ptype};
66
use vortex_error::{VortexExpect, VortexResult, vortex_panic};
77
use vortex_mask::Mask;
88

9-
use crate::arrays::{ListArray, ListVTable, PrimitiveArray, list_view_from_list};
9+
use crate::arrays::{ListArray, ListVTable, PrimitiveArray};
1010
use crate::builders::{ArrayBuilder, PrimitiveBuilder};
11-
use crate::compute::{self, TakeKernel, TakeKernelAdapter};
11+
use crate::compute::{TakeKernel, TakeKernelAdapter, take};
1212
use crate::validity::Validity;
1313
use crate::vtable::ValidityHelper;
14-
use crate::{Array, ArrayRef, IntoArray, register_kernel};
14+
use crate::{Array, ArrayRef, ToCanonical, register_kernel};
15+
16+
// TODO(connor)[ListView]: Re-revert to the version where we simply convert to a `ListView` and call
17+
// the `ListView::take` compute function once `ListView` is more stable.
1518

16-
// TODO(connor): For very short arrays it is probably more efficient to build the list from scratch.
1719
/// Take implementation for [`ListArray`].
1820
///
19-
/// This implementation converts the [`ListArray`] to a [`ListViewArray`] and then delegates to its
20-
/// `take` implementation. This approach avoids the need to rebuild the `elements` array.
21-
///
22-
/// The resulting [`ListViewArray`] can represent non-contiguous and out-of-order lists, which would
23-
/// violate [`ListArray`]'s invariants (but not [`ListViewArray`]'s).
24-
///
25-
/// [`ListViewArray`]: crate::arrays::ListViewArray
21+
/// Unlike `ListView`, `ListArray` must rebuild the elements array to maintain its invariant
22+
/// that lists are stored contiguously and in-order (`offset[i+1] >= offset[i]`). Taking
23+
/// non-contiguous indices would violate this requirement.
2624
impl TakeKernel for ListVTable {
2725
fn take(&self, array: &ListArray, indices: &dyn Array) -> VortexResult<ArrayRef> {
28-
let list_view = list_view_from_list(array.clone());
29-
compute::take(&list_view.into_array(), indices)
26+
let indices = indices.to_primitive();
27+
let offsets = array.offsets().to_primitive();
28+
29+
match_each_integer_ptype!(offsets.dtype().as_ptype(), |O| {
30+
match_each_integer_ptype!(indices.ptype(), |I| {
31+
_take::<I, O>(
32+
array,
33+
offsets.as_slice::<O>(),
34+
&indices,
35+
array.validity_mask(),
36+
indices.validity_mask(),
37+
)
38+
})
39+
})
3040
}
3141
}
3242

@@ -86,7 +96,7 @@ fn _take<I: IntegerPType, O: IntegerPType>(
8696
let elements_to_take = elements_to_take.finish();
8797
let new_offsets = new_offsets.finish();
8898

89-
let new_elements = compute::take(array.elements(), elements_to_take.as_ref())?;
99+
let new_elements = take(array.elements(), elements_to_take.as_ref())?;
90100

91101
Ok(ListArray::try_new(
92102
new_elements,
@@ -121,47 +131,48 @@ fn _take_nullable<I: IntegerPType, O: IntegerPType>(
121131
let mut current_offset = O::zero();
122132
new_offsets.append_zero();
123133

124-
let mut new_validity = BitBufferMut::with_capacity(indices.len());
134+
// Set all bits to invalid and selectively set which values are valid.
135+
let mut new_validity = BitBufferMut::new_unset(indices.len());
125136

126137
for (idx, data_idx) in indices.iter().enumerate() {
127138
if !indices_validity.value(idx) {
128139
new_offsets.append_value(current_offset);
129-
new_validity.append_false();
140+
// Bit buffer already has this set to invalid.
130141
continue;
131142
}
132143

133144
let data_idx = data_idx
134145
.to_usize()
135146
.unwrap_or_else(|| vortex_panic!("Failed to convert index to usize: {}", data_idx));
136147

137-
if data_validity.value(data_idx) {
138-
let start = offsets[data_idx];
139-
let stop = offsets[data_idx + 1];
140-
141-
// See the note it the `take` on the reasoning
142-
let additional = (stop - start).to_usize().unwrap_or_else(|| {
143-
vortex_panic!("Failed to convert range length to usize: {}", stop - start)
144-
});
145-
146-
elements_to_take.reserve_exact(additional);
147-
for i in 0..additional {
148-
elements_to_take
149-
.append_value(start + O::from_usize(i).vortex_expect("i < additional"));
150-
}
151-
current_offset += stop - start;
148+
if !data_validity.value(data_idx) {
152149
new_offsets.append_value(current_offset);
153-
new_validity.append_true()
154-
} else {
155-
new_offsets.append_value(current_offset);
156-
new_validity.append_false();
150+
// Bit buffer already has this set to invalid.
151+
continue;
157152
}
153+
154+
let start = offsets[data_idx];
155+
let stop = offsets[data_idx + 1];
156+
157+
// See the note it the `take` on the reasoning
158+
let additional = (stop - start).to_usize().unwrap_or_else(|| {
159+
vortex_panic!("Failed to convert range length to usize: {}", stop - start)
160+
});
161+
162+
elements_to_take.reserve_exact(additional);
163+
for i in 0..additional {
164+
elements_to_take.append_value(start + O::from_usize(i).vortex_expect("i < additional"));
165+
}
166+
current_offset += stop - start;
167+
new_offsets.append_value(current_offset);
168+
new_validity.set(idx);
158169
}
159170

160171
let elements_to_take = elements_to_take.finish();
161172
let new_offsets = new_offsets.finish();
162-
let new_elements = compute::take(array.elements(), elements_to_take.as_ref())?;
173+
let new_elements = take(array.elements(), elements_to_take.as_ref())?;
163174

164-
let new_validity: Validity = Validity::from(new_validity.freeze());
175+
let new_validity = Validity::from(new_validity.freeze());
165176
// data are indexes are nullable, so the final result is also nullable.
166177

167178
Ok(ListArray::try_new(new_elements, new_offsets, new_validity)?.to_array())

0 commit comments

Comments
 (0)