|
2 | 2 | // SPDX-FileCopyrightText: Copyright the Vortex contributors |
3 | 3 |
|
4 | 4 | use vortex_buffer::BitBufferMut; |
5 | | -use vortex_dtype::{IntegerPType, Nullability}; |
| 5 | +use vortex_dtype::{IntegerPType, Nullability, match_each_integer_ptype}; |
6 | 6 | use vortex_error::{VortexExpect, VortexResult, vortex_panic}; |
7 | 7 | use vortex_mask::Mask; |
8 | 8 |
|
9 | | -use crate::arrays::{ListArray, ListVTable, PrimitiveArray, list_view_from_list}; |
| 9 | +use crate::arrays::{ListArray, ListVTable, PrimitiveArray}; |
10 | 10 | use crate::builders::{ArrayBuilder, PrimitiveBuilder}; |
11 | | -use crate::compute::{self, TakeKernel, TakeKernelAdapter}; |
| 11 | +use crate::compute::{TakeKernel, TakeKernelAdapter, take}; |
12 | 12 | use crate::validity::Validity; |
13 | 13 | use crate::vtable::ValidityHelper; |
14 | | -use crate::{Array, ArrayRef, IntoArray, register_kernel}; |
| 14 | +use crate::{Array, ArrayRef, ToCanonical, register_kernel}; |
| 15 | + |
| 16 | +// TODO(connor)[ListView]: Re-revert to the version where we simply convert to a `ListView` and call |
| 17 | +// the `ListView::take` compute function once `ListView` is more stable. |
15 | 18 |
|
16 | | -// TODO(connor): For very short arrays it is probably more efficient to build the list from scratch. |
17 | 19 | /// Take implementation for [`ListArray`]. |
18 | 20 | /// |
19 | | -/// This implementation converts the [`ListArray`] to a [`ListViewArray`] and then delegates to its |
20 | | -/// `take` implementation. This approach avoids the need to rebuild the `elements` array. |
21 | | -/// |
22 | | -/// The resulting [`ListViewArray`] can represent non-contiguous and out-of-order lists, which would |
23 | | -/// violate [`ListArray`]'s invariants (but not [`ListViewArray`]'s). |
24 | | -/// |
25 | | -/// [`ListViewArray`]: crate::arrays::ListViewArray |
| 21 | +/// Unlike `ListView`, `ListArray` must rebuild the elements array to maintain its invariant |
| 22 | +/// that lists are stored contiguously and in-order (`offset[i+1] >= offset[i]`). Taking |
| 23 | +/// non-contiguous indices would violate this requirement. |
26 | 24 | impl TakeKernel for ListVTable { |
27 | 25 | fn take(&self, array: &ListArray, indices: &dyn Array) -> VortexResult<ArrayRef> { |
28 | | - let list_view = list_view_from_list(array.clone()); |
29 | | - compute::take(&list_view.into_array(), indices) |
| 26 | + let indices = indices.to_primitive(); |
| 27 | + let offsets = array.offsets().to_primitive(); |
| 28 | + |
| 29 | + match_each_integer_ptype!(offsets.dtype().as_ptype(), |O| { |
| 30 | + match_each_integer_ptype!(indices.ptype(), |I| { |
| 31 | + _take::<I, O>( |
| 32 | + array, |
| 33 | + offsets.as_slice::<O>(), |
| 34 | + &indices, |
| 35 | + array.validity_mask(), |
| 36 | + indices.validity_mask(), |
| 37 | + ) |
| 38 | + }) |
| 39 | + }) |
30 | 40 | } |
31 | 41 | } |
32 | 42 |
|
@@ -86,7 +96,7 @@ fn _take<I: IntegerPType, O: IntegerPType>( |
86 | 96 | let elements_to_take = elements_to_take.finish(); |
87 | 97 | let new_offsets = new_offsets.finish(); |
88 | 98 |
|
89 | | - let new_elements = compute::take(array.elements(), elements_to_take.as_ref())?; |
| 99 | + let new_elements = take(array.elements(), elements_to_take.as_ref())?; |
90 | 100 |
|
91 | 101 | Ok(ListArray::try_new( |
92 | 102 | new_elements, |
@@ -121,47 +131,48 @@ fn _take_nullable<I: IntegerPType, O: IntegerPType>( |
121 | 131 | let mut current_offset = O::zero(); |
122 | 132 | new_offsets.append_zero(); |
123 | 133 |
|
124 | | - let mut new_validity = BitBufferMut::with_capacity(indices.len()); |
| 134 | + // Set all bits to invalid and selectively set which values are valid. |
| 135 | + let mut new_validity = BitBufferMut::new_unset(indices.len()); |
125 | 136 |
|
126 | 137 | for (idx, data_idx) in indices.iter().enumerate() { |
127 | 138 | if !indices_validity.value(idx) { |
128 | 139 | new_offsets.append_value(current_offset); |
129 | | - new_validity.append_false(); |
| 140 | + // Bit buffer already has this set to invalid. |
130 | 141 | continue; |
131 | 142 | } |
132 | 143 |
|
133 | 144 | let data_idx = data_idx |
134 | 145 | .to_usize() |
135 | 146 | .unwrap_or_else(|| vortex_panic!("Failed to convert index to usize: {}", data_idx)); |
136 | 147 |
|
137 | | - if data_validity.value(data_idx) { |
138 | | - let start = offsets[data_idx]; |
139 | | - let stop = offsets[data_idx + 1]; |
140 | | - |
141 | | - // See the note it the `take` on the reasoning |
142 | | - let additional = (stop - start).to_usize().unwrap_or_else(|| { |
143 | | - vortex_panic!("Failed to convert range length to usize: {}", stop - start) |
144 | | - }); |
145 | | - |
146 | | - elements_to_take.reserve_exact(additional); |
147 | | - for i in 0..additional { |
148 | | - elements_to_take |
149 | | - .append_value(start + O::from_usize(i).vortex_expect("i < additional")); |
150 | | - } |
151 | | - current_offset += stop - start; |
| 148 | + if !data_validity.value(data_idx) { |
152 | 149 | new_offsets.append_value(current_offset); |
153 | | - new_validity.append_true() |
154 | | - } else { |
155 | | - new_offsets.append_value(current_offset); |
156 | | - new_validity.append_false(); |
| 150 | + // Bit buffer already has this set to invalid. |
| 151 | + continue; |
157 | 152 | } |
| 153 | + |
| 154 | + let start = offsets[data_idx]; |
| 155 | + let stop = offsets[data_idx + 1]; |
| 156 | + |
| 157 | + // See the note in `take` for the reasoning |
| 158 | + let additional = (stop - start).to_usize().unwrap_or_else(|| { |
| 159 | + vortex_panic!("Failed to convert range length to usize: {}", stop - start) |
| 160 | + }); |
| 161 | + |
| 162 | + elements_to_take.reserve_exact(additional); |
| 163 | + for i in 0..additional { |
| 164 | + elements_to_take.append_value(start + O::from_usize(i).vortex_expect("i < additional")); |
| 165 | + } |
| 166 | + current_offset += stop - start; |
| 167 | + new_offsets.append_value(current_offset); |
| 168 | + new_validity.set(idx); |
158 | 169 | } |
159 | 170 |
|
160 | 171 | let elements_to_take = elements_to_take.finish(); |
161 | 172 | let new_offsets = new_offsets.finish(); |
162 | | - let new_elements = compute::take(array.elements(), elements_to_take.as_ref())?; |
| 173 | + let new_elements = take(array.elements(), elements_to_take.as_ref())?; |
163 | 174 |
|
164 | | - let new_validity: Validity = Validity::from(new_validity.freeze()); |
| 175 | + let new_validity = Validity::from(new_validity.freeze()); |
165 | 176 | // Either the data or the indices are nullable, so the final result is also nullable. |
166 | 177 |
|
167 | 178 | Ok(ListArray::try_new(new_elements, new_offsets, new_validity)?.to_array()) |
|
0 commit comments