Skip to content

Commit 2d28ba3

Browse files
authored
ListView to List (#5802)
Signed-off-by: Nicholas Gates <[email protected]>
1 parent f6a3202 commit 2d28ba3

File tree

2 files changed

+74
-15
lines changed

2 files changed

+74
-15
lines changed

vortex-array/src/arrays/chunked/vtable/mod.rs

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,14 @@ use vortex_error::VortexResult;
1010
use vortex_error::vortex_bail;
1111
use vortex_error::vortex_ensure;
1212
use vortex_error::vortex_err;
13-
use vortex_vector::Vector;
14-
use vortex_vector::VectorMut;
15-
use vortex_vector::VectorMutOps;
1613

1714
use crate::ArrayRef;
1815
use crate::Canonical;
1916
use crate::EmptyMetadata;
2017
use crate::IntoArray;
2118
use crate::ToCanonical;
22-
use crate::VectorExecutor;
2319
use crate::arrays::ChunkedArray;
2420
use crate::arrays::PrimitiveArray;
25-
use crate::executor::ExecutionCtx;
2621
use crate::serde::ArrayChildren;
2722
use crate::validity::Validity;
2823
use crate::vtable;
@@ -164,15 +159,6 @@ impl VTable for ChunkedVTable {
164159
Ok(())
165160
}
166161

167-
/// Executes every chunk of the chunked array and concatenates the results into one vector.
///
/// A mutable vector is created with capacity `array.len` for `array.dtype`, each chunk in
/// `array.chunks` is executed in order with the shared `ctx`, and its result is appended.
/// The accumulated vector is frozen and returned.
///
/// # Errors
///
/// Returns the first error produced by executing a chunk.
///
/// NOTE(review): the diff gutter marks this method as removed by this commit.
fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult<Vector> {
168-
let mut vector = VectorMut::with_capacity(&array.dtype, array.len);
169-
for chunk in &array.chunks {
170-
let chunk_vector = chunk.execute(ctx)?;
171-
vector.extend_from_vector(&chunk_vector);
172-
}
173-
Ok(vector.freeze())
174-
}
175-
176162
fn reduce(array: &Self::Array) -> VortexResult<Option<ArrayRef>> {
177163
Ok(match array.chunks.len() {
178164
0 => Some(Canonical::empty(array.dtype()).into_array()),

vortex-array/src/arrow/executor/list.rs

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
use std::any::type_name;
45
use std::sync::Arc;
56

67
use arrow_array::Array;
@@ -19,6 +20,7 @@ use vortex_error::VortexError;
1920
use vortex_error::VortexExpect;
2021
use vortex_error::VortexResult;
2122
use vortex_error::vortex_ensure;
23+
use vortex_error::vortex_err;
2224
use vortex_session::VortexSession;
2325

2426
use crate::ArrayRef;
@@ -28,9 +30,11 @@ use crate::arrays::ListArray;
2830
use crate::arrays::ListVTable;
2931
use crate::arrays::ListViewArray;
3032
use crate::arrays::ListViewVTable;
33+
use crate::arrays::PrimitiveArray;
3134
use crate::arrow::ArrowArrayExecutor;
3235
use crate::arrow::executor::validity::to_arrow_null_buffer;
3336
use crate::builtins::ArrayBuiltins;
37+
use crate::validity::Validity;
3438
use crate::vtable::ValidityHelper;
3539

3640
/// Convert a Vortex array into an Arrow GenericListArray.
@@ -49,8 +53,9 @@ pub(super) fn to_arrow_list<O: OffsetSizeTrait + NativePType>(
4953
Ok(array) => {
5054
if array.is_zero_copy_to_list() {
5155
return list_view_zctl::<O>(array, elements_field, session);
56+
} else {
57+
return list_view_to_list::<O>(array, elements_field, session);
5258
}
53-
array.into_array()
5459
}
5560
Err(a) => a,
5661
};
@@ -159,3 +164,71 @@ fn list_view_zctl<O: OffsetSizeTrait + NativePType>(
159164
null_buffer,
160165
)))
161166
}
167+
168+
fn list_view_to_list<O: OffsetSizeTrait + NativePType>(
169+
array: ListViewArray,
170+
elements_field: &FieldRef,
171+
session: &VortexSession,
172+
) -> VortexResult<ArrowArrayRef> {
173+
let (elements, offsets, sizes, validity) = array.into_parts();
174+
175+
let offsets = offsets
176+
.cast(DType::Primitive(O::PTYPE, Nullability::NonNullable))?
177+
.execute_vector(session)?
178+
.into_primitive()
179+
.downcast::<O>()
180+
.into_nonnull_buffer();
181+
let sizes = sizes
182+
.cast(DType::Primitive(O::PTYPE, Nullability::NonNullable))?
183+
.execute_vector(session)?
184+
.into_primitive()
185+
.downcast::<O>()
186+
.into_nonnull_buffer();
187+
188+
// We create a new offsets buffer for the final list array.
189+
// And we also create an `indices` buffer for taking the elements.
190+
let mut new_offsets = BufferMut::<O>::with_capacity(offsets.len() + 1);
191+
let mut take_indices = BufferMut::<u32>::with_capacity(elements.len());
192+
193+
// Add the offset for the first subarray
194+
new_offsets.push(O::zero());
195+
for (offset, size) in offsets.iter().zip(sizes.iter()) {
196+
let offset = offset.as_usize();
197+
let size = size.as_usize();
198+
let end = offset + size;
199+
for j in offset..end {
200+
take_indices.push(u32::try_from(j).map_err(|_| {
201+
vortex_err!("List array too large for {} indices", type_name::<O>())
202+
})?);
203+
}
204+
new_offsets.push(O::usize_as(take_indices.len()));
205+
}
206+
207+
// Now we can "take" the elements using the computed indices.
208+
let elements =
209+
elements.take(PrimitiveArray::new(take_indices, Validity::NonNullable).into_array())?;
210+
211+
let elements = elements.execute_arrow(elements_field.data_type(), session)?;
212+
vortex_ensure!(
213+
elements_field.is_nullable() || elements.null_count() == 0,
214+
"Cannot convert to non-nullable Arrow array with null elements"
215+
);
216+
217+
// We need to compute the final offsets from the sizes.
218+
let mut final_offsets = Vec::with_capacity(sizes.len() + 1);
219+
final_offsets.push(O::usize_as(0));
220+
for i in 0..sizes.len() {
221+
let last_offset = final_offsets[i].as_usize();
222+
let size = sizes[i].as_usize();
223+
final_offsets.push(O::usize_as(last_offset + size));
224+
}
225+
226+
let null_buffer = to_arrow_null_buffer(&validity, sizes.len(), session)?;
227+
228+
Ok(Arc::new(GenericListArray::<O>::new(
229+
elements_field.clone(),
230+
offsets.into_arrow_offset_buffer(),
231+
elements,
232+
null_buffer,
233+
)))
234+
}

0 commit comments

Comments
 (0)