Skip to content

Commit 8141387

Browse files
committed
implement more efficient `extend_from_array_unchecked (ugly)
Signed-off-by: Connor Tsui <[email protected]>
1 parent cc1f859 commit 8141387

File tree

5 files changed

+119
-52
lines changed

5 files changed

+119
-52
lines changed

vortex-array/src/arrays/chunked/vtable/canonical.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,9 @@ fn swizzle_list_chunks(
124124

125125
for chunk in chunks {
126126
let chunk_array = chunk.to_listview();
127-
// By rebuilding as zero-copy to `List`, we make the final output `ListView` also
128-
// zero-copyable to `List`.
129-
let chunk_array = chunk_array.rebuild(ListViewRebuildMode::MakeZeroCopyToList);
127+
// By rebuilding as zero-copy to `List` and trimming all elements (to prevent gaps), we make
128+
// the final output `ListView` also zero-copyable to `List`.
129+
let chunk_array = chunk_array.rebuild(ListViewRebuildMode::MakeExact);
130130

131131
// Add the `elements` of the current array as a new chunk.
132132
list_elements_chunks.push(chunk_array.elements().clone());

vortex-array/src/arrays/listview/conversion.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,8 @@ mod tests {
281281
};
282282
use super::recursive_list_from_list_view;
283283
use crate::arrays::{
284-
list_from_list_view, list_view_from_list, BoolArray, FixedSizeListArray, ListArray, ListViewArray, ListViewShape, PrimitiveArray, StructArray
284+
BoolArray, FixedSizeListArray, ListArray, ListViewArray, ListViewShape, PrimitiveArray,
285+
StructArray, list_from_list_view, list_view_from_list,
285286
};
286287
use crate::validity::Validity;
287288
use crate::vtable::ValidityHelper;

vortex-array/src/arrays/listview/rebuild.rs

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ pub enum ListViewRebuildMode {
2626
/// the [`ListViewArray`].
2727
TrimElements,
2828

29+
/// Equivalent to `MakeZeroCopyToList` plus `TrimElements`.
30+
///
31+
/// TODO more docs.
32+
MakeExact,
33+
2934
/// Removes any unused data from the underlying `elements` array.
3035
///
3136
/// This mode will rebuild the `elements` array in the process, but it will keep any overlapping
@@ -63,6 +68,9 @@ impl ListViewArray {
6368
match mode {
6469
ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
6570
ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(),
71+
ListViewRebuildMode::MakeExact => {
72+
self.rebuild_zero_copy_to_list().rebuild_trim_elements()
73+
}
6674
ListViewRebuildMode::RemoveGaps => self.rebuild_remove_gaps::<false>(),
6775
ListViewRebuildMode::RemoveNulls => self.rebuild_remove_gaps::<true>(),
6876
ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
@@ -82,27 +90,38 @@ impl ListViewArray {
8290
return self.clone();
8391
}
8492

93+
let offsets_ptype = self.offsets().dtype().as_ptype();
94+
let sizes_ptype = self.sizes().dtype().as_ptype();
95+
96+
match_each_integer_ptype!(offsets_ptype, |O| {
97+
match_each_integer_ptype!(sizes_ptype, |S| { self.naive_rebuild::<O, S>() })
98+
})
99+
}
100+
101+
/// The inner function for `rebuild_zero_copy_to_list`, which naively rebuilds a `ListViewArray`
102+
/// via `append_scalar`.
103+
fn naive_rebuild<O: IntegerPType, S: IntegerPType>(&self) -> ListViewArray {
85104
let element_dtype = self
86105
.dtype()
87106
.as_list_element_opt()
88107
.vortex_expect("somehow had a canonical list that was not a list");
89108

90-
let offsets_ptype = self.offsets().dtype().as_ptype();
91-
let sizes_ptype = self.sizes().dtype().as_ptype();
109+
let mut builder = ListViewBuilder::<O, S>::with_capacity(
110+
element_dtype.clone(),
111+
self.dtype().nullability(),
112+
self.elements().len(),
113+
self.len(),
114+
);
92115

93-
match_each_integer_ptype!(offsets_ptype, |O| {
94-
match_each_integer_ptype!(sizes_ptype, |S| {
95-
let mut builder = ListViewBuilder::<O, S>::with_capacity(
96-
element_dtype.clone(),
97-
self.dtype().nullability(),
98-
self.elements().len(),
99-
self.len(),
100-
);
101-
102-
builder.extend_from_array(self.as_ref());
103-
builder.finish_into_listview()
104-
})
105-
})
116+
for i in 0..self.len() {
117+
let list = self.scalar_at(i);
118+
119+
builder
120+
.append_scalar(&list)
121+
.vortex_expect("was unable to extend the `ListViewBuilder`")
122+
}
123+
124+
builder.finish_into_listview()
106125
}
107126

108127
/// Rebuilds a [`ListViewArray`] by trimming any unused / unreferenced leading and trailing
@@ -658,9 +677,8 @@ mod tests {
658677
let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4, 5]).into_array();
659678
let offsets = PrimitiveArray::from_iter(vec![0u32, 2, 4]).into_array();
660679
let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 1]).into_array();
661-
let validity = Validity::Array(
662-
BoolArray::from(BitBuffer::from(vec![true, false, true])).into_array(),
663-
);
680+
let validity =
681+
Validity::Array(BoolArray::from(BitBuffer::from(vec![true, false, true])).into_array());
664682

665683
let listview = ListViewArray::try_new(
666684
elements,

vortex-array/src/builders/listview.rs

Lines changed: 70 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,18 @@
1010
1111
use std::sync::Arc;
1212

13-
use vortex_dtype::{DType, IntegerPType, Nullability};
13+
use vortex_dtype::{DType, IntegerPType, Nullability, match_each_integer_ptype};
1414
use vortex_error::{VortexExpect, VortexResult, vortex_ensure, vortex_panic};
1515
use vortex_mask::Mask;
1616
use vortex_scalar::{ListScalar, Scalar};
1717

1818
use crate::array::{Array, ArrayRef, IntoArray};
19-
use crate::arrays::{ListViewArray, ListViewShape};
19+
use crate::arrays::{ListViewArray, ListViewRebuildMode, ListViewShape, PrimitiveArray};
2020
use crate::builders::lazy_null_builder::LazyBitBufferBuilder;
2121
use crate::builders::{
22-
ArrayBuilder, DEFAULT_BUILDER_CAPACITY, PrimitiveBuilder, builder_with_capacity,
22+
ArrayBuilder, DEFAULT_BUILDER_CAPACITY, PrimitiveBuilder, UninitRange, builder_with_capacity,
2323
};
24-
use crate::{Canonical, ToCanonical};
24+
use crate::{Canonical, ToCanonical, compute};
2525

2626
/// A builder for creating [`ListViewArray`] instances, parameterized by the [`IntegerPType`] of
2727
/// the `offsets` and the `sizes` builders.
@@ -236,28 +236,74 @@ impl<O: IntegerPType, S: IntegerPType> ArrayBuilder for ListViewBuilder<O, S> {
236236
}
237237

238238
unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
239-
let listview_array = array.to_listview();
240-
if listview_array.is_empty() {
239+
let listview = array.to_listview();
240+
if listview.is_empty() {
241241
return;
242242
}
243243

244-
// TODO NOW: Rebuild the array we are extending by to be zero-copy to list and then be
245-
// smarter about this.
246-
247-
// TODO(connor)[ListView]: We could potentially concatenate the new elements on top of the
248-
// existing elements and recalculate offsets (and then use `UninitRange`). However, that
249-
// would mean we lose the guarantee that the output `ListViewArray` does not look like a
250-
// `ListArray` (because the incoming array could have garbage data).
251-
252-
// We assume the worst case scenario, where the list view array is stored completely out of
253-
// order, with many out-of-order offsets, and lots of garbage data. Thus, we simply iterate
254-
// over all of the lists in the array and copy the data into this builder.
255-
for i in 0..listview_array.len() {
256-
let list = listview_array.scalar_at(i);
257-
258-
self.append_scalar(&list)
259-
.vortex_expect("was unable to extend the `ListViewBuilder`")
244+
// If we do not have the guarantee that the array is zero-copyable to a list, then we have
245+
// to manually append each scalar.
246+
if !listview.shape().is_zero_copy_to_list() {
247+
for i in 0..listview.len() {
248+
let list = listview.scalar_at(i);
249+
250+
self.append_scalar(&list)
251+
.vortex_expect("was unable to extend the `ListViewBuilder`")
252+
}
260253
}
254+
255+
// Otherwise, after removing any leading and trailing elements, we can simply bulk append
256+
// the entire array.
257+
let listview = listview.rebuild(ListViewRebuildMode::TrimElements);
258+
259+
self.nulls.append_validity_mask(array.validity_mask());
260+
self.elements_builder.extend_from_array(listview.elements());
261+
262+
self.sizes_builder.extend_from_array(
263+
compute::cast(listview.sizes(), self.sizes_builder.dtype())
264+
.vortex_expect(
265+
"was somehow unable to cast the new sizes to the type of the builder sizes",
266+
)
267+
.as_ref(),
268+
);
269+
270+
// Adjust all of the offsets.
271+
let new_offsets = listview.offsets().to_primitive(); // This should be cheap.
272+
let num_new_offsets = new_offsets.len();
273+
274+
let curr_builder_len = self.len();
275+
self.offsets_builder.reserve_exact(num_new_offsets);
276+
277+
let uninit_range = self.offsets_builder.uninit_range(num_new_offsets);
278+
279+
fn adjust_offsets<'a, O: IntegerPType, A: IntegerPType>(
280+
mut uninit_range: UninitRange<'a, O>,
281+
new_offsets: PrimitiveArray,
282+
curr_builder_len: usize,
283+
) {
284+
let new_offsets_slice = new_offsets.as_slice::<A>();
285+
let curr_builder_len = O::from_usize(curr_builder_len)
286+
.vortex_expect("the builder length did not fit into the offset type (impossible)");
287+
288+
for i in 0..uninit_range.len() {
289+
let new_offset = new_offsets_slice[i];
290+
let new_offset_correct_type = O::from_usize(
291+
new_offset
292+
.to_usize()
293+
.vortex_expect("Offsets must always fit in usize"),
294+
)
295+
.vortex_expect("New offset somehow did not fit into the builder's offset type");
296+
297+
uninit_range.set_value(i, new_offset_correct_type + curr_builder_len);
298+
}
299+
// SAFETY: We have set all the values in the range, and since `offsets` are
300+
// non-nullable, we are done.
301+
unsafe { uninit_range.finish() };
302+
}
303+
304+
match_each_integer_ptype!(new_offsets.ptype(), |A| {
305+
adjust_offsets::<O, A>(uninit_range, new_offsets, curr_builder_len);
306+
})
261307
}
262308

263309
fn reserve_exact(&mut self, capacity: usize) {
@@ -481,19 +527,15 @@ mod tests {
481527
.unwrap();
482528

483529
// Extend from the ListArray.
484-
unsafe {
485-
builder.extend_from_array_unchecked(&source.into_array());
486-
}
530+
builder.extend_from_array(&source.into_array());
487531

488532
// Extend from empty array (should be no-op).
489533
let empty_source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
490534
std::iter::empty::<Option<Vec<i32>>>(),
491535
Arc::new(I32.into()),
492536
)
493537
.unwrap();
494-
unsafe {
495-
builder.extend_from_array_unchecked(&empty_source.into_array());
496-
}
538+
builder.extend_from_array(&empty_source.into_array());
497539

498540
let listview = builder.finish_into_listview();
499541
assert_eq!(listview.len(), 4);

vortex-array/src/compute/list_contains.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ pub(crate) fn warm_up_vtable() -> usize {
5353
///
5454
/// ```rust
5555
/// # use vortex_array::{Array, IntoArray, ToCanonical};
56-
/// # use vortex_array::arrays::{ConstantArray, ListViewArray, VarBinArray};
56+
/// # use vortex_array::arrays::{ConstantArray, ListViewArray, ListViewShape, VarBinArray};
5757
/// # use vortex_array::compute;
5858
/// # use vortex_array::validity::Validity;
5959
/// # use vortex_buffer::{buffer, bitbuffer};
@@ -65,7 +65,13 @@ pub(crate) fn warm_up_vtable() -> usize {
6565
/// let offsets = buffer![0u32, 1, 3].into_array();
6666
/// let sizes = buffer![1u32, 2, 2].into_array();
6767
/// let list_array =
68-
/// ListViewArray::try_new(elements, offsets, sizes, Validity::NonNullable).unwrap();
68+
/// ListViewArray::try_new(
69+
/// elements,
70+
/// offsets,
71+
/// sizes,
72+
/// Validity::NonNullable,
73+
/// ListViewShape::as_zero_copy_to_list(),
74+
/// ).unwrap();
6975
///
7076
/// let matches = compute::list_contains(
7177
/// list_array.as_ref(),

0 commit comments

Comments
 (0)