Skip to content

Commit a8bbe86

Browse files
committed
update all ListViewArray construction with is_zctl
Signed-off-by: Connor Tsui <[email protected]>
1 parent f2efda0 commit a8bbe86

File tree

27 files changed

+580
-807
lines changed

27 files changed

+580
-807
lines changed

encodings/sparse/src/canonical.rs

Lines changed: 47 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -492,8 +492,8 @@ mod test {
492492

493493
use rstest::rstest;
494494
use vortex_array::arrays::{
495-
BoolArray, DecimalArray, FixedSizeListArray, ListArray, ListViewArray, ListViewShape,
496-
PrimitiveArray, StructArray, VarBinArray, VarBinViewArray,
495+
BoolArray, DecimalArray, FixedSizeListArray, ListArray, ListViewArray, PrimitiveArray,
496+
StructArray, VarBinArray, VarBinViewArray,
497497
};
498498
use vortex_array::arrow::IntoArrowArray as _;
499499
use vortex_array::validity::Validity;
@@ -930,14 +930,15 @@ mod test {
930930
// List 3: [2] at offset 3, size 1
931931
let offsets = buffer![0u32, 1, 2, 3].into_array();
932932
let sizes = buffer![1u32, 1, 1, 1].into_array();
933-
let lists = ListViewArray::try_new(
934-
elements,
935-
offsets,
936-
sizes,
937-
Validity::AllValid,
938-
ListViewShape::as_zero_copy_to_list(),
939-
)
940-
.unwrap()
933+
let lists = unsafe {
934+
ListViewArray::new_unchecked(
935+
elements,
936+
offsets,
937+
sizes,
938+
Validity::AllValid,
939+
true, // Is zero-copy to list.
940+
)
941+
}
941942
.into_array();
942943

943944
let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
@@ -983,14 +984,15 @@ mod test {
983984
let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
984985
let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7].into_array();
985986
let sizes = buffer![1u32, 1, 1, 1, 1, 1, 1, 1].into_array();
986-
let lists = ListViewArray::try_new(
987-
elements,
988-
offsets,
989-
sizes,
990-
Validity::AllValid,
991-
ListViewShape::as_zero_copy_to_list(),
992-
)
993-
.unwrap()
987+
let lists = unsafe {
988+
ListViewArray::new_unchecked(
989+
elements,
990+
offsets,
991+
sizes,
992+
Validity::AllValid,
993+
true, // Is zero-copy to list.
994+
)
995+
}
994996
.into_array();
995997

996998
// Slice to get lists 2..6, which are: [1], [2], [1], [2]
@@ -1033,14 +1035,15 @@ mod test {
10331035
let elements = buffer![1i32, 2, 1, 2].into_array();
10341036
let offsets = buffer![0u32, 1, 2, 3].into_array();
10351037
let sizes = buffer![1u32, 1, 1, 1].into_array();
1036-
let lists = ListViewArray::try_new(
1037-
elements,
1038-
offsets,
1039-
sizes,
1040-
Validity::AllValid,
1041-
ListViewShape::as_zero_copy_to_list(),
1042-
)
1043-
.unwrap()
1038+
let lists = unsafe {
1039+
ListViewArray::new_unchecked(
1040+
elements,
1041+
offsets,
1042+
sizes,
1043+
Validity::AllValid,
1044+
true, // Is zero-copy to list.
1045+
)
1046+
}
10441047
.into_array();
10451048

10461049
let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
@@ -1401,14 +1404,15 @@ mod test {
14011404
let offsets = buffer![0u32, 3, 5].into_array();
14021405
let sizes = buffer![3u32, 2, 4].into_array();
14031406

1404-
let list_view = ListViewArray::try_new(
1405-
elements.clone(),
1406-
offsets,
1407-
sizes,
1408-
Validity::AllValid,
1409-
ListViewShape::as_zero_copy_to_list(),
1410-
)
1411-
.unwrap();
1407+
let list_view = unsafe {
1408+
ListViewArray::new_unchecked(
1409+
elements.clone(),
1410+
offsets,
1411+
sizes,
1412+
Validity::AllValid,
1413+
true, // Is zero-copy to list.
1414+
)
1415+
};
14121416

14131417
let list_dtype = list_view.dtype().clone();
14141418

@@ -1483,14 +1487,15 @@ mod test {
14831487
let offsets = buffer![0u32, 2, 5, 6, 8].into_array();
14841488
let sizes = buffer![2u32, 3, 1, 2, 2].into_array();
14851489

1486-
let full_listview = ListViewArray::try_new(
1487-
elements,
1488-
offsets,
1489-
sizes,
1490-
Validity::AllValid,
1491-
ListViewShape::as_zero_copy_to_list(),
1492-
)
1493-
.unwrap()
1490+
let full_listview = unsafe {
1491+
ListViewArray::new_unchecked(
1492+
elements,
1493+
offsets,
1494+
sizes,
1495+
Validity::AllValid,
1496+
true, // Is zero-copy to list.
1497+
)
1498+
}
14941499
.into_array();
14951500

14961501
// Slice to get lists 1, 2, 3 (indices 1..4)

fuzz/src/array/mask.rs

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,18 @@ pub fn mask_canonical_array(canonical: Canonical, mask: &Mask) -> VortexResult<A
5757
}
5858
Canonical::List(array) => {
5959
let new_validity = apply_mask_to_validity(array.validity(), mask);
60-
ListViewArray::try_new(
61-
array.elements().clone(),
62-
array.offsets().clone(),
63-
array.sizes().clone(),
64-
new_validity,
65-
array.shape(),
66-
)
67-
.vortex_unwrap()
60+
61+
// SAFETY: Since we are only masking the validity and everything else comes from an
62+
// already valid `ListViewArray`, all of the invariants are still upheld.
63+
unsafe {
64+
ListViewArray::new_unchecked(
65+
array.elements().clone(),
66+
array.offsets().clone(),
67+
array.sizes().clone(),
68+
new_validity,
69+
array.is_zero_copy_to_list(),
70+
)
71+
}
6872
.into_array()
6973
}
7074
Canonical::FixedSizeList(array) => {
@@ -131,8 +135,8 @@ fn apply_mask_to_validity(validity: &Validity, mask: &Mask) -> Validity {
131135
#[cfg(test)]
132136
mod tests {
133137
use vortex_array::arrays::{
134-
BoolArray, DecimalArray, FixedSizeListArray, ListViewArray, ListViewShape, NullArray,
135-
PrimitiveArray, StructArray, VarBinViewArray,
138+
BoolArray, DecimalArray, FixedSizeListArray, ListViewArray, NullArray, PrimitiveArray,
139+
StructArray, VarBinViewArray,
136140
};
137141
use vortex_array::{Array, IntoArray};
138142
use vortex_dtype::{DecimalDType, FieldNames, Nullability};
@@ -244,14 +248,15 @@ mod tests {
244248
let elements = PrimitiveArray::from_iter([1i32, 2, 3, 4, 5, 6]).into_array();
245249
let offsets = PrimitiveArray::from_iter([0i32, 2, 4]).into_array();
246250
let sizes = PrimitiveArray::from_iter([2i32, 2, 2]).into_array();
247-
let array = ListViewArray::try_new(
248-
elements,
249-
offsets,
250-
sizes,
251-
Nullability::NonNullable.into(),
252-
ListViewShape::as_zero_copy_to_list(),
253-
)
254-
.unwrap();
251+
let array = unsafe {
252+
ListViewArray::new_unchecked(
253+
elements,
254+
offsets,
255+
sizes,
256+
Nullability::NonNullable.into(),
257+
true, // Is zero-copy to list.
258+
)
259+
};
255260

256261
let mask = Mask::from_iter([false, true, false]);
257262

fuzz/src/array/slice.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,19 @@ pub fn slice_canonical_array(
7474
// Since the list view elements can be stored out of order, we cannot slice it.
7575
let elements = list_array.elements().clone();
7676

77-
ListViewArray::try_new(elements, offsets, sizes, validity, list_array.shape())
78-
.map(|a| a.into_array())
77+
// SAFETY: If the array was already zero-copyable to list, slicing the offsets and sizes
78+
// only causes there to be leading and trailing garbage data, which is still
79+
// zero-copyable to a `ListArray`.
80+
Ok(unsafe {
81+
ListViewArray::new_unchecked(
82+
elements,
83+
offsets,
84+
sizes,
85+
validity,
86+
list_array.is_zero_copy_to_list(),
87+
)
88+
}
89+
.into_array())
7990
}
8091
DType::FixedSizeList(..) => {
8192
let fsl_array = array.to_fixed_size_list();

vortex-array/src/arrays/listview/array.rs

Lines changed: 48 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,12 @@ use crate::{Array, ArrayRef, ToCanonical};
5050
/// let offsets = buffer![2u32, 0, 1].into_array(); // Out-of-order offsets
5151
/// let sizes = buffer![2u32, 1, 2].into_array(); // The sizes cause overlaps
5252
///
53-
/// let list_view = ListViewArray::try_new(
53+
/// let list_view = ListViewArray::new(
5454
/// elements.into_array(),
5555
/// offsets.into_array(),
5656
/// sizes.into_array(),
5757
/// Validity::NonNullable,
58-
/// false, // NOT zero-copyable to a `ListArray`.
59-
/// ).unwrap();
58+
/// );
6059
///
6160
/// assert_eq!(list_view.len(), 3);
6261
///
@@ -95,10 +94,13 @@ pub struct ListViewArray {
9594
/// we want to access.
9695
sizes: ArrayRef,
9796

98-
// TODO(connor): More docs. Also we need the n+1 memory allocation optimization.
99-
/// A flag denoting if the array is zero-copyable* to a [`ListArray`].
97+
// TODO(connor)[ListView]: Add the n+1 memory allocation optimization.
98+
/// A flag denoting if the array is zero-copyable* to a [`ListArray`](crate::arrays::ListArray).
10099
///
101100
/// We use this information to help us more efficiently rebuild / compact our data.
101+
///
102+
/// When this flag is true (indicating sorted offsets with no gaps and no overlaps), conversions
103+
/// can bypass the very expensive rebuild process (which just calls `append_scalar` in a loop).
102104
is_zero_copy_to_list: bool,
103105

104106
/// The validity / null map of the array.
@@ -118,14 +120,8 @@ impl ListViewArray {
118120
///
119121
/// Panics if the provided components do not satisfy the invariants documented
120122
/// in [`ListViewArray::new_unchecked`].
121-
pub fn new(
122-
elements: ArrayRef,
123-
offsets: ArrayRef,
124-
sizes: ArrayRef,
125-
validity: Validity,
126-
is_zctl: bool,
127-
) -> Self {
128-
Self::try_new(elements, offsets, sizes, validity, is_zctl)
123+
pub fn new(elements: ArrayRef, offsets: ArrayRef, sizes: ArrayRef, validity: Validity) -> Self {
124+
Self::try_new(elements, offsets, sizes, validity)
129125
.vortex_expect("`ListViewArray` construction failed")
130126
}
131127

@@ -140,16 +136,31 @@ impl ListViewArray {
140136
offsets: ArrayRef,
141137
sizes: ArrayRef,
142138
validity: Validity,
143-
is_zctl: bool,
144139
) -> VortexResult<Self> {
145-
Self::validate(&elements, &offsets, &sizes, &validity, is_zctl)?;
146-
147-
// SAFETY: validate ensures all invariants are met.
148-
Ok(unsafe { Self::new_unchecked(elements, offsets, sizes, validity, is_zctl) })
140+
Self::validate(&elements, &offsets, &sizes, &validity, false)?;
141+
142+
Ok(Self {
143+
dtype: DType::List(Arc::new(elements.dtype().clone()), validity.nullability()),
144+
elements,
145+
offsets,
146+
sizes,
147+
validity,
148+
is_zero_copy_to_list: false,
149+
stats_set: Default::default(),
150+
})
149151
}
150152

151153
/// Creates a new [`ListViewArray`] without validation.
152154
///
155+
/// Note that this constructor is slightly different from [`new()`] and [`try_new()`] in that it
156+
/// also takes an `is_zctl` flag. This is an optimization flag that allows conversion to a
157+
/// [`ListArray`] more efficient. The caller must ensure that the flag is correct when passing
158+
/// it to this unsafe constructor.
159+
///
160+
/// [`ListArray`]: crate::arrays::ListArray
161+
/// [`new()`]: Self::new
162+
/// [`try_new()`]: Self::try_new
163+
///
153164
/// # Safety
154165
///
155166
/// The caller must ensure all of the following invariants are satisfied:
@@ -170,17 +181,18 @@ impl ListViewArray {
170181
is_zctl: bool,
171182
) -> Self {
172183
if cfg!(debug_assertions) {
173-
Self::new(elements, offsets, sizes, validity, is_zctl)
174-
} else {
175-
Self {
176-
dtype: DType::List(Arc::new(elements.dtype().clone()), validity.nullability()),
177-
elements,
178-
offsets,
179-
sizes,
180-
validity,
181-
is_zero_copy_to_list: is_zctl,
182-
stats_set: Default::default(),
183-
}
184+
Self::validate(&elements, &offsets, &sizes, &validity, is_zctl)
185+
.vortex_expect("Failed to crate `ListViewArray`");
186+
}
187+
188+
Self {
189+
dtype: DType::List(Arc::new(elements.dtype().clone()), validity.nullability()),
190+
elements,
191+
offsets,
192+
sizes,
193+
validity,
194+
is_zero_copy_to_list: is_zctl,
195+
stats_set: Default::default(),
184196
}
185197
}
186198

@@ -336,7 +348,8 @@ impl ListViewArray {
336348
&self.elements
337349
}
338350

339-
/// Returns true if the `ListViewArray` is zero-copyable to a [`ListArray`].
351+
/// Returns true if the `ListViewArray` is zero-copyable to a
352+
/// [`ListArray`](crate::arrays::ListArray).
340353
pub fn is_zero_copy_to_list(&self) -> bool {
341354
self.is_zero_copy_to_list
342355
}
@@ -393,7 +406,7 @@ where
393406
}
394407

395408
/// Helper function to validate if the [`ListViewArray`] components are actually zero-copyable to
396-
/// [`ListArray`].
409+
/// [`ListArray`](crate::arrays::ListArray).
397410
fn validate_zctl(
398411
elements: &dyn Array,
399412
offsets_primitive: PrimitiveArray,
@@ -407,6 +420,10 @@ fn validate_zctl(
407420
vortex_bail!("offsets must report is_sorted statistic");
408421
}
409422

423+
// TODO(connor)[ListView]: Making this allocation is expensive, but the more efficient
424+
// implementation would be even more complicated than this. We could use a bit buffer denoting
425+
// if positions in `elements` are used, and then additionally store a separate flag that tells
426+
// us if a position is used more than once.
410427
let mut element_references = vec![0u8; elements.len()];
411428

412429
fn count_references<O: IntegerPType, S: IntegerPType>(

vortex-array/src/arrays/listview/compute/filter.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use crate::compute::{self, FilterKernel, FilterKernelAdapter};
99
use crate::vtable::ValidityHelper;
1010
use crate::{ArrayRef, IntoArray, register_kernel};
1111

12+
// TODO(connor)[ListView]: Make use of this threshold after we start migrating operators.
1213
/// The threshold for triggering a rebuild of the [`ListViewArray`].
1314
///
1415
/// By default, we will not touch the underlying `elements` array of the [`ListViewArray`] since it
@@ -49,9 +50,6 @@ impl FilterKernel for ListViewVTable {
4950
let new_offsets = compute::filter(offsets.as_ref(), selection_mask)?;
5051
let new_sizes = compute::filter(sizes.as_ref(), selection_mask)?;
5152

52-
// Filters will create gaps of unused elements.
53-
let is_zctl = false;
54-
5553
// SAFETY: Filter operation maintains all `ListViewArray` invariants:
5654
// - Offsets and sizes are derived from existing valid child arrays.
5755
// - Offsets and sizes have the same length (both filtered by `selection_mask`).
@@ -62,7 +60,8 @@ impl FilterKernel for ListViewVTable {
6260
new_offsets,
6361
new_sizes,
6462
new_validity,
65-
is_zctl,
63+
// Filters will create gaps of unused elements.
64+
false,
6665
)
6766
};
6867

0 commit comments

Comments
 (0)