Skip to content

Commit f2efda0

Browse files
committed
remove ListViewShape and replace with is_zctl
Signed-off-by: Connor Tsui <[email protected]>
1 parent 1570b52 commit f2efda0

File tree

16 files changed

+142
-281
lines changed

16 files changed

+142
-281
lines changed

vortex-array/src/arrays/chunked/vtable/canonical.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ use vortex_dtype::{DType, Nullability, PType, StructFields};
66
use vortex_error::VortexExpect;
77

88
use crate::arrays::{
9-
ChunkedArray, ChunkedVTable, ListViewArray, ListViewRebuildMode, ListViewShape, PrimitiveArray,
10-
StructArray,
9+
ChunkedArray, ChunkedVTable, ListViewArray, ListViewRebuildMode, PrimitiveArray, StructArray,
1110
};
1211
use crate::builders::{ArrayBuilder, builder_with_capacity};
1312
use crate::compute::cast;
@@ -166,14 +165,14 @@ fn swizzle_list_chunks(
166165

167166
// Since we made sure that all chunk were zero-copyable to list above, we know that the final
168167
// output is also zero-copyable to a list.
169-
let shape = ListViewShape::as_zero_copy_to_list();
168+
let is_zctl = true;
170169

171170
// SAFETY:
172171
// - `offsets` and `sizes` are non-nullable u64 arrays of the same length
173172
// - Each `offset[i] + size[i]` list view is within bounds of elements array because it came
174173
// from valid chunks
175174
// - Validity came from the outer chunked array so it must have the same length
176-
unsafe { ListViewArray::new_unchecked(chunked_elements, offsets, sizes, validity, shape) }
175+
unsafe { ListViewArray::new_unchecked(chunked_elements, offsets, sizes, validity, is_zctl) }
177176
}
178177

179178
#[cfg(test)]

vortex-array/src/arrays/constant/vtable/canonical.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use crate::arrays::constant::ConstantArray;
1616
use crate::arrays::primitive::PrimitiveArray;
1717
use crate::arrays::{
1818
BoolArray, ConstantVTable, DecimalArray, ExtensionArray, FixedSizeListArray, ListViewArray,
19-
ListViewShape, NullArray, StructArray, VarBinViewArray, smallest_decimal_value_type,
19+
NullArray, StructArray, VarBinViewArray, smallest_decimal_value_type,
2020
};
2121
use crate::builders::builder_with_capacity;
2222
use crate::validity::Validity;
@@ -253,14 +253,13 @@ fn constant_canonical_list_array(scalar: &Scalar, len: usize) -> ListViewArray {
253253
debug_assert!(!offsets.dtype().is_nullable());
254254
debug_assert!(!sizes.dtype().is_nullable());
255255

256-
// Since everything is pointing to the same exact thing, the offsets are sorted and there are no
257-
// gaps in the shape.
258-
let shape = ListViewShape::as_zero_copy_to_list().with_no_overlaps(false);
256+
// Since all views are pointing to the same exact thing, it is not zero-copyable to `ListArray`.
257+
let is_zctl = false;
259258

260259
// SAFETY: All views point to the same range [0, list.len()) in the elements array.
261260
// The elements array contains `len` copies of the same value, offsets are all 0,
262261
// and sizes are all equal to the list length. The validity matches the scalar's nullability.
263-
unsafe { ListViewArray::new_unchecked(elements, offsets, sizes, validity, shape) }
262+
unsafe { ListViewArray::new_unchecked(elements, offsets, sizes, validity, is_zctl) }
264263
}
265264

266265
fn constant_canonical_fixed_size_list_array(

vortex-array/src/arrays/list/array.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,11 @@ impl ListArray {
269269
&self.elements
270270
}
271271

272+
// TODO(connor)[ListView]: Create 2 functions `reset_offsets` and `recursive_reset_offsets`,
273+
// where `reset_offsets` is infallible.
274+
// Also, `reset_offsets` can be made more efficient by replacing `sub_scalar` with a match on
275+
// the offset type and manual subtraction and fast path where `offsets[0] == 0`.
276+
272277
/// Create a copy of this array by adjusting `offsets` to start at `0` and removing elements not
273278
/// referenced by the `offsets`.
274279
pub fn reset_offsets(&self, recurse: bool) -> VortexResult<Self> {

vortex-array/src/arrays/listview/array.rs

Lines changed: 60 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use num_traits::AsPrimitive;
77
use vortex_dtype::{DType, IntegerPType, match_each_integer_ptype};
88
use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_ensure, vortex_err};
99

10-
use crate::arrays::{ListViewShape, PrimitiveArray, PrimitiveVTable};
10+
use crate::arrays::{PrimitiveArray, PrimitiveVTable, bool};
1111
use crate::stats::ArrayStats;
1212
use crate::validity::Validity;
1313
use crate::{Array, ArrayRef, ToCanonical};
@@ -37,7 +37,7 @@ use crate::{Array, ArrayRef, ToCanonical};
3737
/// # Examples
3838
///
3939
/// ```
40-
/// # use vortex_array::arrays::{ListViewArray, ListViewShape, PrimitiveArray};
40+
/// # use vortex_array::arrays::{ListViewArray, PrimitiveArray};
4141
/// # use vortex_array::validity::Validity;
4242
/// # use vortex_array::IntoArray;
4343
/// # use vortex_buffer::buffer;
@@ -55,9 +55,7 @@ use crate::{Array, ArrayRef, ToCanonical};
5555
/// offsets.into_array(),
5656
/// sizes.into_array(),
5757
/// Validity::NonNullable,
58-
/// ListViewShape::as_zero_copy_to_list()
59-
/// .with_sorted_offsets(false)
60-
/// .with_no_overlaps(false),
58+
/// false, // NOT zero-copyable to a `ListArray`.
6159
/// ).unwrap();
6260
///
6361
/// assert_eq!(list_view.len(), 3);
@@ -97,12 +95,11 @@ pub struct ListViewArray {
9795
/// we want to access.
9896
sizes: ArrayRef,
9997

100-
/// The "shape" of the `ListViewArray` data.
98+
// TODO(connor): More docs. Also we need the n+1 memory allocation optimization.
99+
/// A flag denoting if the array is zero-copyable* to a [`ListArray`].
101100
///
102101
/// We use this information to help us more efficiently rebuild / compact our data.
103-
///
104-
/// See the documentation for [`ListViewShape`] for more information.
105-
shape: ListViewShape,
102+
is_zero_copy_to_list: bool,
106103

107104
/// The validity / null map of the array.
108105
///
@@ -126,10 +123,10 @@ impl ListViewArray {
126123
offsets: ArrayRef,
127124
sizes: ArrayRef,
128125
validity: Validity,
129-
shape: ListViewShape,
126+
is_zctl: bool,
130127
) -> Self {
131-
Self::try_new(elements, offsets, sizes, validity, shape)
132-
.vortex_expect("ListViewArray construction failed")
128+
Self::try_new(elements, offsets, sizes, validity, is_zctl)
129+
.vortex_expect("`ListViewArray` construction failed")
133130
}
134131

135132
/// Constructs a new `ListViewArray`.
@@ -143,12 +140,12 @@ impl ListViewArray {
143140
offsets: ArrayRef,
144141
sizes: ArrayRef,
145142
validity: Validity,
146-
shape: ListViewShape,
143+
is_zctl: bool,
147144
) -> VortexResult<Self> {
148-
Self::validate(&elements, &offsets, &sizes, &validity, shape)?;
145+
Self::validate(&elements, &offsets, &sizes, &validity, is_zctl)?;
149146

150147
// SAFETY: validate ensures all invariants are met.
151-
Ok(unsafe { Self::new_unchecked(elements, offsets, sizes, validity, shape) })
148+
Ok(unsafe { Self::new_unchecked(elements, offsets, sizes, validity, is_zctl) })
152149
}
153150

154151
/// Creates a new [`ListViewArray`] without validation.
@@ -163,26 +160,27 @@ impl ListViewArray {
163160
/// - For each `i`, `offsets[i] + sizes[i]` must not overflow and must be `<= elements.len()`
164161
/// (even if the corresponding view is defined as null by the validity array).
165162
/// - If validity is an array, its length must equal `offsets.len()`.
166-
/// - The `shape` that is passed in correctly describes the shape of the `ListViewArray` data.
163+
/// - If the `is_zctl` flag is true, then the [`ListViewArray`] is zero-copyable to a
164+
/// [`ListArray`].
167165
pub unsafe fn new_unchecked(
168166
elements: ArrayRef,
169167
offsets: ArrayRef,
170168
sizes: ArrayRef,
171169
validity: Validity,
172-
shape: ListViewShape,
170+
is_zctl: bool,
173171
) -> Self {
174-
#[cfg(debug_assertions)]
175-
Self::validate(&elements, &offsets, &sizes, &validity, shape)
176-
.vortex_expect("[Debug Assertion]: Invalid `ListViewArray` parameters");
177-
178-
Self {
179-
dtype: DType::List(Arc::new(elements.dtype().clone()), validity.nullability()),
180-
elements,
181-
offsets,
182-
sizes,
183-
validity,
184-
shape,
185-
stats_set: Default::default(),
172+
if cfg!(debug_assertions) {
173+
Self::new(elements, offsets, sizes, validity, is_zctl)
174+
} else {
175+
Self {
176+
dtype: DType::List(Arc::new(elements.dtype().clone()), validity.nullability()),
177+
elements,
178+
offsets,
179+
sizes,
180+
validity,
181+
is_zero_copy_to_list: is_zctl,
182+
stats_set: Default::default(),
183+
}
186184
}
187185
}
188186

@@ -192,7 +190,7 @@ impl ListViewArray {
192190
offsets: &dyn Array,
193191
sizes: &dyn Array,
194192
validity: &Validity,
195-
shape: ListViewShape,
193+
is_zctl: bool,
196194
) -> VortexResult<()> {
197195
// Check that offsets and sizes are integer arrays and non-nullable.
198196
vortex_ensure!(
@@ -255,8 +253,10 @@ impl ListViewArray {
255253
})
256254
});
257255

258-
// Validate the `ListViewShape`.
259-
validate_shape(shape, elements, offsets_primitive, sizes_primitive)?;
256+
// Validate the zero-copy to `ListArray` flag.
257+
if is_zctl {
258+
validate_zctl(elements, offsets_primitive, sizes_primitive)?;
259+
}
260260

261261
Ok(())
262262
}
@@ -336,11 +336,9 @@ impl ListViewArray {
336336
&self.elements
337337
}
338338

339-
/// Returns the shape of the `ListViewArray`.
340-
///
341-
/// See the documentation of [`ListViewShape`] for more information.
342-
pub fn shape(&self) -> ListViewShape {
343-
self.shape
339+
/// Returns true if the `ListViewArray` is zero-copyable to a [`ListArray`].
340+
pub fn is_zero_copy_to_list(&self) -> bool {
341+
self.is_zero_copy_to_list
344342
}
345343
}
346344

@@ -394,25 +392,19 @@ where
394392
Ok(())
395393
}
396394

397-
/// Helper function to validate if the [`ListViewShape`] correctly describes the data.
398-
fn validate_shape(
399-
shape: ListViewShape,
395+
/// Helper function to validate if the [`ListViewArray`] components are actually zero-copyable to
396+
/// [`ListArray`].
397+
fn validate_zctl(
400398
elements: &dyn Array,
401399
offsets_primitive: PrimitiveArray,
402400
sizes_primitive: PrimitiveArray,
403401
) -> VortexResult<()> {
404-
if shape.has_sorted_offsets() {
405-
// Offsets must be sorted (but not strictly sorted, zero-length lists are allowed), even
406-
// if there are null views.
407-
if let Some(is_sorted) = offsets_primitive.statistics().compute_is_sorted() {
408-
vortex_ensure!(is_sorted, "offsets must be sorted");
409-
} else {
410-
vortex_bail!("offsets must report is_sorted statistic");
411-
}
412-
}
413-
414-
if !(shape.has_no_gaps() || shape.has_no_overlaps()) {
415-
return Ok(());
402+
// Offsets must be sorted (but not strictly sorted, zero-length lists are allowed), even
403+
// if there are null views.
404+
if let Some(is_sorted) = offsets_primitive.statistics().compute_is_sorted() {
405+
vortex_ensure!(is_sorted, "offsets must be sorted");
406+
} else {
407+
vortex_bail!("offsets must report is_sorted statistic");
416408
}
417409

418410
let mut element_references = vec![0u8; elements.len()];
@@ -426,7 +418,7 @@ fn validate_shape(
426418
let sizes_slice = sizes_primitive.as_slice::<S>();
427419

428420
// Note that we ignore nulls here, as the "null" view metadata must still maintain the same
429-
// invariants as non-null views, even for a `ListViewShape` information.
421+
// invariants as non-null views, even for a `bool` information.
430422
for i in 0..offsets_slice.len() {
431423
let offset: usize = offsets_slice[i].as_();
432424
let size: usize = sizes_slice[i].as_();
@@ -442,28 +434,24 @@ fn validate_shape(
442434
})
443435
});
444436

445-
if shape.has_no_gaps() {
446-
// Allow leading and trailing unreferenced elements, but not gaps in the middle.
447-
let leftmost_used = element_references
448-
.iter()
449-
.position(|&references| references != 0);
450-
let rightmost_used = element_references
451-
.iter()
452-
.rposition(|&references| references != 0);
437+
// Allow leading and trailing unreferenced elements, but not gaps in the middle.
438+
let leftmost_used = element_references
439+
.iter()
440+
.position(|&references| references != 0);
441+
let rightmost_used = element_references
442+
.iter()
443+
.rposition(|&references| references != 0);
453444

454-
if let (Some(first_ref), Some(last_ref)) = (leftmost_used, rightmost_used) {
455-
vortex_ensure!(
456-
element_references[first_ref..=last_ref]
457-
.iter()
458-
.all(|&references| references != 0),
459-
"found gap in elements array between first and last referenced elements"
460-
);
461-
}
445+
if let (Some(first_ref), Some(last_ref)) = (leftmost_used, rightmost_used) {
446+
vortex_ensure!(
447+
element_references[first_ref..=last_ref]
448+
.iter()
449+
.all(|&references| references != 0),
450+
"found gap in elements array between first and last referenced elements"
451+
);
462452
}
463453

464-
if shape.has_no_overlaps() {
465-
vortex_ensure!(element_references.iter().all(|&references| references <= 1))
466-
}
454+
vortex_ensure!(element_references.iter().all(|&references| references <= 1));
467455

468456
Ok(())
469457
}

vortex-array/src/arrays/listview/compute/cast.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ impl CastKernel for ListViewVTable {
3131
array.offsets().clone(),
3232
array.sizes().clone(),
3333
validity,
34-
array.shape(),
34+
array.is_zero_copy_to_list(),
3535
)
3636
}
3737
.to_array(),

vortex-array/src/arrays/listview/compute/filter.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,8 @@ impl FilterKernel for ListViewVTable {
4949
let new_offsets = compute::filter(offsets.as_ref(), selection_mask)?;
5050
let new_sizes = compute::filter(sizes.as_ref(), selection_mask)?;
5151

52-
// Filters keep scalars in order, and it cannot cause overlaps to form. However, filters
53-
// **will** create gaps of unused elements.
54-
let new_shape = array.shape().with_no_gaps(false);
52+
// Filters will create gaps of unused elements.
53+
let is_zctl = false;
5554

5655
// SAFETY: Filter operation maintains all `ListViewArray` invariants:
5756
// - Offsets and sizes are derived from existing valid child arrays.
@@ -63,7 +62,7 @@ impl FilterKernel for ListViewVTable {
6362
new_offsets,
6463
new_sizes,
6564
new_validity,
66-
new_shape,
65+
is_zctl,
6766
)
6867
};
6968

vortex-array/src/arrays/listview/compute/mask.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ impl MaskKernel for ListViewVTable {
1616
array.offsets().clone(),
1717
array.sizes().clone(),
1818
array.validity().mask(mask),
19-
array.shape(),
19+
array.is_zero_copy_to_list(),
2020
)
2121
.map(|a| a.into_array())
2222
}

vortex-array/src/arrays/listview/compute/take.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use vortex_dtype::{Nullability, match_each_integer_ptype};
66
use vortex_error::VortexResult;
77
use vortex_scalar::Scalar;
88

9-
use crate::arrays::{ListViewArray, ListViewShape, ListViewVTable};
9+
use crate::arrays::{ListViewArray, ListViewVTable};
1010
use crate::compute::{self, TakeKernel, TakeKernelAdapter};
1111
use crate::vtable::ValidityHelper;
1212
use crate::{Array, ArrayRef, IntoArray, register_kernel};
@@ -66,7 +66,7 @@ impl TakeKernel for ListViewVTable {
6666

6767
// Take can reorder offsets, create gaps, and may introduce overlaps if the `indices`
6868
// contain duplicates.
69-
let new_shape = ListViewShape::default();
69+
let is_zctl = false;
7070

7171
// SAFETY: Take operation maintains all `ListViewArray` invariants:
7272
// - `new_offsets` and `new_sizes` are derived from existing valid child arrays.
@@ -80,7 +80,7 @@ impl TakeKernel for ListViewVTable {
8080
new_offsets,
8181
new_sizes,
8282
new_validity,
83-
new_shape,
83+
is_zctl,
8484
)
8585
};
8686

0 commit comments

Comments
 (0)