Skip to content

Commit 8e30bf2

Browse files
committed
use with_zero_copy_to_list unsafe method
Instead of taking a boolean flag inside `new_unchecked`, it is better to use a builder-like unsafe method that adds the flag. This separates the SAFETY comment out. Signed-off-by: Connor Tsui <[email protected]>
1 parent d6b2e64 commit 8e30bf2

File tree

23 files changed

+185
-220
lines changed

23 files changed

+185
-220
lines changed

encodings/sparse/src/canonical.rs

Lines changed: 10 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -932,13 +932,8 @@ mod test {
932932
let offsets = buffer![0u32, 1, 2, 3].into_array();
933933
let sizes = buffer![1u32, 1, 1, 1].into_array();
934934
let lists = unsafe {
935-
ListViewArray::new_unchecked(
936-
elements,
937-
offsets,
938-
sizes,
939-
Validity::AllValid,
940-
true, // Is zero-copy to list.
941-
)
935+
ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
936+
.with_zero_copy_to_list(true)
942937
}
943938
.into_array();
944939

@@ -986,13 +981,8 @@ mod test {
986981
let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7].into_array();
987982
let sizes = buffer![1u32, 1, 1, 1, 1, 1, 1, 1].into_array();
988983
let lists = unsafe {
989-
ListViewArray::new_unchecked(
990-
elements,
991-
offsets,
992-
sizes,
993-
Validity::AllValid,
994-
true, // Is zero-copy to list.
995-
)
984+
ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
985+
.with_zero_copy_to_list(true)
996986
}
997987
.into_array();
998988

@@ -1037,13 +1027,8 @@ mod test {
10371027
let offsets = buffer![0u32, 1, 2, 3].into_array();
10381028
let sizes = buffer![1u32, 1, 1, 1].into_array();
10391029
let lists = unsafe {
1040-
ListViewArray::new_unchecked(
1041-
elements,
1042-
offsets,
1043-
sizes,
1044-
Validity::AllValid,
1045-
true, // Is zero-copy to list.
1046-
)
1030+
ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
1031+
.with_zero_copy_to_list(true)
10471032
}
10481033
.into_array();
10491034

@@ -1406,13 +1391,8 @@ mod test {
14061391
let sizes = buffer![3u32, 2, 4].into_array();
14071392

14081393
let list_view = unsafe {
1409-
ListViewArray::new_unchecked(
1410-
elements.clone(),
1411-
offsets,
1412-
sizes,
1413-
Validity::AllValid,
1414-
true, // Is zero-copy to list.
1415-
)
1394+
ListViewArray::new_unchecked(elements.clone(), offsets, sizes, Validity::AllValid)
1395+
.with_zero_copy_to_list(true)
14161396
};
14171397

14181398
let list_dtype = list_view.dtype().clone();
@@ -1489,13 +1469,8 @@ mod test {
14891469
let sizes = buffer![2u32, 3, 1, 2, 2].into_array();
14901470

14911471
let full_listview = unsafe {
1492-
ListViewArray::new_unchecked(
1493-
elements,
1494-
offsets,
1495-
sizes,
1496-
Validity::AllValid,
1497-
true, // Is zero-copy to list.
1498-
)
1472+
ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
1473+
.with_zero_copy_to_list(true)
14991474
}
15001475
.into_array();
15011476

fuzz/src/array/mask.rs

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ pub fn mask_canonical_array(canonical: Canonical, mask: &Mask) -> VortexResult<A
6666
array.offsets().clone(),
6767
array.sizes().clone(),
6868
new_validity,
69-
array.is_zero_copy_to_list(),
7069
)
70+
.with_zero_copy_to_list(array.is_zero_copy_to_list())
7171
}
7272
.into_array()
7373
}
@@ -249,13 +249,8 @@ mod tests {
249249
let offsets = PrimitiveArray::from_iter([0i32, 2, 4]).into_array();
250250
let sizes = PrimitiveArray::from_iter([2i32, 2, 2]).into_array();
251251
let array = unsafe {
252-
ListViewArray::new_unchecked(
253-
elements,
254-
offsets,
255-
sizes,
256-
Nullability::NonNullable.into(),
257-
true, // Is zero-copy to list.
258-
)
252+
ListViewArray::new_unchecked(elements, offsets, sizes, Nullability::NonNullable.into())
253+
.with_zero_copy_to_list(true)
259254
};
260255

261256
let mask = Mask::from_iter([false, true, false]);

fuzz/src/array/slice.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,8 @@ pub fn slice_canonical_array(
7878
// only causes there to be leading and trailing garbage data, which is still
7979
// zero-copyable to a `ListArray`.
8080
Ok(unsafe {
81-
ListViewArray::new_unchecked(
82-
elements,
83-
offsets,
84-
sizes,
85-
validity,
86-
list_array.is_zero_copy_to_list(),
87-
)
81+
ListViewArray::new_unchecked(elements, offsets, sizes, validity)
82+
.with_zero_copy_to_list(list_array.is_zero_copy_to_list())
8883
}
8984
.into_array())
9085
}

vortex-array/src/arrays/chunked/vtable/canonical.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -163,16 +163,17 @@ fn swizzle_list_chunks(
163163
let offsets = PrimitiveArray::new(offsets.freeze(), Validity::NonNullable).into_array();
164164
let sizes = PrimitiveArray::new(sizes.freeze(), Validity::NonNullable).into_array();
165165

166-
// Since we made sure that all chunk were zero-copyable to list above, we know that the final
167-
// output is also zero-copyable to a list.
168-
let is_zctl = true;
169-
170166
// SAFETY:
171167
// - `offsets` and `sizes` are non-nullable u64 arrays of the same length
172168
// - Each `offset[i] + size[i]` list view is within bounds of elements array because it came
173169
// from valid chunks
174170
// - Validity came from the outer chunked array so it must have the same length
175-
unsafe { ListViewArray::new_unchecked(chunked_elements, offsets, sizes, validity, is_zctl) }
171+
// - Since we made sure that all chunks were zero-copyable to a list above, we know that the
172+
// final concatenated output is also zero-copyable to a list.
173+
unsafe {
174+
ListViewArray::new_unchecked(chunked_elements, offsets, sizes, validity)
175+
.with_zero_copy_to_list(true)
176+
}
176177
}
177178

178179
#[cfg(test)]

vortex-array/src/arrays/constant/vtable/canonical.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -253,13 +253,10 @@ fn constant_canonical_list_array(scalar: &Scalar, len: usize) -> ListViewArray {
253253
debug_assert!(!offsets.dtype().is_nullable());
254254
debug_assert!(!sizes.dtype().is_nullable());
255255

256-
// Since all views are pointing to the same exact thing, it is not zero-copyable to `ListArray`.
257-
let is_zctl = false;
258-
259256
// SAFETY: All views point to the same range [0, list.len()) in the elements array.
260257
// The elements array contains `len` copies of the same value, offsets are all 0,
261258
// and sizes are all equal to the list length. The validity matches the scalar's nullability.
262-
unsafe { ListViewArray::new_unchecked(elements, offsets, sizes, validity, is_zctl) }
259+
unsafe { ListViewArray::new_unchecked(elements, offsets, sizes, validity) }
263260
}
264261

265262
fn constant_canonical_fixed_size_list_array(

vortex-array/src/arrays/listview/array.rs

Lines changed: 60 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ impl ListViewArray {
137137
sizes: ArrayRef,
138138
validity: Validity,
139139
) -> VortexResult<Self> {
140-
Self::validate(&elements, &offsets, &sizes, &validity, false)?;
140+
Self::validate(&elements, &offsets, &sizes, &validity)?;
141141

142142
Ok(Self {
143143
dtype: DType::List(Arc::new(elements.dtype().clone()), validity.nullability()),
@@ -152,10 +152,8 @@ impl ListViewArray {
152152

153153
/// Creates a new [`ListViewArray`] without validation.
154154
///
155-
/// Note that this constructor is slightly different from [`new()`] and [`try_new()`] in that it
156-
/// also takes an `is_zctl` flag. This is an optimization flag that allows conversion to a
157-
/// [`ListArray`] more efficient. The caller must ensure that the flag is correct when passing
158-
/// it to this unsafe constructor.
155+
/// This unsafe function does not check the validity of the data. Prefer calling [`new()`] or
156+
/// [`try_new()`] over this function, as they will check the validity of the data.
159157
///
160158
/// [`ListArray`]: crate::arrays::ListArray
161159
/// [`new()`]: Self::new
@@ -171,17 +169,14 @@ impl ListViewArray {
171169
/// - For each `i`, `offsets[i] + sizes[i]` must not overflow and must be `<= elements.len()`
172170
/// (even if the corresponding view is defined as null by the validity array).
173171
/// - If validity is an array, its length must equal `offsets.len()`.
174-
/// - If the `is_zctl` flag is true, then the [`ListViewArray`] is zero-copyable to a
175-
/// [`ListArray`].
176172
pub unsafe fn new_unchecked(
177173
elements: ArrayRef,
178174
offsets: ArrayRef,
179175
sizes: ArrayRef,
180176
validity: Validity,
181-
is_zctl: bool,
182177
) -> Self {
183178
if cfg!(debug_assertions) {
184-
Self::validate(&elements, &offsets, &sizes, &validity, is_zctl)
179+
Self::validate(&elements, &offsets, &sizes, &validity)
185180
.vortex_expect("Failed to crate `ListViewArray`");
186181
}
187182

@@ -191,7 +186,7 @@ impl ListViewArray {
191186
offsets,
192187
sizes,
193188
validity,
194-
is_zero_copy_to_list: is_zctl,
189+
is_zero_copy_to_list: false,
195190
stats_set: Default::default(),
196191
}
197192
}
@@ -202,7 +197,6 @@ impl ListViewArray {
202197
offsets: &dyn Array,
203198
sizes: &dyn Array,
204199
validity: &Validity,
205-
is_zctl: bool,
206200
) -> VortexResult<()> {
207201
// Check that offsets and sizes are integer arrays and non-nullable.
208202
vortex_ensure!(
@@ -265,12 +259,63 @@ impl ListViewArray {
265259
})
266260
});
267261

268-
// Validate the zero-copy to `ListArray` flag.
269-
if is_zctl {
270-
validate_zctl(elements, offsets_primitive, sizes_primitive)?;
262+
Ok(())
263+
}
264+
265+
/// Sets whether this [`ListViewArray`] is zero-copyable to a [`ListArray`].
266+
///
267+
/// This is an optimization flag that enables more efficient conversion to [`ListArray`] without
268+
/// needing to copy or reorganize the data.
269+
///
270+
/// [`ListArray`]: crate::arrays::ListArray
271+
///
272+
/// # Safety
273+
///
274+
/// When setting `is_zctl` to `true`, the caller must ensure that the [`ListViewArray`] is
275+
/// actually zero-copyable to a [`ListArray`]. This means:
276+
///
277+
/// - Offsets must be sorted (but not strictly sorted, zero-length lists are allowed).
278+
/// - No gaps in elements between first and last referenced elements.
279+
/// - No overlapping list views (each element referenced at most once).
280+
///
281+
/// Note that leading and trailing unreferenced elements **ARE** allowed.
282+
pub unsafe fn with_zero_copy_to_list(mut self, is_zctl: bool) -> Self {
283+
if cfg!(debug_assertions) && is_zctl {
284+
validate_zctl(
285+
&self.elements,
286+
self.offsets.to_primitive(),
287+
self.sizes.to_primitive(),
288+
)
289+
.vortex_expect("Failed to validate zero-copy to list flag");
271290
}
291+
self.is_zero_copy_to_list = is_zctl;
292+
self
293+
}
272294

273-
Ok(())
295+
/// Verifies that the `ListViewArray` is zero-copyable to a [`ListArray`].
296+
///
297+
/// This will run an expensive validation of the `ListViewArray`'s components. It will check the
298+
/// following things:
299+
///
300+
/// - Offsets must be sorted (but not strictly sorted, zero-length lists are allowed).
301+
/// - No gaps in elements between first and last referenced elements.
302+
/// - No overlapping list views (each element referenced at most once).
303+
///
304+
/// Note that leading and trailing unreferenced elements **ARE** allowed.
305+
///
306+
/// This method should really only be called if the caller knows that the `ListViewArray` will
307+
/// be converted into a [`ListArray`] in the future, and the caller wants to set the
308+
/// optimization flag to `true` with the unsafe [`with_zero_copy_to_list`] method.
309+
///
310+
/// [`ListArray`]: crate::arrays::ListArray
311+
/// [`with_zero_copy_to_list`]: Self::with_zero_copy_to_list
312+
pub fn verify_is_zero_copy_to_list(&self) -> bool {
313+
validate_zctl(
314+
&self.elements,
315+
self.offsets.to_primitive(),
316+
self.sizes.to_primitive(),
317+
)
318+
.is_ok()
274319
}
275320

276321
/// Returns the offset at the given index.

vortex-array/src/arrays/listview/compute/cast.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use vortex_error::VortexResult;
77
use crate::arrays::{ListViewArray, ListViewVTable};
88
use crate::compute::{self, CastKernel, CastKernelAdapter};
99
use crate::vtable::ValidityHelper;
10-
use crate::{ArrayRef, register_kernel};
10+
use crate::{ArrayRef, IntoArray, register_kernel};
1111

1212
impl CastKernel for ListViewVTable {
1313
fn cast(&self, array: &ListViewArray, dtype: &DType) -> VortexResult<Option<ArrayRef>> {
@@ -31,10 +31,10 @@ impl CastKernel for ListViewVTable {
3131
array.offsets().clone(),
3232
array.sizes().clone(),
3333
validity,
34-
array.is_zero_copy_to_list(),
3534
)
35+
.with_zero_copy_to_list(array.is_zero_copy_to_list())
3636
}
37-
.to_array(),
37+
.into_array(),
3838
))
3939
}
4040
}

vortex-array/src/arrays/listview/compute/filter.rs

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,14 +55,7 @@ impl FilterKernel for ListViewVTable {
5555
// - Offsets and sizes have the same length (both filtered by `selection_mask`).
5656
// - Validity matches the filtered array's nullability.
5757
let new_array = unsafe {
58-
ListViewArray::new_unchecked(
59-
elements.clone(),
60-
new_offsets,
61-
new_sizes,
62-
new_validity,
63-
// Filters will create gaps of unused elements.
64-
false,
65-
)
58+
ListViewArray::new_unchecked(elements.clone(), new_offsets, new_sizes, new_validity)
6659
};
6760

6861
// TODO(connor)[ListView]: Ideally, we would only rebuild after all `take`s and `filter`

vortex-array/src/arrays/listview/compute/mask.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ impl MaskKernel for ListViewVTable {
1919
array.offsets().clone(),
2020
array.sizes().clone(),
2121
array.validity().mask(mask),
22-
array.is_zero_copy_to_list(),
2322
)
23+
.with_zero_copy_to_list(array.is_zero_copy_to_list())
2424
}
2525
.into_array())
2626
}

vortex-array/src/arrays/listview/compute/take.rs

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use vortex_dtype::{Nullability, match_each_integer_ptype};
66
use vortex_error::VortexResult;
77
use vortex_scalar::Scalar;
88

9-
use crate::arrays::{ListViewArray, ListViewVTable};
9+
use crate::arrays::{ListViewArray, ListViewRebuildMode, ListViewVTable};
1010
use crate::compute::{self, TakeKernel, TakeKernelAdapter};
1111
use crate::vtable::ValidityHelper;
1212
use crate::{Array, ArrayRef, IntoArray, register_kernel};
@@ -71,18 +71,16 @@ impl TakeKernel for ListViewVTable {
7171
// `indices`).
7272
// - Validity correctly reflects the combination of array and indices validity.
7373
let new_array = unsafe {
74-
ListViewArray::new_unchecked(
75-
elements.clone(),
76-
new_offsets,
77-
new_sizes,
78-
new_validity,
79-
// Take can reorder offsets, create gaps, and may introduce overlaps if the
80-
// `indices` contain duplicates.
81-
false,
82-
)
74+
ListViewArray::new_unchecked(elements.clone(), new_offsets, new_sizes, new_validity)
8375
};
8476

85-
Ok(new_array.into_array())
77+
// TODO(connor)[ListView]: Ideally, we would only rebuild after all `take`s and `filter`
78+
// compute functions have run, at the "top" of the operator tree. However, we cannot do this
79+
// right now, so we will just rebuild every time (similar to `ListArray`).
80+
81+
Ok(new_array
82+
.rebuild(ListViewRebuildMode::MakeZeroCopyToList)
83+
.into_array())
8684
}
8785
}
8886

0 commit comments

Comments
 (0)