Skip to content

Commit 5edef87

Browse files
committed
is_zero_copy_to_list ListView to List optimization
Signed-off-by: Connor Tsui <[email protected]>
1 parent cee7469 commit 5edef87

File tree

28 files changed

+1066
-628
lines changed

28 files changed

+1066
-628
lines changed

encodings/sparse/src/canonical.rs

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -931,9 +931,16 @@ mod test {
931931
// List 3: [2] at offset 3, size 1
932932
let offsets = buffer![0u32, 1, 2, 3].into_array();
933933
let sizes = buffer![1u32, 1, 1, 1].into_array();
934-
let lists = ListViewArray::try_new(elements, offsets, sizes, Validity::AllValid)
935-
.unwrap()
936-
.into_array();
934+
let lists = unsafe {
935+
ListViewArray::new_unchecked(
936+
elements,
937+
offsets,
938+
sizes,
939+
Validity::AllValid,
940+
true, // Is zero-copy to list.
941+
)
942+
}
943+
.into_array();
937944

938945
let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
939946
let fill_value = Scalar::null(lists.dtype().clone());
@@ -978,9 +985,16 @@ mod test {
978985
let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
979986
let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7].into_array();
980987
let sizes = buffer![1u32, 1, 1, 1, 1, 1, 1, 1].into_array();
981-
let lists = ListViewArray::try_new(elements, offsets, sizes, Validity::AllValid)
982-
.unwrap()
983-
.into_array();
988+
let lists = unsafe {
989+
ListViewArray::new_unchecked(
990+
elements,
991+
offsets,
992+
sizes,
993+
Validity::AllValid,
994+
true, // Is zero-copy to list.
995+
)
996+
}
997+
.into_array();
984998

985999
// Slice to get lists 2..6, which are: [1], [2], [1], [2]
9861000
let lists = lists.slice(2..6);
@@ -1022,9 +1036,16 @@ mod test {
10221036
let elements = buffer![1i32, 2, 1, 2].into_array();
10231037
let offsets = buffer![0u32, 1, 2, 3].into_array();
10241038
let sizes = buffer![1u32, 1, 1, 1].into_array();
1025-
let lists = ListViewArray::try_new(elements, offsets, sizes, Validity::AllValid)
1026-
.unwrap()
1027-
.into_array();
1039+
let lists = unsafe {
1040+
ListViewArray::new_unchecked(
1041+
elements,
1042+
offsets,
1043+
sizes,
1044+
Validity::AllValid,
1045+
true, // Is zero-copy to list.
1046+
)
1047+
}
1048+
.into_array();
10281049

10291050
let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
10301051
let fill_value = Scalar::from(Some(vec![5i32, 6, 7, 8]));
@@ -1384,8 +1405,15 @@ mod test {
13841405
let offsets = buffer![0u32, 3, 5].into_array();
13851406
let sizes = buffer![3u32, 2, 4].into_array();
13861407

1387-
let list_view =
1388-
ListViewArray::try_new(elements.clone(), offsets, sizes, Validity::AllValid).unwrap();
1408+
let list_view = unsafe {
1409+
ListViewArray::new_unchecked(
1410+
elements.clone(),
1411+
offsets,
1412+
sizes,
1413+
Validity::AllValid,
1414+
true, // Is zero-copy to list.
1415+
)
1416+
};
13891417

13901418
let list_dtype = list_view.dtype().clone();
13911419

@@ -1460,9 +1488,16 @@ mod test {
14601488
let offsets = buffer![0u32, 2, 5, 6, 8].into_array();
14611489
let sizes = buffer![2u32, 3, 1, 2, 2].into_array();
14621490

1463-
let full_listview = ListViewArray::try_new(elements, offsets, sizes, Validity::AllValid)
1464-
.unwrap()
1465-
.into_array();
1491+
let full_listview = unsafe {
1492+
ListViewArray::new_unchecked(
1493+
elements,
1494+
offsets,
1495+
sizes,
1496+
Validity::AllValid,
1497+
true, // Is zero-copy to list.
1498+
)
1499+
}
1500+
.into_array();
14661501

14671502
// Slice to get lists 1, 2, 3 (indices 1..4)
14681503
// This gives us lists with elements:

fuzz/src/array/mask.rs

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,18 @@ pub fn mask_canonical_array(canonical: Canonical, mask: &Mask) -> VortexResult<A
5757
}
5858
Canonical::List(array) => {
5959
let new_validity = apply_mask_to_validity(array.validity(), mask);
60-
ListViewArray::try_new(
61-
array.elements().clone(),
62-
array.offsets().clone(),
63-
array.sizes().clone(),
64-
new_validity,
65-
)
66-
.vortex_unwrap()
60+
61+
// SAFETY: Since we are only masking the validity and everything else comes from an
62+
// already valid `ListViewArray`, all of the invariants are still upheld.
63+
unsafe {
64+
ListViewArray::new_unchecked(
65+
array.elements().clone(),
66+
array.offsets().clone(),
67+
array.sizes().clone(),
68+
new_validity,
69+
array.is_zero_copy_to_list(),
70+
)
71+
}
6772
.into_array()
6873
}
6974
Canonical::FixedSizeList(array) => {
@@ -243,9 +248,15 @@ mod tests {
243248
let elements = PrimitiveArray::from_iter([1i32, 2, 3, 4, 5, 6]).into_array();
244249
let offsets = PrimitiveArray::from_iter([0i32, 2, 4]).into_array();
245250
let sizes = PrimitiveArray::from_iter([2i32, 2, 2]).into_array();
246-
let array =
247-
ListViewArray::try_new(elements, offsets, sizes, Nullability::NonNullable.into())
248-
.unwrap();
251+
let array = unsafe {
252+
ListViewArray::new_unchecked(
253+
elements,
254+
offsets,
255+
sizes,
256+
Nullability::NonNullable.into(),
257+
true, // Is zero-copy to list.
258+
)
259+
};
249260

250261
let mask = Mask::from_iter([false, true, false]);
251262

fuzz/src/array/slice.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,19 @@ pub fn slice_canonical_array(
7474
// Since the list view elements can be stored out of order, we cannot slice it.
7575
let elements = list_array.elements().clone();
7676

77-
ListViewArray::try_new(elements, offsets, sizes, validity).map(|a| a.into_array())
77+
// SAFETY: If the array was already zero-copyable to list, slicing the offsets and sizes
78+
// only causes there to be leading and trailing garbage data, which is still
79+
// zero-copyable to a `ListArray`.
80+
Ok(unsafe {
81+
ListViewArray::new_unchecked(
82+
elements,
83+
offsets,
84+
sizes,
85+
validity,
86+
list_array.is_zero_copy_to_list(),
87+
)
88+
}
89+
.into_array())
7890
}
7991
DType::FixedSizeList(..) => {
8092
let fsl_array = array.to_fixed_size_list();

vortex-array/src/arrays/chunked/vtable/canonical.rs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ use vortex_buffer::BufferMut;
55
use vortex_dtype::{DType, Nullability, PType, StructFields};
66
use vortex_error::VortexExpect;
77

8-
use crate::arrays::{ChunkedArray, ChunkedVTable, ListViewArray, PrimitiveArray, StructArray};
8+
use crate::arrays::{
9+
ChunkedArray, ChunkedVTable, ListViewArray, ListViewRebuildMode, PrimitiveArray, StructArray,
10+
};
911
use crate::builders::{ArrayBuilder, builder_with_capacity};
1012
use crate::compute::cast;
1113
use crate::validity::Validity;
@@ -121,6 +123,9 @@ fn swizzle_list_chunks(
121123

122124
for chunk in chunks {
123125
let chunk_array = chunk.to_listview();
126+
// By rebuilding as zero-copy to `List` and trimming all elements (to prevent gaps), we make
127+
// the final output `ListView` also zero-copyable to `List`.
128+
let chunk_array = chunk_array.rebuild(ListViewRebuildMode::MakeExact);
124129

125130
// Add the `elements` of the current array as a new chunk.
126131
list_elements_chunks.push(chunk_array.elements().clone());
@@ -158,12 +163,16 @@ fn swizzle_list_chunks(
158163
let offsets = PrimitiveArray::new(offsets.freeze(), Validity::NonNullable).into_array();
159164
let sizes = PrimitiveArray::new(sizes.freeze(), Validity::NonNullable).into_array();
160165

166+
// Since we made sure that all chunk were zero-copyable to list above, we know that the final
167+
// output is also zero-copyable to a list.
168+
let is_zctl = true;
169+
161170
// SAFETY:
162171
// - `offsets` and `sizes` are non-nullable u64 arrays of the same length
163172
// - Each `offset[i] + size[i]` list view is within bounds of elements array because it came
164173
// from valid chunks
165174
// - Validity came from the outer chunked array so it must have the same length
166-
unsafe { ListViewArray::new_unchecked(chunked_elements, offsets, sizes, validity) }
175+
unsafe { ListViewArray::new_unchecked(chunked_elements, offsets, sizes, validity, is_zctl) }
167176
}
168177

169178
#[cfg(test)]

vortex-array/src/arrays/constant/vtable/canonical.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,10 +253,13 @@ fn constant_canonical_list_array(scalar: &Scalar, len: usize) -> ListViewArray {
253253
debug_assert!(!offsets.dtype().is_nullable());
254254
debug_assert!(!sizes.dtype().is_nullable());
255255

256+
// Since all views are pointing to the same exact thing, it is not zero-copyable to `ListArray`.
257+
let is_zctl = false;
258+
256259
// SAFETY: All views point to the same range [0, list.len()) in the elements array.
257260
// The elements array contains `len` copies of the same value, offsets are all 0,
258261
// and sizes are all equal to the list length. The validity matches the scalar's nullability.
259-
unsafe { ListViewArray::new_unchecked(elements, offsets, sizes, validity) }
262+
unsafe { ListViewArray::new_unchecked(elements, offsets, sizes, validity, is_zctl) }
260263
}
261264

262265
fn constant_canonical_fixed_size_list_array(

vortex-array/src/arrays/list/array.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
use std::sync::Arc;
55

66
use num_traits::AsPrimitive;
7-
use vortex_dtype::{DType, match_each_integer_ptype, match_each_native_ptype};
7+
use vortex_dtype::{DType, NativePType, match_each_integer_ptype, match_each_native_ptype};
88
use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_ensure};
99

1010
use crate::arrays::{ListVTable, PrimitiveVTable};
@@ -191,8 +191,11 @@ impl ListArray {
191191

192192
vortex_ensure!(
193193
max_offset
194-
<= P::try_from(elements.len())
195-
.vortex_expect("Offsets type must be able to fit elements length"),
194+
<= P::try_from(elements.len()).vortex_expect(&format!(
195+
"Offsets type {} must be able to fit elements length {}",
196+
<P as NativePType>::PTYPE,
197+
elements.len()
198+
)),
196199
"Max offset {max_offset} is beyond the length of the elements array {}",
197200
elements.len()
198201
);
@@ -266,6 +269,11 @@ impl ListArray {
266269
&self.elements
267270
}
268271

272+
// TODO(connor)[ListView]: Create 2 functions `reset_offsets` and `recursive_reset_offsets`,
273+
// where `reset_offsets` is infallible.
274+
// Also, `reset_offsets` can be made more efficient by replacing `sub_scalar` with a match on
275+
// the offset type and manual subtraction and fast path where `offsets[0] == 0`.
276+
269277
/// Create a copy of this array by adjusting `offsets` to start at `0` and removing elements not
270278
/// referenced by the `offsets`.
271279
pub fn reset_offsets(&self, recurse: bool) -> VortexResult<Self> {

0 commit comments

Comments
 (0)