Skip to content

Commit 17b60d8

Browse files
committed
add data "shape" to ListViewArray
Adds a `ListViewShape` type to track whether list view data has sorted offsets, overlapping views, or gaps. This enables faster rebuilding and identifies when a `ListView` can be efficiently converted to a regular `List` array. Signed-off-by: Connor Tsui <[email protected]>
1 parent e1ca80a commit 17b60d8

File tree

29 files changed

+1370
-244
lines changed

29 files changed

+1370
-244
lines changed

encodings/sparse/src/canonical.rs

Lines changed: 46 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -492,8 +492,8 @@ mod test {
492492

493493
use rstest::rstest;
494494
use vortex_array::arrays::{
495-
BoolArray, DecimalArray, FixedSizeListArray, ListArray, ListViewArray, PrimitiveArray,
496-
StructArray, VarBinArray, VarBinViewArray,
495+
BoolArray, DecimalArray, FixedSizeListArray, ListArray, ListViewArray, ListViewShape,
496+
PrimitiveArray, StructArray, VarBinArray, VarBinViewArray,
497497
};
498498
use vortex_array::arrow::IntoArrowArray as _;
499499
use vortex_array::validity::Validity;
@@ -930,9 +930,15 @@ mod test {
930930
// List 3: [2] at offset 3, size 1
931931
let offsets = buffer![0u32, 1, 2, 3].into_array();
932932
let sizes = buffer![1u32, 1, 1, 1].into_array();
933-
let lists = ListViewArray::try_new(elements, offsets, sizes, Validity::AllValid)
934-
.unwrap()
935-
.into_array();
933+
let lists = ListViewArray::try_new(
934+
elements,
935+
offsets,
936+
sizes,
937+
Validity::AllValid,
938+
ListViewShape::as_zero_copy_to_list(),
939+
)
940+
.unwrap()
941+
.into_array();
936942

937943
let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
938944
let fill_value = Scalar::null(lists.dtype().clone());
@@ -977,9 +983,15 @@ mod test {
977983
let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
978984
let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7].into_array();
979985
let sizes = buffer![1u32, 1, 1, 1, 1, 1, 1, 1].into_array();
980-
let lists = ListViewArray::try_new(elements, offsets, sizes, Validity::AllValid)
981-
.unwrap()
982-
.into_array();
986+
let lists = ListViewArray::try_new(
987+
elements,
988+
offsets,
989+
sizes,
990+
Validity::AllValid,
991+
ListViewShape::as_zero_copy_to_list(),
992+
)
993+
.unwrap()
994+
.into_array();
983995

984996
// Slice to get lists 2..6, which are: [1], [2], [1], [2]
985997
let lists = lists.slice(2..6);
@@ -1021,9 +1033,15 @@ mod test {
10211033
let elements = buffer![1i32, 2, 1, 2].into_array();
10221034
let offsets = buffer![0u32, 1, 2, 3].into_array();
10231035
let sizes = buffer![1u32, 1, 1, 1].into_array();
1024-
let lists = ListViewArray::try_new(elements, offsets, sizes, Validity::AllValid)
1025-
.unwrap()
1026-
.into_array();
1036+
let lists = ListViewArray::try_new(
1037+
elements,
1038+
offsets,
1039+
sizes,
1040+
Validity::AllValid,
1041+
ListViewShape::as_zero_copy_to_list(),
1042+
)
1043+
.unwrap()
1044+
.into_array();
10271045

10281046
let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
10291047
let fill_value = Scalar::from(Some(vec![5i32, 6, 7, 8]));
@@ -1383,8 +1401,14 @@ mod test {
13831401
let offsets = buffer![0u32, 3, 5].into_array();
13841402
let sizes = buffer![3u32, 2, 4].into_array();
13851403

1386-
let list_view =
1387-
ListViewArray::try_new(elements.clone(), offsets, sizes, Validity::AllValid).unwrap();
1404+
let list_view = ListViewArray::try_new(
1405+
elements.clone(),
1406+
offsets,
1407+
sizes,
1408+
Validity::AllValid,
1409+
ListViewShape::as_zero_copy_to_list(),
1410+
)
1411+
.unwrap();
13881412

13891413
let list_dtype = list_view.dtype().clone();
13901414

@@ -1459,9 +1483,15 @@ mod test {
14591483
let offsets = buffer![0u32, 2, 5, 6, 8].into_array();
14601484
let sizes = buffer![2u32, 3, 1, 2, 2].into_array();
14611485

1462-
let full_listview = ListViewArray::try_new(elements, offsets, sizes, Validity::AllValid)
1463-
.unwrap()
1464-
.into_array();
1486+
let full_listview = ListViewArray::try_new(
1487+
elements,
1488+
offsets,
1489+
sizes,
1490+
Validity::AllValid,
1491+
ListViewShape::as_zero_copy_to_list(),
1492+
)
1493+
.unwrap()
1494+
.into_array();
14651495

14661496
// Slice to get lists 1, 2, 3 (indices 1..4)
14671497
// This gives us lists with elements:

fuzz/src/array/mask.rs

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ pub fn mask_canonical_array(canonical: Canonical, mask: &Mask) -> VortexResult<A
6262
array.offsets().clone(),
6363
array.sizes().clone(),
6464
new_validity,
65+
array.shape(),
6566
)
6667
.vortex_unwrap()
6768
.into_array()
@@ -130,8 +131,8 @@ fn apply_mask_to_validity(validity: &Validity, mask: &Mask) -> Validity {
130131
#[cfg(test)]
131132
mod tests {
132133
use vortex_array::arrays::{
133-
BoolArray, DecimalArray, FixedSizeListArray, ListViewArray, NullArray, PrimitiveArray,
134-
StructArray, VarBinViewArray,
134+
BoolArray, DecimalArray, FixedSizeListArray, ListViewArray, ListViewShape, NullArray,
135+
PrimitiveArray, StructArray, VarBinViewArray,
135136
};
136137
use vortex_array::{Array, IntoArray};
137138
use vortex_dtype::{DecimalDType, FieldNames, Nullability};
@@ -243,9 +244,14 @@ mod tests {
243244
let elements = PrimitiveArray::from_iter([1i32, 2, 3, 4, 5, 6]).into_array();
244245
let offsets = PrimitiveArray::from_iter([0i32, 2, 4]).into_array();
245246
let sizes = PrimitiveArray::from_iter([2i32, 2, 2]).into_array();
246-
let array =
247-
ListViewArray::try_new(elements, offsets, sizes, Nullability::NonNullable.into())
248-
.unwrap();
247+
let array = ListViewArray::try_new(
248+
elements,
249+
offsets,
250+
sizes,
251+
Nullability::NonNullable.into(),
252+
ListViewShape::as_zero_copy_to_list(),
253+
)
254+
.unwrap();
249255

250256
let mask = Mask::from_iter([false, true, false]);
251257

fuzz/src/array/slice.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@ pub fn slice_canonical_array(
7474
// Since the list view elements can be stored out of order, we cannot slice the elements array.
7575
let elements = list_array.elements().clone();
7676

77-
ListViewArray::try_new(elements, offsets, sizes, validity).map(|a| a.into_array())
77+
ListViewArray::try_new(elements, offsets, sizes, validity, list_array.shape())
78+
.map(|a| a.into_array())
7879
}
7980
DType::FixedSizeList(..) => {
8081
let fsl_array = array.to_fixed_size_list();

vortex-array/src/arrays/chunked/vtable/canonical.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@ use vortex_buffer::BufferMut;
55
use vortex_dtype::{DType, Nullability, PType, StructFields};
66
use vortex_error::VortexExpect;
77

8-
use crate::arrays::{ChunkedArray, ChunkedVTable, ListViewArray, PrimitiveArray, StructArray};
8+
use crate::arrays::{
9+
ChunkedArray, ChunkedVTable, ListViewArray, ListViewRebuildMode, ListViewShape, PrimitiveArray,
10+
StructArray,
11+
};
912
use crate::builders::{ArrayBuilder, builder_with_capacity};
1013
use crate::compute::cast;
1114
use crate::validity::Validity;
@@ -121,6 +124,9 @@ fn swizzle_list_chunks(
121124

122125
for chunk in chunks {
123126
let chunk_array = chunk.to_listview();
127+
// By rebuilding as zero-copy to `List`, we make the final output `ListView` also
128+
// zero-copyable to `List`.
129+
let chunk_array = chunk_array.rebuild(ListViewRebuildMode::MakeZeroCopyToList);
124130

125131
// Add the `elements` of the current array as a new chunk.
126132
list_elements_chunks.push(chunk_array.elements().clone());
@@ -158,12 +164,16 @@ fn swizzle_list_chunks(
158164
let offsets = PrimitiveArray::new(offsets.freeze(), Validity::NonNullable).into_array();
159165
let sizes = PrimitiveArray::new(sizes.freeze(), Validity::NonNullable).into_array();
160166

167+
// Since we made sure that all chunks were zero-copyable to list above, we know that the final
168+
// output is also zero-copyable to a list.
169+
let shape = ListViewShape::as_zero_copy_to_list();
170+
161171
// SAFETY:
162172
// - `offsets` and `sizes` are non-nullable u64 arrays of the same length
163173
// - Each `offset[i] + size[i]` list view is within bounds of elements array because it came
164174
// from valid chunks
165175
// - Validity came from the outer chunked array so it must have the same length
166-
unsafe { ListViewArray::new_unchecked(chunked_elements, offsets, sizes, validity) }
176+
unsafe { ListViewArray::new_unchecked(chunked_elements, offsets, sizes, validity, shape) }
167177
}
168178

169179
#[cfg(test)]

vortex-array/src/arrays/constant/vtable/canonical.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use crate::arrays::constant::ConstantArray;
1616
use crate::arrays::primitive::PrimitiveArray;
1717
use crate::arrays::{
1818
BoolArray, ConstantVTable, DecimalArray, ExtensionArray, FixedSizeListArray, ListViewArray,
19-
NullArray, StructArray, VarBinViewArray, smallest_decimal_value_type,
19+
ListViewShape, NullArray, StructArray, VarBinViewArray, smallest_decimal_value_type,
2020
};
2121
use crate::builders::builder_with_capacity;
2222
use crate::validity::Validity;
@@ -253,10 +253,14 @@ fn constant_canonical_list_array(scalar: &Scalar, len: usize) -> ListViewArray {
253253
debug_assert!(!offsets.dtype().is_nullable());
254254
debug_assert!(!sizes.dtype().is_nullable());
255255

256+
// Since everything is pointing to the same exact thing, the offsets are sorted and there are no
257+
// gaps in the shape. The views can overlap, though, so the no-overlaps flag is cleared.
258+
let shape = ListViewShape::as_zero_copy_to_list().with_no_overlaps(false);
259+
256260
// SAFETY: All views point to the same range [0, list.len()) in the elements array.
257261
// The elements array contains `len` copies of the same value, offsets are all 0,
258262
// and sizes are all equal to the list length. The validity matches the scalar's nullability.
259-
unsafe { ListViewArray::new_unchecked(elements, offsets, sizes, validity) }
263+
unsafe { ListViewArray::new_unchecked(elements, offsets, sizes, validity, shape) }
260264
}
261265

262266
fn constant_canonical_fixed_size_list_array(

0 commit comments

Comments
 (0)