Skip to content

Commit 7bf36e4

Browse files
committed
add scaffolding for ListVector
Signed-off-by: Connor Tsui <[email protected]>
1 parent 4d2d47d commit 7bf36e4

File tree

12 files changed

+527
-19
lines changed

12 files changed

+527
-19
lines changed

vortex-compute/src/arrow/list.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use arrow_array::ArrayRef;
5+
use vortex_error::VortexResult;
6+
use vortex_vector::listview::ListViewVector;
7+
8+
use crate::arrow::IntoArrow;
9+
10+
// Conversion of a `ListViewVector` into an Arrow `ArrayRef`.
//
// NOTE: this is scaffolding only. The body is `todo!`, so calling `into_arrow` panics at runtime.
impl IntoArrow<ArrayRef> for ListViewVector {
11+
fn into_arrow(self) -> VortexResult<ArrayRef> {
12+
todo!("Figure out how to do this")
13+
}
14+
}

vortex-compute/src/arrow/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ mod binaryview;
99
mod bool;
1010
mod decimal;
1111
mod fixed_size_list;
12+
mod list;
1213
mod mask;
1314
mod null;
1415
mod primitive;

vortex-compute/src/mask/mod.rs

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use vortex_vector::binaryview::{BinaryViewType, BinaryViewVector};
1111
use vortex_vector::bool::BoolVector;
1212
use vortex_vector::decimal::{DVector, DecimalVector};
1313
use vortex_vector::fixed_size_list::FixedSizeListVector;
14+
use vortex_vector::listview::ListViewVector;
1415
use vortex_vector::null::NullVector;
1516
use vortex_vector::primitive::{PVector, PrimitiveVector};
1617
use vortex_vector::struct_::StructVector;
@@ -46,31 +47,31 @@ impl MaskValidity for BoolVector {
4647
}
4748
}
4849

49-
impl MaskValidity for DecimalVector {
50+
impl MaskValidity for PrimitiveVector {
5051
fn mask_validity(self, mask: &Mask) -> Self {
51-
match_each_dvector!(self, |v| { MaskValidity::mask_validity(v, mask).into() })
52+
match_each_pvector!(self, |v| { MaskValidity::mask_validity(v, mask).into() })
5253
}
5354
}
5455

55-
impl<D: NativeDecimalType> MaskValidity for DVector<D> {
56+
impl<T: NativePType> MaskValidity for PVector<T> {
5657
fn mask_validity(self, mask: &Mask) -> Self {
57-
let (ps, elements, validity) = self.into_parts();
58-
// SAFETY: we are preserving the original elements buffer and only modifying the validity.
59-
unsafe { Self::new_unchecked(ps, elements, validity.bitand(mask)) }
58+
let (data, validity) = self.into_parts();
59+
// SAFETY: we are preserving the original data buffer and only modifying the validity.
60+
unsafe { Self::new_unchecked(data, validity.bitand(mask)) }
6061
}
6162
}
6263

63-
impl MaskValidity for PrimitiveVector {
64+
impl MaskValidity for DecimalVector {
6465
fn mask_validity(self, mask: &Mask) -> Self {
65-
match_each_pvector!(self, |v| { MaskValidity::mask_validity(v, mask).into() })
66+
match_each_dvector!(self, |v| { MaskValidity::mask_validity(v, mask).into() })
6667
}
6768
}
6869

69-
impl<T: NativePType> MaskValidity for PVector<T> {
70+
impl<D: NativeDecimalType> MaskValidity for DVector<D> {
7071
fn mask_validity(self, mask: &Mask) -> Self {
71-
let (data, validity) = self.into_parts();
72-
// SAFETY: we are preserving the original data buffer and only modifying the validity.
73-
unsafe { Self::new_unchecked(data, validity.bitand(mask)) }
72+
let (ps, elements, validity) = self.into_parts();
73+
// SAFETY: we are preserving the original elements buffer and only modifying the validity.
74+
unsafe { Self::new_unchecked(ps, elements, validity.bitand(mask)) }
7475
}
7576
}
7677

@@ -82,6 +83,15 @@ impl<T: BinaryViewType> MaskValidity for BinaryViewVector<T> {
8283
}
8384
}
8485

86+
impl MaskValidity for ListViewVector {
87+
fn mask_validity(self, mask: &Mask) -> Self {
88+
let (elements, offsets, sizes, validity) = self.into_parts();
89+
// SAFETY: we are preserving the original elements, offsets, and sizes, only modifying the
90+
// validity.
91+
unsafe { Self::new_unchecked(elements, offsets, sizes, validity.bitand(mask)) }
92+
}
93+
}
94+
8595
impl MaskValidity for FixedSizeListVector {
8696
fn mask_validity(self, mask: &Mask) -> Self {
8797
let (elements, list_size, validity) = self.into_parts();

vortex-vector/src/fixed_size_list/vector.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,22 +149,24 @@ impl VectorOps for FixedSizeListVector {
149149
&self.validity
150150
}
151151

152-
fn try_into_mut(self) -> Result<Self::Mutable, Self>
152+
fn try_into_mut(self) -> Result<FixedSizeListVectorMut, Self>
153153
where
154154
Self: Sized,
155155
{
156156
let len = self.len;
157157
let list_size = self.list_size;
158158

159+
// Try to unwrap the `Arc`.
159160
let elements = match Arc::try_unwrap(self.elements) {
160161
Ok(elements) => elements,
161-
Err(elements) => return Err(FixedSizeListVector { elements, ..self }),
162+
Err(elements) => return Err(Self { elements, ..self }),
162163
};
163164

165+
// Try to make validity mutable.
164166
let validity = match self.validity.try_into_mut() {
165167
Ok(validity) => validity,
166168
Err(validity) => {
167-
return Err(FixedSizeListVector {
169+
return Err(Self {
168170
elements: Arc::new(elements),
169171
list_size,
170172
validity,
@@ -173,14 +175,15 @@ impl VectorOps for FixedSizeListVector {
173175
}
174176
};
175177

178+
// Try to make the elements mutable.
176179
match elements.try_into_mut() {
177180
Ok(mutable_elements) => Ok(FixedSizeListVectorMut {
178181
elements: Box::new(mutable_elements),
179182
list_size,
180183
validity,
181184
len,
182185
}),
183-
Err(elements) => Err(FixedSizeListVector {
186+
Err(elements) => Err(Self {
184187
elements: Arc::new(elements),
185188
list_size,
186189
validity: validity.freeze(),

vortex-vector/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ pub mod binaryview;
1414
pub mod bool;
1515
pub mod decimal;
1616
pub mod fixed_size_list;
17+
pub mod listview;
1718
pub mod null;
1819
pub mod primitive;
1920
pub mod struct_;

vortex-vector/src/listview/mod.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Definition and implementation of [`ListViewVector`] and [`ListViewVectorMut`].
5+
//!
6+
//! A [`ListViewVector`] represents a collection of variable-width lists, where each list can
7+
//! contain a different number of elements.
8+
//!
9+
//! The structure uses separate offset and size vectors to track the boundaries of each list
10+
//! within the flat elements array. This allows for efficient access to individual lists without
11+
//! copying data. This is similar to Apache Arrow's `ListView` type.
12+
13+
mod vector;
14+
pub use vector::ListViewVector;
15+
16+
mod vector_mut;
17+
pub use vector_mut::ListViewVectorMut;
18+
19+
use crate::{Vector, VectorMut};
20+
21+
// Wraps a `ListViewVector` in the `List` variant of `Vector`.
impl From<ListViewVector> for Vector {
22+
fn from(v: ListViewVector) -> Self {
23+
Self::List(v)
24+
}
25+
}
26+
27+
// Wraps a `ListViewVectorMut` in the `List` variant of `VectorMut`.
impl From<ListViewVectorMut> for VectorMut {
28+
fn from(v: ListViewVectorMut) -> Self {
29+
Self::List(v)
30+
}
31+
}
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Definition and implementation of [`ListViewVector`].
5+
6+
use std::sync::Arc;
7+
8+
use vortex_error::{VortexExpect, VortexResult};
9+
use vortex_mask::Mask;
10+
11+
use super::ListViewVectorMut;
12+
use crate::Vector;
13+
use crate::ops::{VectorMutOps, VectorOps};
14+
use crate::primitive::PrimitiveVector;
15+
16+
/// A vector of variable-width lists.
17+
///
18+
/// Each list is defined by 2 integers: an offset and a size (a "list view"), which point into a
19+
/// child `elements` vector.
20+
///
21+
/// Note that the list views **do not** need to be sorted, nor do they have to be contiguous or
22+
/// fully cover the `elements` vector. This means that multiple views can point to the same
23+
/// elements.
24+
///
25+
/// # Structure
26+
///
27+
/// - `elements`: The child vector of all list elements, stored as an [`Arc<Vector>`].
28+
/// - `offsets`: A [`PrimitiveVector`] containing the starting offset of each list in the `elements`
29+
/// vector.
30+
/// - `sizes`: A [`PrimitiveVector`] containing the size (number of elements) of each list.
31+
/// - `validity`: A [`Mask`] in which `true` marks a non-null list and `false` marks a null list.
/// - `len`: The number of lists, cached from the validity mask (which has the same length).
32+
#[derive(Debug, Clone)]
33+
pub struct ListViewVector {
34+
/// The child vector of elements.
35+
pub(super) elements: Arc<Vector>,
36+
37+
/// Offsets for each list into the elements vector.
38+
pub(super) offsets: PrimitiveVector,
39+
40+
/// Sizes (lengths) of each list.
41+
pub(super) sizes: PrimitiveVector,
42+
43+
/// The validity mask (where `true` means that a list is **not** null).
44+
///
45+
/// Note that the `elements` vector will have its own internal validity, denoting if individual
46+
/// list elements are null.
47+
pub(super) validity: Mask,
48+
49+
/// The length of the vector (which is the same as the length of the validity mask).
50+
///
51+
/// This is stored here as a convenience, as the validity also tracks this information.
52+
pub(super) len: usize,
53+
}
54+
55+
impl ListViewVector {
56+
/// Creates a new [`ListViewVector`] from its components.
57+
///
58+
/// # Panics
59+
///
60+
/// Panics if [`Self::try_new`] returns an error.
///
/// NOTE: `try_new` is currently an unimplemented stub (`todo!()`), so this always panics.
61+
pub fn new(
62+
elements: Arc<Vector>,
63+
offsets: PrimitiveVector,
64+
sizes: PrimitiveVector,
65+
validity: Mask,
66+
) -> Self {
67+
Self::try_new(elements, offsets, sizes, validity)
68+
.vortex_expect("Invalid ListViewVector construction")
69+
}
70+
71+
/// Attempts to create a new [`ListViewVector`] from its components.
72+
///
73+
/// # Errors
74+
///
75+
/// The validation rules are not defined yet: the body is `todo!()`, so calling this currently
/// panics instead of returning an error.
76+
pub fn try_new(
77+
_elements: Arc<Vector>,
78+
_offsets: PrimitiveVector,
79+
_sizes: PrimitiveVector,
80+
_validity: Mask,
81+
) -> VortexResult<Self> {
82+
todo!()
83+
}
84+
85+
/// Creates a new [`ListViewVector`] without validation.
86+
///
/// The length of the vector is taken from `validity.len()`. When `debug_assertions` are
/// enabled this delegates to [`Self::new`], which currently always panics because
/// [`Self::try_new`] is a `todo!()` stub; otherwise the components are stored exactly as given.
///
87+
/// # Safety
88+
///
89+
/// NOTE(review): the invariants are not specified yet. Presumably the caller must guarantee
/// everything [`Self::try_new`] will eventually check -- confirm once it is implemented.
90+
pub unsafe fn new_unchecked(
91+
elements: Arc<Vector>,
92+
offsets: PrimitiveVector,
93+
sizes: PrimitiveVector,
94+
validity: Mask,
95+
) -> Self {
96+
let len = validity.len();
97+
98+
// Debug builds go through the validating constructor; other builds skip it.
if cfg!(debug_assertions) {
99+
Self::new(elements, offsets, sizes, validity)
100+
} else {
101+
Self {
102+
elements,
103+
offsets,
104+
sizes,
105+
validity,
106+
len,
107+
}
108+
}
109+
}
110+
111+
/// Decomposes the [`ListViewVector`] into its constituent parts (child elements, offsets,
112+
/// sizes, and validity). The cached `len` field is not part of the returned tuple.
113+
pub fn into_parts(self) -> (Arc<Vector>, PrimitiveVector, PrimitiveVector, Mask) {
114+
(self.elements, self.offsets, self.sizes, self.validity)
115+
}
116+
117+
/// Returns a reference to the `elements` vector.
118+
#[inline]
119+
pub fn elements(&self) -> &Vector {
120+
&self.elements
121+
}
122+
123+
/// Returns a reference to the integer `offsets` vector.
124+
#[inline]
125+
pub fn offsets(&self) -> &PrimitiveVector {
126+
&self.offsets
127+
}
128+
129+
/// Returns a reference to the integer `sizes` vector.
130+
#[inline]
131+
pub fn sizes(&self) -> &PrimitiveVector {
132+
&self.sizes
133+
}
134+
}
135+
136+
impl VectorOps for ListViewVector {
137+
type Mutable = ListViewVectorMut;
138+
139+
/// Returns the cached number of lists in the vector.
fn len(&self) -> usize {
140+
self.len
141+
}
142+
143+
/// Returns the list-level validity mask.
fn validity(&self) -> &Mask {
144+
&self.validity
145+
}
146+
147+
/// Attempts to convert this vector into a [`ListViewVectorMut`].
///
/// The parts are converted in a fixed order: the `Arc` around `elements` is unwrapped, then
/// `validity`, `offsets`, `sizes`, and finally the `elements` vector itself are made mutable.
/// If any step fails, the parts that were already converted are frozen again, `elements` is
/// re-wrapped in a new `Arc`, and the rebuilt immutable vector is returned as the `Err` value.
fn try_into_mut(self) -> Result<ListViewVectorMut, Self> {
148+
// Take sole ownership of the child elements; this fails if the `Arc` is shared.
149+
let elements = match Arc::try_unwrap(self.elements) {
150+
Ok(elements) => elements,
151+
Err(elements) => return Err(Self { elements, ..self }),
152+
};
153+
154+
// Try to make the validity mutable.
155+
let validity = match self.validity.try_into_mut() {
156+
Ok(v) => v,
157+
Err(validity) => {
158+
return Err(Self {
159+
elements: Arc::new(elements),
160+
validity,
161+
..self
162+
});
163+
}
164+
};
165+
166+
// Try to make the offsets mutable. From here on `self` is partially moved, so every field
// is listed explicitly when rebuilding the immutable vector.
167+
let offsets = match self.offsets.try_into_mut() {
168+
Ok(mutable_offsets) => mutable_offsets,
169+
Err(offsets) => {
170+
return Err(Self {
171+
offsets,
172+
sizes: self.sizes,
173+
elements: Arc::new(elements),
174+
validity: validity.freeze(),
175+
len: self.len,
176+
});
177+
}
178+
};
179+
180+
// Try to make the sizes mutable.
181+
let sizes = match self.sizes.try_into_mut() {
182+
Ok(mutable_sizes) => mutable_sizes,
183+
Err(sizes) => {
184+
return Err(Self {
185+
offsets: offsets.freeze(),
186+
sizes,
187+
elements: Arc::new(elements),
188+
validity: validity.freeze(),
189+
len: self.len,
190+
});
191+
}
192+
};
193+
194+
// Try to make the elements mutable. This is the last step, so on failure every other part
// has to be frozen again.
195+
match elements.try_into_mut() {
196+
Ok(mut_elements) => Ok(ListViewVectorMut {
197+
offsets,
198+
sizes,
199+
elements: Box::new(mut_elements),
200+
validity,
201+
len: self.len,
202+
}),
203+
Err(elements) => Err(Self {
204+
offsets: offsets.freeze(),
205+
sizes: sizes.freeze(),
206+
elements: Arc::new(elements),
207+
validity: validity.freeze(),
208+
len: self.len,
209+
}),
210+
}
211+
}
212+
}

0 commit comments

Comments
 (0)