Skip to content

Commit 625273e

Browse files
authored
Feature: FixedSizeListVector (#5127)
Tracking Issue: #5028 --------- Signed-off-by: Connor Tsui <[email protected]>
1 parent 7d786bd commit 625273e

File tree

12 files changed

+978
-8
lines changed

12 files changed

+978
-8
lines changed
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use std::sync::Arc;
5+
6+
use arrow_array::{ArrayRef, FixedSizeListArray};
7+
use arrow_schema::Field;
8+
use vortex_error::VortexResult;
9+
use vortex_vector::FixedSizeListVector;
10+
11+
use crate::arrow::IntoArrow;
12+
13+
impl IntoArrow<ArrayRef> for FixedSizeListVector {
14+
fn into_arrow(self) -> VortexResult<ArrayRef> {
15+
let (elements, list_size, validity) = self.into_parts();
16+
17+
let converted_elements = elements.as_ref().clone().into_arrow()?;
18+
let field = Arc::new(Field::new_list_field(
19+
converted_elements.data_type().clone(),
20+
true, // Vectors are always nullable.
21+
));
22+
23+
Ok(Arc::new(FixedSizeListArray::try_new(
24+
field,
25+
list_size as i32,
26+
converted_elements,
27+
validity.into_arrow()?,
28+
)?))
29+
}
30+
}

vortex-compute/src/arrow/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use vortex_error::VortexResult;
88
mod binaryview;
99
mod bool;
1010
mod decimal;
11+
mod fixed_size_list;
1112
mod mask;
1213
mod null;
1314
mod primitive;

vortex-compute/src/mask/mod.rs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ use std::ops::BitAnd;
88
use vortex_dtype::{NativeDecimalType, NativePType};
99
use vortex_mask::Mask;
1010
use vortex_vector::{
11-
BinaryViewType, BinaryViewVector, BoolVector, DVector, DecimalVector, NullVector, PVector,
12-
PrimitiveVector, StructVector, Vector, match_each_dvector, match_each_pvector,
13-
match_each_vector,
11+
BinaryViewType, BinaryViewVector, BoolVector, DVector, DecimalVector, FixedSizeListVector,
12+
NullVector, PVector, PrimitiveVector, StructVector, Vector, match_each_dvector,
13+
match_each_pvector, match_each_vector,
1414
};
1515

1616
/// Trait for masking the validity of an array or vector.
@@ -79,6 +79,15 @@ impl<T: BinaryViewType> MaskValidity for BinaryViewVector<T> {
7979
}
8080
}
8181

82+
impl MaskValidity for FixedSizeListVector {
83+
fn mask_validity(self, mask: &Mask) -> Self {
84+
let (elements, list_size, validity) = self.into_parts();
85+
// SAFETY: we are preserving the original elements and `list_size`, only modifying the
86+
// validity.
87+
unsafe { Self::new_unchecked(elements, list_size, validity.bitand(mask)) }
88+
}
89+
}
90+
8291
impl MaskValidity for StructVector {
8392
fn mask_validity(self, mask: &Mask) -> Self {
8493
let (fields, validity) = self.into_parts();
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Definition and implementation of [`FixedSizeListVector`] and [`FixedSizeListVectorMut`].
5+
6+
mod vector;
7+
pub use vector::FixedSizeListVector;
8+
9+
mod vector_mut;
10+
pub use vector_mut::FixedSizeListVectorMut;
11+
12+
use crate::{Vector, VectorMut};
13+
14+
impl From<FixedSizeListVector> for Vector {
15+
fn from(v: FixedSizeListVector) -> Self {
16+
Self::FixedSizeList(v)
17+
}
18+
}
19+
20+
impl From<FixedSizeListVectorMut> for VectorMut {
21+
fn from(v: FixedSizeListVectorMut) -> Self {
22+
Self::FixedSizeList(v)
23+
}
24+
}
Lines changed: 305 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,305 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Definition and implementation of [`FixedSizeListVector`].
5+
6+
use std::sync::Arc;
7+
8+
use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
9+
use vortex_mask::Mask;
10+
11+
use crate::{FixedSizeListVectorMut, Vector, VectorOps};
12+
13+
/// An immutable vector of fixed-size lists.
14+
///
15+
/// `FixedSizeListVector` can be considered a borrowed / frozen version of
16+
/// [`FixedSizeListVectorMut`], which is created via the [`freeze`](crate::VectorMutOps::freeze)
17+
/// method.
18+
///
19+
/// See the documentation for [`FixedSizeListVectorMut`] for more information.
20+
#[derive(Debug, Clone)]
21+
pub struct FixedSizeListVector {
22+
/// The child vector of elements.
23+
pub(super) elements: Arc<Vector>,
24+
25+
/// The size of every list in the vector.
26+
pub(super) list_size: u32,
27+
28+
/// The validity mask (where `true` represents a list is **not** null).
29+
///
30+
/// Note that the `elements` vector will have its own internal validity, denoting if individual
31+
/// list elements are null.
32+
pub(super) validity: Mask,
33+
34+
/// The length of the vector (which is the same as the length of the validity mask).
35+
///
36+
/// This is stored here as a convenience, as the validity also tracks this information.
37+
pub(super) len: usize,
38+
}
39+
40+
impl FixedSizeListVector {
41+
/// Creates a new [`FixedSizeListVector`] from the given `elements` vector, size of each list,
42+
/// and validity mask.
43+
///
44+
/// # Panics
45+
///
46+
/// Panics if the length of the `validity` mask multiplied by the `list_size` is not
47+
/// equal to the length of the `elements` vector.
48+
///
49+
/// Put another way, the length of the `elements` vector divided by the `list_size` must be
50+
/// equal to the length of the validity, or this function will panic.
51+
pub fn new(elements: Arc<Vector>, list_size: u32, validity: Mask) -> Self {
52+
Self::try_new(elements, list_size, validity)
53+
.vortex_expect("Failed to create `FixedSizeListVector`")
54+
}
55+
56+
/// Tries to create a new [`FixedSizeListVector`] from the given `elements` vector, size of each
57+
/// list, and validity mask.
58+
///
59+
/// # Errors
60+
///
61+
/// Returns and error if the length of the `validity` mask multiplied by the `list_size` is not
62+
/// equal to the length of the `elements` vector.
63+
///
64+
/// Put another way, the length of the `elements` vector divided by the `list_size` must be
65+
/// equal to the length of the validity.
66+
pub fn try_new(elements: Arc<Vector>, list_size: u32, validity: Mask) -> VortexResult<Self> {
67+
let len = validity.len();
68+
let elements_len = elements.len();
69+
70+
if list_size == 0 {
71+
vortex_ensure!(
72+
elements.is_empty(),
73+
"A degenerate (`list_size == 0`) `FixedSizeListVector` should have no underlying elements",
74+
);
75+
} else {
76+
vortex_ensure!(
77+
list_size as usize * len == elements_len,
78+
"Tried to create a `FixedSizeListVector` of length {len} and list_size {list_size} \
79+
with an child vector of size {elements_len} ({list_size} * {len} != {elements_len})",
80+
);
81+
}
82+
83+
Ok(Self {
84+
elements,
85+
list_size,
86+
validity,
87+
len,
88+
})
89+
}
90+
91+
/// Tries to create a new [`FixedSizeListVector`] from the given `elements` vector, size of each
92+
/// list, and validity mask without validation.
93+
///
94+
/// # Safety
95+
///
96+
/// The caller must ensure that the length of the `validity` mask multiplied by the `list_size`
97+
/// is exactly equal to the length of the `elements` vector.
98+
pub unsafe fn new_unchecked(elements: Arc<Vector>, list_size: u32, validity: Mask) -> Self {
99+
let len = validity.len();
100+
101+
if cfg!(debug_assertions) {
102+
Self::new(elements, list_size, validity)
103+
} else {
104+
Self {
105+
elements,
106+
list_size,
107+
validity,
108+
len,
109+
}
110+
}
111+
}
112+
113+
/// Decomposes the `FixedSizeListVector` into its constituent parts (child elements, list size,
114+
/// and validity).
115+
pub fn into_parts(self) -> (Arc<Vector>, u32, Mask) {
116+
(self.elements, self.list_size, self.validity)
117+
}
118+
119+
/// Returns the child vector of elements, which represents the contiguous fixed-size lists of
120+
/// the `FixedSizeListVector`.
121+
pub fn elements(&self) -> &Arc<Vector> {
122+
&self.elements
123+
}
124+
125+
/// Returns the size of every list in the vector.
126+
pub fn list_size(&self) -> u32 {
127+
self.list_size
128+
}
129+
}
130+
131+
impl VectorOps for FixedSizeListVector {
132+
type Mutable = FixedSizeListVectorMut;
133+
134+
fn len(&self) -> usize {
135+
self.len
136+
}
137+
138+
fn validity(&self) -> &Mask {
139+
&self.validity
140+
}
141+
142+
fn try_into_mut(self) -> Result<Self::Mutable, Self>
143+
where
144+
Self: Sized,
145+
{
146+
let len = self.len;
147+
let list_size = self.list_size;
148+
149+
let elements = match Arc::try_unwrap(self.elements) {
150+
Ok(elements) => elements,
151+
Err(elements) => return Err(FixedSizeListVector { elements, ..self }),
152+
};
153+
154+
let validity = match self.validity.try_into_mut() {
155+
Ok(validity) => validity,
156+
Err(validity) => {
157+
return Err(FixedSizeListVector {
158+
elements: Arc::new(elements),
159+
list_size,
160+
validity,
161+
len,
162+
});
163+
}
164+
};
165+
166+
match elements.try_into_mut() {
167+
Ok(mutable_elements) => Ok(FixedSizeListVectorMut {
168+
elements: Box::new(mutable_elements),
169+
list_size,
170+
validity,
171+
len,
172+
}),
173+
Err(elements) => Err(FixedSizeListVector {
174+
elements: Arc::new(elements),
175+
list_size,
176+
validity: validity.freeze(),
177+
len,
178+
}),
179+
}
180+
}
181+
}
182+
183+
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use vortex_mask::Mask;

    use super::*;
    use crate::{PVectorMut, Vector, VectorMutOps};

    /// Builds a shared, frozen `i32` child vector from the given values.
    fn i32_elements(values: impl IntoIterator<Item = i32>) -> Arc<Vector> {
        Arc::new(PVectorMut::<i32>::from_iter(values).freeze().into())
    }

    #[test]
    fn test_constructor_and_validation() {
        let six = i32_elements([1, 2, 3, 4, 5, 6]);
        let two_lists = Mask::new_true(2);

        // `new()` accepts 6 elements as 2 lists of 3.
        let built = FixedSizeListVector::new(six.clone(), 3, two_lists.clone());
        assert_eq!(built.len(), 2);
        assert_eq!(built.list_size(), 3);

        // `try_new()` accepts the same arguments.
        let attempt = FixedSizeListVector::try_new(six.clone(), 3, two_lists);
        assert!(attempt.is_ok());
        assert_eq!(attempt.unwrap().len(), 2);

        // A validity of length 3 does not match 6 elements with list_size = 3 (should be 2).
        let mismatched = FixedSizeListVector::try_new(six.clone(), 3, Mask::new_true(3));
        assert!(mismatched.is_err());

        // The degenerate case (list_size = 0) is valid as long as there are no elements.
        let degenerate =
            FixedSizeListVector::try_new(i32_elements(Vec::<i32>::new()), 0, Mask::new_true(5));
        assert!(degenerate.is_ok());
        let degenerate = degenerate.unwrap();
        assert_eq!(degenerate.len(), 5);
        assert_eq!(degenerate.list_size(), 0);

        // The degenerate case with a non-empty child vector is rejected.
        let degenerate_with_elements = FixedSizeListVector::try_new(six, 0, Mask::new_true(1));
        assert!(degenerate_with_elements.is_err());

        // `new_unchecked` with valid arguments (it still validates in debug mode).
        let four = i32_elements([1, 2, 3, 4]);
        let unchecked =
            unsafe { FixedSizeListVector::new_unchecked(four, 2, Mask::new_true(2)) };
        assert_eq!(unchecked.len(), 2);
        assert_eq!(unchecked.list_size(), 2);
    }

    #[test]
    fn test_try_into_mut_conversion() {
        // A vector that we solely own converts successfully.
        let owned =
            FixedSizeListVector::new(i32_elements([1, 2, 3, 4, 5, 6]), 3, Mask::new_true(2));
        let converted = owned.try_into_mut();
        assert!(converted.is_ok());
        let mutable = converted.unwrap();
        assert_eq!(mutable.len(), 2);
        assert_eq!(mutable.list_size(), 3);

        // Roundtrip: freezing and converting again also succeeds.
        let refrozen = mutable.freeze();
        assert!(refrozen.try_into_mut().is_ok());

        // A vector with shared ownership cannot be converted.
        let contested = FixedSizeListVector::new(i32_elements([1, 2, 3, 4]), 2, Mask::new_true(2));

        // Keep a clone alive to maintain shared ownership.
        let _shared = contested.clone();

        let rejected = contested.try_into_mut();
        assert!(rejected.is_err());

        // The error case hands back the original vector.
        if let Err(returned) = rejected {
            assert_eq!(returned.len(), 2);
            assert_eq!(returned.list_size(), 2);
        }
    }

    #[test]
    fn test_accessors_and_parts() {
        let vector =
            FixedSizeListVector::new(i32_elements([1, 2, 3, 4, 5, 6]), 2, Mask::new_true(3));

        // Accessors.
        assert_eq!(vector.len(), 3);
        assert_eq!(vector.list_size(), 2);
        assert_eq!(vector.elements().len(), 6);
        assert_eq!(vector.validity().true_count(), 3);

        // `into_parts` returns the same components.
        let (elements_part, list_size_part, validity_part) = vector.into_parts();
        assert_eq!(elements_part.len(), 6);
        assert_eq!(list_size_part, 2);
        assert_eq!(validity_part.true_count(), 3);
    }
}

0 commit comments

Comments
 (0)