Skip to content

Commit b10f3aa

Browse files
committed
add scaffolding for FixedSizeListArray::bind
We can't actually implement this right now without some sort of way to expand the mask. Signed-off-by: Connor Tsui <[email protected]>
1 parent 423b93f commit b10f3aa

File tree

5 files changed

+96
-8
lines changed

5 files changed

+96
-8
lines changed

vortex-array/src/arrays/fixed_size_list/vtable/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use crate::{EncodingId, EncodingRef, vtable};
88
mod array;
99
mod canonical;
1010
mod operations;
11+
mod operator;
1112
mod serde;
1213
mod validity;
1314
mod visitor;
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use std::sync::Arc;
5+
6+
use vortex_error::{VortexExpect, VortexResult};
7+
use vortex_vector::Vector;
8+
use vortex_vector::fixed_size_list::FixedSizeListVector;
9+
10+
use crate::ArrayRef;
11+
use crate::arrays::{FixedSizeListArray, FixedSizeListVTable};
12+
use crate::execution::{BatchKernelRef, BindCtx, kernel};
13+
use crate::vtable::{OperatorVTable, ValidityHelper};
14+
15+
impl OperatorVTable<FixedSizeListVTable> for FixedSizeListVTable {
16+
fn bind(
17+
array: &FixedSizeListArray,
18+
selection: Option<&ArrayRef>,
19+
ctx: &mut dyn BindCtx,
20+
) -> VortexResult<BatchKernelRef> {
21+
let validity = ctx.bind_validity(array.validity(), array.len(), selection)?;
22+
23+
let list_size = array.list_size();
24+
let elem_dtype = array
25+
.dtype()
26+
.as_fixed_size_list_element_opt()
27+
.vortex_expect("`FixedSizeListArray` `DType` was somehow not `FixedSizeList`")
28+
.clone();
29+
30+
// TODO(connor): Should we raise an error if a child kernel returns a data-full `elements`?
31+
// Technically nothing bad will happen if we don't because of this edge case handling below.
32+
33+
// If the size of each list is 0, then we know that the child elements must empty. Even if
34+
// the child kernel incorrectly gives us some data, we can (correctly) just throw it away.
35+
if list_size == 0 {
36+
return Ok(kernel(move || {
37+
let elements = Vector::empty(&elem_dtype);
38+
let validity_mask = validity.execute()?;
39+
40+
Ok(
41+
FixedSizeListVector::try_new(Arc::new(elements), list_size, validity_mask)?
42+
.into(),
43+
)
44+
}));
45+
}
46+
47+
// Bind the child elements by "expanding" the selection mask out by `list_size` so that we
48+
// correctly select all of the child elements we need.
49+
let expanded_selection = expand_selection(selection, list_size);
50+
let elements_kernel = ctx.bind(array.elements(), expanded_selection.as_ref())?;
51+
52+
Ok(kernel(move || {
53+
if list_size != 0 {
54+
let elements = elements_kernel.execute()?;
55+
let validity_mask = validity.execute()?;
56+
57+
Ok(
58+
FixedSizeListVector::try_new(Arc::new(elements), list_size, validity_mask)?
59+
.into(),
60+
)
61+
} else {
62+
todo!()
63+
}
64+
}))
65+
}
66+
}
67+
68+
/// Takes a selection mask and "expands" it out by duplicating each bit `list_size` times.
69+
///
70+
/// If `selection` is not `None`, the output array is guaranteed to have
71+
/// `selection.len() * list_size` total bits.
72+
fn expand_selection(_selection: Option<&ArrayRef>, _list_size: u32) -> Option<ArrayRef> {
73+
todo!(
74+
"TODO(connor)[FixedSizeList]: We need some sort of `ExpandArray` that takes the bits and
75+
duplicates them, this would be similar to a:
76+
`RunEndArray(selection_mask, ends=Constant(list_size)`
77+
(but without depending on the `vortex-runend` encoding crate"
78+
)
79+
}

vortex-vector/src/listview/vector_mut.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ impl ListViewVectorMut {
178178
///
179179
/// TODO figure out how to set offsets and sizes type?
180180
pub fn with_capacity(_element_dtype: &DType, _capacity: usize) -> Self {
181-
todo!("ListViewVectorMut::with_capacity")
181+
todo!("TODO(connor)[ListView]")
182182
}
183183

184184
/// Decomposes the [`ListViewVectorMut`] into its constituent parts (child elements, offsets,

vortex-vector/src/vector.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
//!
77
//! [`Vector`] can be transformed into the [`VectorMut`] type if it is owned.
88
9+
use vortex_dtype::DType;
910
use vortex_error::vortex_panic;
1011

1112
use crate::binaryview::{BinaryVector, StringVector};
@@ -16,7 +17,7 @@ use crate::listview::ListViewVector;
1617
use crate::null::NullVector;
1718
use crate::primitive::PrimitiveVector;
1819
use crate::struct_::StructVector;
19-
use crate::{VectorMut, VectorOps, match_each_vector};
20+
use crate::{VectorMut, VectorMutOps, VectorOps, match_each_vector};
2021

2122
/// An enum over all kinds of immutable vectors, which represent fully decompressed (canonical)
2223
/// array data.
@@ -78,6 +79,13 @@ impl VectorOps for Vector {
7879
}
7980

8081
impl Vector {
82+
/// Returns an empty `Vector` according to the given `DType`.
83+
pub fn empty(dtype: &DType) -> Self {
84+
// We _could_ manually implement this for `Vector` instead of doing this via `VectorMut` and
85+
// `freeze`, but it's probably not worth it.
86+
VectorMut::with_capacity(dtype, 0).freeze()
87+
}
88+
8189
/// Returns a reference to the inner [`NullVector`] if `self` is of that variant.
8290
pub fn as_null(&self) -> &NullVector {
8391
if let Vector::Null(v) = self {

vortex-vector/src/vector_mut.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,19 +70,19 @@ impl VectorMut {
7070
DType::Primitive(ptype, _) => {
7171
PrimitiveVectorMut::with_capacity(*ptype, capacity).into()
7272
}
73+
DType::Decimal(decimal_dtype, _) => {
74+
DecimalVectorMut::with_capacity(decimal_dtype, capacity).into()
75+
}
76+
DType::Utf8(..) => StringVectorMut::with_capacity(capacity).into(),
77+
DType::Binary(..) => BinaryVectorMut::with_capacity(capacity).into(),
78+
DType::List(..) => ListViewVectorMut::with_capacity(dtype, capacity).into(),
7379
DType::FixedSizeList(elem_dtype, list_size, _) => {
7480
FixedSizeListVectorMut::with_capacity(elem_dtype, *list_size, capacity).into()
7581
}
7682
DType::Struct(struct_fields, _) => {
7783
StructVectorMut::with_capacity(struct_fields, capacity).into()
7884
}
79-
DType::Decimal(decimal_dtype, _) => {
80-
DecimalVectorMut::with_capacity(decimal_dtype, capacity).into()
81-
}
82-
DType::Utf8(..) => StringVectorMut::with_capacity(capacity).into(),
83-
DType::Binary(..) => BinaryVectorMut::with_capacity(capacity).into(),
8485
DType::Extension(ext) => VectorMut::with_capacity(ext.storage_dtype(), capacity),
85-
DType::List(..) => ListViewVectorMut::with_capacity(dtype, capacity).into(),
8686
}
8787
}
8888
}

0 commit comments

Comments
 (0)