Skip to content

Commit e0df94a

Browse files
committed
feat: OperatorVTable::bind for VarBin/VarBinView
Signed-off-by: Andrew Duffy <[email protected]>
1 parent 84b2f96 commit e0df94a

File tree

9 files changed

+304
-26
lines changed

9 files changed

+304
-26
lines changed

vortex-array/src/arrays/varbin/vtable/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use crate::{EncodingId, EncodingRef, vtable};
88
mod array;
99
mod canonical;
1010
mod operations;
11+
mod operator;
1112
mod serde;
1213
mod validity;
1314
mod visitor;
@@ -24,7 +25,7 @@ impl VTable for VarBinVTable {
2425
type VisitorVTable = Self;
2526
type ComputeVTable = NotSupported;
2627
type EncodeVTable = NotSupported;
27-
type OperatorVTable = NotSupported;
28+
type OperatorVTable = Self;
2829
type SerdeVTable = Self;
2930

3031
fn id(_encoding: &Self::Encoding) -> EncodingId {
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use std::marker::PhantomData;
5+
use std::sync::Arc;
6+
7+
use num_traits::ToPrimitive;
8+
use vortex_buffer::{Buffer, BufferMut, ByteBuffer};
9+
use vortex_compute::filter::Filter;
10+
use vortex_dtype::{DType, PTypeDowncastExt, match_each_integer_ptype};
11+
use vortex_error::{VortexExpect, VortexResult};
12+
use vortex_vector::Vector;
13+
use vortex_vector::binaryview::{
14+
BinaryType, BinaryView, BinaryViewType, BinaryViewVector, StringType,
15+
};
16+
17+
use crate::ArrayRef;
18+
use crate::arrays::{VarBinArray, VarBinVTable};
19+
use crate::execution::{BatchKernel, BatchKernelRef, BindCtx, MaskExecution};
20+
use crate::vtable::{OperatorVTable, ValidityHelper};
21+
22+
impl OperatorVTable<VarBinVTable> for VarBinVTable {
23+
fn bind(
24+
array: &VarBinArray,
25+
selection: Option<&ArrayRef>,
26+
ctx: &mut dyn BindCtx,
27+
) -> VortexResult<BatchKernelRef> {
28+
let mask = ctx.bind_selection(array.len(), selection)?;
29+
let validity = ctx.bind_validity(array.validity(), array.len(), selection)?;
30+
let offsets = ctx.bind(array.offsets(), None)?;
31+
32+
match array.dtype() {
33+
DType::Utf8(_) => Ok(Box::new(VarBinKernel::<StringType>::new(
34+
offsets,
35+
array.bytes().clone(),
36+
validity,
37+
mask,
38+
))),
39+
DType::Binary(_) => Ok(Box::new(VarBinKernel::<BinaryType>::new(
40+
offsets,
41+
array.bytes().clone(),
42+
validity,
43+
mask,
44+
))),
45+
_ => unreachable!("invalid DType for VarBinArray {}", array.dtype()),
46+
}
47+
}
48+
}
49+
50+
struct VarBinKernel<V> {
51+
offsets: BatchKernelRef,
52+
bytes: ByteBuffer,
53+
validity: MaskExecution,
54+
selection: MaskExecution,
55+
_type: PhantomData<V>,
56+
}
57+
58+
impl<V> VarBinKernel<V> {
59+
fn new(
60+
offsets: BatchKernelRef,
61+
bytes: ByteBuffer,
62+
validity: MaskExecution,
63+
selection: MaskExecution,
64+
) -> Self {
65+
Self {
66+
offsets,
67+
bytes,
68+
validity,
69+
selection,
70+
_type: PhantomData,
71+
}
72+
}
73+
}
74+
75+
impl<V: BinaryViewType> BatchKernel for VarBinKernel<V> {
76+
fn execute(self: Box<Self>) -> VortexResult<Vector> {
77+
let offsets = self.offsets.execute()?.into_primitive();
78+
79+
match_each_integer_ptype!(offsets.ptype(), |T| {
80+
let pvec = offsets.downcast::<T>();
81+
// NOTE: discard the validity because offsets must be non-nullable
82+
let (offsets, _) = pvec.into_parts();
83+
let first = offsets[0];
84+
85+
let lens: Buffer<u32> = offsets
86+
.iter()
87+
.copied()
88+
.skip(1)
89+
.scan(first, |prev, next| {
90+
let len = (next - *prev)
91+
.to_u32()
92+
.vortex_expect("offset must map to u32");
93+
*prev = next;
94+
Some(len)
95+
})
96+
.collect();
97+
98+
let mut views = BufferMut::with_capacity(lens.len());
99+
100+
for (offset, len) in std::iter::zip(offsets, lens) {
101+
let offset = offset.to_u32().vortex_expect("offset must fit in u32");
102+
let bytes = &self.bytes[offset as usize..(offset + len) as usize];
103+
let view = if len as usize <= BinaryView::MAX_INLINED_SIZE {
104+
BinaryView::new_inlined(bytes)
105+
} else {
106+
BinaryView::make_view(bytes, 0, offset)
107+
};
108+
views.push(view);
109+
}
110+
111+
let selection = self.selection.execute()?;
112+
let validity = self.validity.execute()?;
113+
114+
let views = views.freeze().filter(&selection);
115+
116+
Ok(Vector::from(BinaryViewVector::<V>::new(
117+
views,
118+
Arc::new([self.bytes.clone()]),
119+
validity,
120+
)))
121+
})
122+
}
123+
}
124+
125+
#[cfg(test)]
mod tests {
    use rstest::{fixture, rstest};
    use vortex_dtype::{DType, Nullability};

    use crate::IntoArray;
    use crate::arrays::builder::VarBinBuilder;
    use crate::arrays::{BoolArray, VarBinArray};

    /// Five nullable UTF-8 rows: one short value, one null, then three longer values.
    #[fixture]
    fn strings() -> VarBinArray {
        let mut builder = VarBinBuilder::<u32>::with_capacity(5);
        builder.append_value("inlined");
        builder.append_null();
        for large in ["large string 1", "large string 2", "large string 3"] {
            builder.append_value(large);
        }
        builder.finish(DType::Utf8(Nullability::Nullable))
    }

    /// Binding without a selection yields every row unchanged.
    #[rstest]
    fn test_bind(strings: VarBinArray) {
        let kernel = strings.bind(None, &mut ()).unwrap();
        let vector = kernel.execute().unwrap().into_string();

        let expected = [
            Some("inlined"),
            None,
            Some("large string 1"),
            Some("large string 2"),
            Some("large string 3"),
        ];
        for (row, want) in expected.into_iter().enumerate() {
            assert_eq!(vector.get(row), want);
        }
    }

    /// Binding with a selection keeps only the selected rows, in their original order.
    #[rstest]
    fn test_bind_with_selection(strings: VarBinArray) {
        let keep = [false, true, false, true, true];
        let selection = BoolArray::from_iter(keep).into_array();

        let kernel = strings.bind(Some(&selection), &mut ()).unwrap();
        let vector = kernel.execute().unwrap().into_string();

        let expected = [None, Some("large string 2"), Some("large string 3")];
        for (row, want) in expected.into_iter().enumerate() {
            assert_eq!(vector.get(row), want);
        }
    }
}

vortex-array/src/arrays/varbinview/vtable/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use crate::{EncodingId, EncodingRef, vtable};
88
mod array;
99
mod canonical;
1010
mod operations;
11+
mod operator;
1112
mod serde;
1213
mod validity;
1314
mod visitor;
@@ -25,7 +26,7 @@ impl VTable for VarBinViewVTable {
2526
type VisitorVTable = Self;
2627
type ComputeVTable = NotSupported;
2728
type EncodeVTable = NotSupported;
28-
type OperatorVTable = NotSupported;
29+
type OperatorVTable = Self;
2930
type SerdeVTable = Self;
3031

3132
fn id(_encoding: &Self::Encoding) -> EncodingId {
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_buffer::Buffer;
5+
use vortex_compute::filter::Filter;
6+
use vortex_dtype::DType;
7+
use vortex_error::VortexResult;
8+
use vortex_vector::Vector;
9+
use vortex_vector::binaryview::{BinaryVector, BinaryView, BinaryViewTypeUpcast, StringVector};
10+
11+
use crate::ArrayRef;
12+
use crate::arrays::{VarBinViewArray, VarBinViewVTable};
13+
use crate::execution::{BatchKernelRef, BindCtx, kernel};
14+
use crate::vtable::{OperatorVTable, ValidityHelper};
15+
16+
impl OperatorVTable<VarBinViewVTable> for VarBinViewVTable {
17+
fn bind(
18+
array: &VarBinViewArray,
19+
selection: Option<&ArrayRef>,
20+
ctx: &mut dyn BindCtx,
21+
) -> VortexResult<BatchKernelRef> {
22+
let mask = ctx.bind_selection(array.len(), selection)?;
23+
let validity = ctx.bind_validity(array.validity(), array.len(), selection)?;
24+
let dtype = array.dtype().clone();
25+
26+
let views = array.views().clone();
27+
let buffers = array.buffers().clone();
28+
29+
Ok(kernel(move || {
30+
let selection = mask.execute()?;
31+
let validity = validity.execute()?;
32+
33+
// We only filter the views buffer
34+
let views = views.filter(&selection);
35+
let views = Buffer::<BinaryView>::from_byte_buffer(views.into_byte_buffer());
36+
37+
match dtype {
38+
// SAFETY: the incoming array has the same validation as the vector
39+
DType::Utf8(_) => Ok(Vector::from_string(unsafe {
40+
StringVector::new_unchecked(views, buffers, validity)
41+
})),
42+
43+
// SAFETY: the incoming array has the same validation as the vector
44+
DType::Binary(_) => Ok(Vector::from_binary(unsafe {
45+
BinaryVector::new_unchecked(views, buffers, validity)
46+
})),
47+
_ => unreachable!("invalid dtype for VarBinViewArray {dtype}"),
48+
}
49+
}))
50+
}
51+
}
52+
53+
#[cfg(test)]
mod tests {
    use rstest::{fixture, rstest};
    use vortex_dtype::{DType, Nullability};

    use crate::IntoArray;
    use crate::arrays::{BoolArray, VarBinViewArray};
    use crate::builders::{ArrayBuilder, VarBinViewBuilder};

    /// Five nullable UTF-8 rows: one short value, one null, then three longer values.
    #[fixture]
    fn strings() -> VarBinViewArray {
        let dtype = DType::Utf8(Nullability::Nullable);
        let mut builder = VarBinViewBuilder::with_capacity(dtype, 5);
        builder.append_value("inlined");
        builder.append_nulls(1);
        for large in ["large string 1", "large string 2", "large string 3"] {
            builder.append_value(large);
        }
        builder.finish_into_varbinview()
    }

    /// Binding without a selection yields every row unchanged.
    #[rstest]
    fn test_bind(strings: VarBinViewArray) {
        let kernel = strings.bind(None, &mut ()).unwrap();
        let vector = kernel.execute().unwrap().into_string();

        let expected = [
            Some("inlined"),
            None,
            Some("large string 1"),
            Some("large string 2"),
            Some("large string 3"),
        ];
        for (row, want) in expected.into_iter().enumerate() {
            assert_eq!(vector.get(row), want);
        }
    }

    /// Binding with a selection keeps only the selected rows, in their original order.
    #[rstest]
    fn test_bind_with_selection(strings: VarBinViewArray) {
        let keep = [false, true, false, true, true];
        let selection = BoolArray::from_iter(keep).into_array();

        let kernel = strings.bind(Some(&selection), &mut ()).unwrap();
        let vector = kernel.execute().unwrap().into_string();

        let expected = [None, Some("large string 2"), Some("large string 3")];
        for (row, want) in expected.into_iter().enumerate() {
            assert_eq!(vector.get(row), want);
        }
    }
}

vortex-vector/src/binaryview/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
pub use types::*;
1212
pub use vector::*;
1313
pub use vector_mut::*;
14+
pub use view::*;
1415
use vortex_error::vortex_panic;
1516

1617
use crate::{Vector, VectorMut};

vortex-vector/src/binaryview/types.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ impl<T: BinaryViewType> From<BinaryViewVectorMut<T>> for VectorMut {
2121
}
2222

2323
/// Trait to mark supported binary view types.
24-
pub trait BinaryViewType: Debug + Sized + private::Sealed {
24+
pub trait BinaryViewType: Debug + Sized + Send + Sync + 'static + private::Sealed {
2525
/// The slice type for this variable binary type.
2626
type Slice: ?Sized + AsRef<[u8]>;
2727

0 commit comments

Comments
 (0)