Skip to content

Commit 78d86e3

Browse files
committed
feat: rest of BinaryViewVector{,Mut}
Signed-off-by: Andrew Duffy <[email protected]>
1 parent 8945cd8 commit 78d86e3

File tree

15 files changed

+744
-272
lines changed

15 files changed

+744
-272
lines changed

Cargo.lock

Lines changed: 7 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-buffer/src/buffer.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,12 @@ impl<T> Buffer<T> {
226226
unsafe { std::slice::from_raw_parts(self.bytes.as_ptr().cast(), self.length) }
227227
}
228228

229+
/// Return a view over the buffer as an opaque byte slice.
230+
#[inline(always)]
231+
pub fn as_bytes(&self) -> &[u8] {
232+
self.bytes.as_ref()
233+
}
234+
229235
/// Returns an iterator over the buffer of elements of type T.
230236
pub fn iter(&self) -> Iter<'_, T> {
231237
Iter {
@@ -319,7 +325,7 @@ impl<T> Buffer<T> {
319325

320326
/// Returns a slice of self that is equivalent to the given subset.
321327
///
322-
/// When processing the buffer you will often end up with &\[T\] that is a subset
328+
/// When processing the buffer you will often end up with `&[T]` that is a subset
323329
/// of the underlying buffer. This function turns the slice into a slice of the buffer
324330
/// it has been taken from.
325331
///
@@ -332,7 +338,7 @@ impl<T> Buffer<T> {
332338

333339
/// Returns a slice of self that is equivalent to the given subset.
334340
///
335-
/// When processing the buffer you will often end up with &\[T\] that is a subset
341+
/// When processing the buffer you will often end up with `&[T]` that is a subset
336342
/// of the underlying buffer. This function turns the slice into a slice of the buffer
337343
/// it has been taken from.
338344
///

vortex-compute/src/arrow/varbin.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@ use std::sync::Arc;
66
use arrow_array::{ArrayRef, GenericByteViewArray};
77
use vortex_buffer::Buffer;
88
use vortex_error::VortexResult;
9-
use vortex_vector::{BinaryType, StringType, VarBinVector};
9+
use vortex_vector::{BinaryType, BinaryViewVector, StringType};
1010

1111
use crate::arrow::IntoArrow;
1212

1313
macro_rules! impl_varbin {
1414
($T:ty, $A:ty) => {
15-
impl IntoArrow<ArrayRef> for VarBinVector<$T> {
15+
impl IntoArrow<ArrayRef> for BinaryViewVector<$T> {
1616
fn into_arrow(self) -> VortexResult<ArrayRef> {
1717
let (views, buffers, validity) = self.into_parts();
1818

vortex-compute/src/mask/mod.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ use std::ops::BitAnd;
88
use vortex_dtype::{NativeDecimalType, NativePType};
99
use vortex_mask::Mask;
1010
use vortex_vector::{
11-
BoolVector, DVector, DecimalVector, NullVector, PVector, PrimitiveVector, StructVector,
12-
VarBinType, VarBinVector, Vector, match_each_dvector, match_each_pvector, match_each_vector,
11+
BinaryViewType, BinaryViewVector, BoolVector, DVector, DecimalVector, NullVector, PVector,
12+
PrimitiveVector, StructVector, Vector, match_each_dvector, match_each_pvector,
13+
match_each_vector,
1314
};
1415

1516
/// Trait for masking the validity of an array or vector.
@@ -70,7 +71,7 @@ impl<T: NativePType> MaskValidity for PVector<T> {
7071
}
7172
}
7273

73-
impl<T: VarBinType> MaskValidity for VarBinVector<T> {
74+
impl<T: BinaryViewType> MaskValidity for BinaryViewVector<T> {
7475
fn mask_validity(self, mask: &Mask) -> Self {
7576
let (views, buffers, validity) = self.into_parts();
7677
// SAFETY: we are preserving the original views and buffers, only modifying the validity.
Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
//! Definition and implementation of variable-length binary types.
5+
//!
6+
//! All types are specializations of the [`BinaryViewVector`] type, which is represented internally
7+
//! by `BinaryView`s. `BinaryView`s are identical to the `BinaryView` type defined by the Arrow
8+
//! [specification](https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-view-layout),
9+
//! which are inspired by "German" strings.
10+
411
pub use types::*;
512
pub use vector::*;
613
pub use vector_mut::*;
@@ -14,16 +21,16 @@ mod vector_mut;
1421
mod view;
1522

1623
/// Type alias for non-utf8 variable-length binary vectors.
17-
pub type BinaryVector = VarBinVector<BinaryType>;
24+
pub type BinaryVector = BinaryViewVector<BinaryType>;
1825
/// Type alias for mutable non-utf8 variable-length binary vectors.
19-
pub type BinaryVectorMut = VarBinVectorMut<BinaryType>;
26+
pub type BinaryVectorMut = BinaryViewVectorMut<BinaryType>;
2027
/// Type alias for UTF-8 variable-length string vectors.
21-
pub type StringVector = VarBinVector<StringType>;
28+
pub type StringVector = BinaryViewVector<StringType>;
2229
/// Type alias for mutable UTF-8 variable-length string vectors.
23-
pub type StringVectorMut = VarBinVectorMut<StringType>;
30+
pub type StringVectorMut = BinaryViewVectorMut<StringType>;
2431

25-
impl VarBinTypeDowncast for Vector {
26-
type Output<T: VarBinType> = VarBinVector<T>;
32+
impl BinaryViewDowncast for Vector {
33+
type Output<T: BinaryViewType> = BinaryViewVector<T>;
2734

2835
fn into_binary(self) -> Self::Output<BinaryType> {
2936
if let Vector::Binary(v) = self {
@@ -40,8 +47,8 @@ impl VarBinTypeDowncast for Vector {
4047
}
4148
}
4249

43-
impl VarBinTypeUpcast for Vector {
44-
type Input<T: VarBinType> = VarBinVector<T>;
50+
impl BinaryViewTypeUpcast for Vector {
51+
type Input<T: BinaryViewType> = BinaryViewVector<T>;
4552

4653
fn from_binary(input: Self::Input<BinaryType>) -> Self {
4754
Vector::Binary(input)
@@ -52,8 +59,8 @@ impl VarBinTypeUpcast for Vector {
5259
}
5360
}
5461

55-
impl VarBinTypeDowncast for VectorMut {
56-
type Output<T: VarBinType> = VarBinVectorMut<T>;
62+
impl BinaryViewDowncast for VectorMut {
63+
type Output<T: BinaryViewType> = BinaryViewVectorMut<T>;
5764

5865
fn into_binary(self) -> Self::Output<BinaryType> {
5966
if let VectorMut::Binary(v) = self {
@@ -70,8 +77,8 @@ impl VarBinTypeDowncast for VectorMut {
7077
}
7178
}
7279

73-
impl VarBinTypeUpcast for VectorMut {
74-
type Input<T: VarBinType> = VarBinVectorMut<T>;
80+
impl BinaryViewTypeUpcast for VectorMut {
81+
type Input<T: BinaryViewType> = BinaryViewVectorMut<T>;
7582

7683
fn from_binary(input: Self::Input<BinaryType>) -> Self {
7784
VectorMut::Binary(input)
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Variable-length binary types and related traits.
5+
6+
use std::fmt::Debug;
7+
8+
use crate::{BinaryViewVector, BinaryViewVectorMut, Vector, VectorMut};
9+
10+
impl<T: BinaryViewType> From<BinaryViewVector<T>> for Vector {
11+
fn from(value: BinaryViewVector<T>) -> Self {
12+
T::upcast(value)
13+
}
14+
}
15+
16+
impl<T: BinaryViewType> From<BinaryViewVectorMut<T>> for VectorMut {
17+
fn from(value: BinaryViewVectorMut<T>) -> Self {
18+
T::upcast(value)
19+
}
20+
}
21+
22+
/// Trait to mark supported binary view types.
23+
pub trait BinaryViewType: Debug + Sized + private::Sealed {
24+
/// The slice type for this variable binary type.
25+
type Slice: ?Sized + AsRef<[u8]>;
26+
27+
/// Validate if a set of bytes conforms to the logical type constraints of the native `Slice`.
28+
fn validate(bytes: &[u8]) -> bool;
29+
30+
/// Returns the bytes as the native `Slice` type
31+
/// for this binary view vector.
32+
fn from_bytes(bytes: &[u8]) -> &Self::Slice;
33+
34+
/// Downcast the provided object to a type-specific instance.
35+
fn downcast<V: BinaryViewDowncast>(visitor: V) -> V::Output<Self>;
36+
37+
/// Upcast a type-specific instance to a generic instance.
38+
fn upcast<V: BinaryViewTypeUpcast>(input: V::Input<Self>) -> V;
39+
}
40+
41+
/// [`BinaryViewType`] for UTF-8 strings.
42+
#[derive(Clone, Debug)]
43+
pub struct StringType;
44+
impl BinaryViewType for StringType {
45+
type Slice = str;
46+
47+
#[inline(always)]
48+
fn validate(bytes: &[u8]) -> bool {
49+
std::str::from_utf8(bytes).is_ok()
50+
}
51+
52+
fn from_bytes(bytes: &[u8]) -> &Self::Slice {
53+
// SAFETY: vectors should be checked at the boundary for upholding the UTF-8 invariant,
54+
// or only be built from vectors that are known to satisfy the invariant.
55+
unsafe { std::str::from_utf8_unchecked(bytes) }
56+
}
57+
58+
fn downcast<V: BinaryViewDowncast>(visitor: V) -> V::Output<Self> {
59+
visitor.into_string()
60+
}
61+
62+
fn upcast<V: BinaryViewTypeUpcast>(input: V::Input<Self>) -> V {
63+
V::from_string(input)
64+
}
65+
}
66+
67+
/// [`BinaryViewType`] for raw binary data.
68+
#[derive(Clone, Debug)]
69+
pub struct BinaryType;
70+
impl BinaryViewType for BinaryType {
71+
type Slice = [u8];
72+
73+
#[inline(always)]
74+
fn validate(_bytes: &[u8]) -> bool {
75+
true
76+
}
77+
78+
fn from_bytes(bytes: &[u8]) -> &Self::Slice {
79+
bytes
80+
}
81+
82+
fn downcast<V: BinaryViewDowncast>(visitor: V) -> V::Output<Self> {
83+
visitor.into_binary()
84+
}
85+
86+
fn upcast<V: BinaryViewTypeUpcast>(input: V::Input<Self>) -> V {
87+
V::from_binary(input)
88+
}
89+
}
90+
91+
/// Trait for downcasting generic variable binary types to specific types.
92+
pub trait BinaryViewDowncast {
93+
/// The output type after downcasting.
94+
type Output<T: BinaryViewType>;
95+
96+
/// Downcast to a binary type.
97+
fn into_binary(self) -> Self::Output<BinaryType>;
98+
/// Downcast to a string type.
99+
fn into_string(self) -> Self::Output<StringType>;
100+
}
101+
102+
/// Trait for upcasting specific variable binary types to generic types.
103+
pub trait BinaryViewTypeUpcast {
104+
/// The input type for upcasting.
105+
type Input<T: BinaryViewType>;
106+
107+
/// Upcast from a binary type.
108+
fn from_binary(input: Self::Input<BinaryType>) -> Self;
109+
/// Upcast from a string type.
110+
fn from_string(input: Self::Input<StringType>) -> Self;
111+
}
112+
113+
/// Private module to seal the `BinaryViewType` trait.
114+
mod private {
115+
/// Sealed trait to prevent external implementations of [`BinaryViewType`](super::BinaryViewType).
116+
pub trait Sealed {}
117+
118+
impl Sealed for super::StringType {}
119+
impl Sealed for super::BinaryType {}
120+
}

0 commit comments

Comments
 (0)