Skip to content

Commit feb0905

Browse files
committed
VarBinVector
Signed-off-by: Nicholas Gates <[email protected]>
1 parent bb3989f commit feb0905

File tree

11 files changed

+656
-4
lines changed

11 files changed

+656
-4
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-vector/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,5 @@ vortex-buffer = { workspace = true }
2424
vortex-dtype = { workspace = true }
2525
vortex-error = { workspace = true }
2626
vortex-mask = { workspace = true }
27+
28+
static_assertions = { workspace = true }

vortex-vector/src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,16 @@ mod bool;
1515
mod null;
1616
mod primitive;
1717
mod struct_;
18+
mod varbin;
1819

1920
pub use bool::{BoolVector, BoolVectorMut};
2021
pub use null::{NullVector, NullVectorMut};
2122
pub use primitive::{PVector, PVectorMut, PrimitiveVector, PrimitiveVectorMut};
2223
pub use struct_::{StructVector, StructVectorMut};
24+
pub use varbin::{
25+
BinaryVector, BinaryVectorMut, StringVector, StringVectorMut, VarBinType, VarBinVector,
26+
VarBinVectorMut,
27+
};
2328

2429
mod ops;
2530
mod vector;

vortex-vector/src/macros.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,14 @@ macro_rules! match_each_vector {
4949
let $vec = v;
5050
$body
5151
}
52+
$crate::Vector::String(v) => {
53+
let $vec = v;
54+
$body
55+
}
56+
$crate::Vector::Binary(v) => {
57+
let $vec = v;
58+
$body
59+
}
5260
$crate::Vector::Struct(v) => {
5361
let $vec = v;
5462
$body
@@ -104,6 +112,14 @@ macro_rules! match_each_vector_mut {
104112
let $vec = v;
105113
$body
106114
}
115+
$crate::VectorMut::String(v) => {
116+
let $vec = v;
117+
$body
118+
}
119+
$crate::VectorMut::Binary(v) => {
120+
let $vec = v;
121+
$body
122+
}
107123
$crate::VectorMut::Struct(v) => {
108124
let $vec = v;
109125
$body

vortex-vector/src/private.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,5 +29,8 @@ impl Sealed for PrimitiveVectorMut {}
2929
impl<T: NativePType> Sealed for PVector<T> {}
3030
impl<T: NativePType> Sealed for PVectorMut<T> {}
3131

32+
impl<T: VarBinType> Sealed for VarBinVector<T> {}
33+
impl<T: VarBinType> Sealed for VarBinVectorMut<T> {}
34+
3235
impl Sealed for StructVector {}
3336
impl Sealed for StructVectorMut {}

vortex-vector/src/varbin/mod.rs

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use crate::{Vector, VectorMut};
5+
use std::fmt::Debug;
6+
7+
mod vector;
8+
pub use vector::VarBinVector;
9+
10+
mod vector_mut;
11+
pub use vector_mut::VarBinVectorMut;
12+
use vortex_error::vortex_panic;
13+
14+
mod view;
15+
16+
/// Type alias for non-utf8 variable-length binary vectors.
17+
pub type BinaryVector = VarBinVector<BinaryType>;
18+
/// Type alias for mutable non-utf8 variable-length binary vectors.
19+
pub type BinaryVectorMut = VarBinVectorMut<BinaryType>;
20+
/// Type alias for UTF-8 variable-length string vectors.
21+
pub type StringVector = VarBinVector<StringType>;
22+
/// Type alias for mutable UTF-8 variable-length string vectors.
23+
pub type StringVectorMut = VarBinVectorMut<StringType>;
24+
25+
/// Erases the element-type parameter of a [`VarBinVector`] by converting it into the
/// type-erased [`Vector`].
impl<T> From<VarBinVector<T>> for Vector
where
    T: VarBinType,
{
    fn from(typed: VarBinVector<T>) -> Self {
        // The element type `T` decides which `VarBinTypeUpcast` constructor is used.
        <T as VarBinType>::upcast::<Self>(typed)
    }
}
30+
31+
/// Erases the element-type parameter of a [`VarBinVectorMut`] by converting it into the
/// type-erased [`VectorMut`].
impl<T> From<VarBinVectorMut<T>> for VectorMut
where
    T: VarBinType,
{
    fn from(typed: VarBinVectorMut<T>) -> Self {
        // The element type `T` decides which `VarBinTypeUpcast` constructor is used.
        <T as VarBinType>::upcast::<Self>(typed)
    }
}
36+
37+
/// Marker trait for the element types supported by variable-length binary vectors.
///
/// The trait is sealed through `private::Sealed`, so it cannot be implemented outside
/// this module.
38+
pub trait VarBinType: Debug + Sized + private::Sealed {
39+
/// The borrowed slice type for this variable binary type. It can always be viewed as
/// raw bytes through `AsRef<[u8]>`.
40+
type Slice: ?Sized + AsRef<[u8]>;
41+
42+
/// Downcast the provided type-erased object to a type-specific instance.
///
/// Implementations call the [`VarBinTypeDowncast`] method that corresponds to `Self`.
43+
fn downcast<V: VarBinTypeDowncast>(visitor: V) -> V::Output<Self>;
44+
45+
/// Upcast a type-specific instance to a generic, type-erased instance.
///
/// Implementations call the [`VarBinTypeUpcast`] constructor that corresponds to `Self`.
46+
fn upcast<V: VarBinTypeUpcast>(input: V::Input<Self>) -> V;
47+
}
48+
49+
/// [`BinaryType`] for UTF-8 strings.
50+
#[derive(Clone, Debug)]
51+
pub struct StringType;
52+
impl VarBinType for StringType {
53+
type Slice = str;
54+
55+
fn downcast<V: VarBinTypeDowncast>(visitor: V) -> V::Output<Self> {
56+
visitor.into_string()
57+
}
58+
59+
fn upcast<V: VarBinTypeUpcast>(input: V::Input<Self>) -> V {
60+
V::from_string(input)
61+
}
62+
}
63+
64+
/// [`BinaryType`] for raw binary data.
65+
#[derive(Clone, Debug)]
66+
pub struct BinaryType;
67+
impl VarBinType for BinaryType {
68+
type Slice = [u8];
69+
70+
fn downcast<V: VarBinTypeDowncast>(visitor: V) -> V::Output<Self> {
71+
visitor.into_binary()
72+
}
73+
74+
fn upcast<V: VarBinTypeUpcast>(input: V::Input<Self>) -> V {
75+
V::from_binary(input)
76+
}
77+
}
78+
79+
/// Conversion from a type-erased container into its element-type-specific form.
///
/// [`VarBinType::downcast`] calls the method that matches the requested element type.
/// The implementations in this module panic when `self` holds a different variant.
pub trait VarBinTypeDowncast {
80+
/// The type-specific value produced for element type `T`.
type Output<T: VarBinType>;
81+
82+
/// Converts `self` into its [`BinaryType`] form.
fn into_binary(self) -> Self::Output<BinaryType>;
83+
/// Converts `self` into its [`StringType`] form.
fn into_string(self) -> Self::Output<StringType>;
84+
}
85+
86+
/// Conversion from an element-type-specific value into a type-erased container.
///
/// [`VarBinType::upcast`] calls the constructor that matches the element type.
pub trait VarBinTypeUpcast {
87+
/// The type-specific value accepted for element type `T`.
type Input<T: VarBinType>;
88+
89+
/// Builds `Self` from a [`BinaryType`] input.
fn from_binary(input: Self::Input<BinaryType>) -> Self;
90+
/// Builds `Self` from a [`StringType`] input.
fn from_string(input: Self::Input<StringType>) -> Self;
91+
}
92+
93+
impl VarBinTypeDowncast for Vector {
94+
type Output<T: VarBinType> = VarBinVector<T>;
95+
96+
fn into_binary(self) -> Self::Output<BinaryType> {
97+
if let Vector::Binary(v) = self {
98+
return v;
99+
}
100+
vortex_panic!("Expected BinaryVector, got {self:?}");
101+
}
102+
103+
fn into_string(self) -> Self::Output<StringType> {
104+
if let Vector::String(v) = self {
105+
return v;
106+
}
107+
vortex_panic!("Expected StringVector, got {self:?}");
108+
}
109+
}
110+
111+
impl VarBinTypeUpcast for Vector {
112+
type Input<T: VarBinType> = VarBinVector<T>;
113+
114+
fn from_binary(input: Self::Input<BinaryType>) -> Self {
115+
Vector::Binary(input)
116+
}
117+
118+
fn from_string(input: Self::Input<StringType>) -> Self {
119+
Vector::String(input)
120+
}
121+
}
122+
123+
impl VarBinTypeDowncast for VectorMut {
124+
type Output<T: VarBinType> = VarBinVectorMut<T>;
125+
126+
fn into_binary(self) -> Self::Output<BinaryType> {
127+
if let VectorMut::Binary(v) = self {
128+
return v;
129+
}
130+
vortex_panic!("Expected BinaryVector, got {self:?}");
131+
}
132+
133+
fn into_string(self) -> Self::Output<StringType> {
134+
if let VectorMut::String(v) = self {
135+
return v;
136+
}
137+
vortex_panic!("Expected StringVector, got {self:?}");
138+
}
139+
}
140+
141+
impl VarBinTypeUpcast for VectorMut {
142+
type Input<T: VarBinType> = VarBinVectorMut<T>;
143+
144+
fn from_binary(input: Self::Input<BinaryType>) -> Self {
145+
VectorMut::Binary(input)
146+
}
147+
148+
fn from_string(input: Self::Input<StringType>) -> Self {
149+
VectorMut::String(input)
150+
}
151+
}
152+
153+
// Sealing support for `VarBinType`: `Sealed` lives in a private module, so code outside
// this module cannot name it and therefore cannot implement `VarBinType`.
mod private {
154+
/// Supertrait required by `VarBinType`; implemented only for the types listed below.
pub trait Sealed {}
155+
impl Sealed for super::StringType {}
156+
impl Sealed for super::BinaryType {}
157+
}

vortex-vector/src/varbin/vector.rs

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use crate::varbin::vector_mut::VarBinVectorMut;
5+
use crate::varbin::view::BinaryView;
6+
use crate::varbin::VarBinType;
7+
use crate::VectorOps;
8+
use std::sync::Arc;
9+
use vortex_buffer::{Buffer, ByteBuffer};
10+
use vortex_mask::Mask;
11+
12+
/// A variable-length binary vector.
///
/// The element type parameter `T` only exists at the type level (see `_marker`).
13+
#[derive(Debug, Clone)]
14+
pub struct VarBinVector<T: VarBinType> {
15+
// One view per element; the vector's length is the number of views.
views: Buffer<BinaryView>,
16+
// Validity mask returned by `VectorOps::validity`.
validity: Mask,
17+
// Byte buffers that the views are expected to reference (see `new_unchecked`).
buffers: Arc<Box<[ByteBuffer]>>,
18+
// Ties the vector to its element type without storing a value of it.
_marker: std::marker::PhantomData<T>,
19+
}
20+
21+
impl<T: VarBinType> VarBinVector<T> {
22+
/// Creates a new [`VarBinVector`] from the provided components.
23+
///
24+
/// # Safety
25+
///
26+
/// This function is unsafe because it does not validate the consistency of the provided
27+
/// components.
28+
///
29+
/// The caller must ensure that:
30+
/// - The length of the `validity` mask matches the length of the `views` buffer.
31+
/// - The `views` buffer correctly references the data in the `buffers`.
32+
pub unsafe fn new_unchecked(
33+
views: Buffer<BinaryView>,
34+
validity: Mask,
35+
buffers: Arc<Box<[ByteBuffer]>>,
36+
) -> Self {
37+
Self {
38+
views,
39+
validity,
40+
buffers,
41+
_marker: std::marker::PhantomData,
42+
}
43+
}
44+
}
45+
46+
// Immutable vector operations for `VarBinVector`.
impl<T: VarBinType> VectorOps for VarBinVector<T> {
47+
// The mutable counterpart of this vector.
type Mutable = VarBinVectorMut<T>;
48+
49+
// The length is the number of views (one view per element).
fn len(&self) -> usize {
50+
self.views.len()
51+
}
52+
53+
// Borrows the validity mask stored alongside the views.
fn validity(&self) -> &Mask {
54+
&self.validity
55+
}
56+
57+
// Not implemented yet: calling this currently panics via `todo!()`.
fn try_into_mut(self) -> Result<Self::Mutable, Self>
58+
where
59+
Self: Sized,
60+
{
61+
todo!()
62+
}
63+
}
vortex-vector/src/varbin/vector_mut.rs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use crate::varbin::vector::VarBinVector;
5+
use crate::varbin::view::BinaryView;
6+
use crate::varbin::VarBinType;
7+
use crate::VectorMutOps;
8+
use vortex_buffer::{BufferMut, ByteBuffer};
9+
use vortex_mask::MaskMut;
10+
11+
/// Mutable variable-length binary vector.
///
/// The element type parameter `T` only exists at the type level (see `_marker`).
12+
#[derive(Clone, Debug)]
13+
pub struct VarBinVectorMut<T: VarBinType> {
14+
// One view per element; length and capacity are reported from this buffer.
views: BufferMut<BinaryView>,
15+
// Validity bits, appended to in step with `views`.
validity: MaskMut,
16+
17+
// Byte buffers supplied at construction.
buffers: Vec<ByteBuffer>,
18+
// Starts out as `None`.
// NOTE(review): presumably the buffer currently being filled — confirm once the
// write path is implemented.
open_buffer: Option<ByteBuffer>,
19+
20+
// Ties the vector to its element type without storing a value of it.
_marker: std::marker::PhantomData<T>,
21+
}
22+
23+
impl<T: VarBinType> VarBinVectorMut<T> {
24+
pub(super) fn new(
25+
views: BufferMut<BinaryView>,
26+
validity: MaskMut,
27+
buffers: Vec<ByteBuffer>,
28+
) -> Self {
29+
Self {
30+
views,
31+
validity,
32+
buffers,
33+
open_buffer: None,
34+
_marker: std::marker::PhantomData,
35+
}
36+
}
37+
}
38+
39+
impl<T: VarBinType> VectorMutOps for VarBinVectorMut<T> {
40+
type Immutable = VarBinVector<T>;
41+
42+
fn len(&self) -> usize {
43+
self.views.len()
44+
}
45+
46+
fn capacity(&self) -> usize {
47+
self.views.capacity()
48+
}
49+
50+
fn reserve(&mut self, additional: usize) {
51+
self.views.reserve(additional);
52+
}
53+
54+
fn extend_from_vector(&mut self, other: &Self::Immutable) {
55+
todo!()
56+
}
57+
58+
fn append_nulls(&mut self, n: usize) {
59+
self.views.push_n(BinaryView::empty_view(), n);
60+
self.validity.append_n(false, n);
61+
}
62+
63+
fn freeze(self) -> Self::Immutable {
64+
todo!()
65+
}
66+
67+
fn split_off(&mut self, at: usize) -> Self {
68+
todo!()
69+
}
70+
71+
fn unsplit(&mut self, other: Self) {
72+
todo!()
73+
}
74+
}

0 commit comments

Comments
 (0)