Skip to content

Commit a9bde6b

Browse files
authored
More vector scalars (#5459)
Signed-off-by: Nicholas Gates <[email protected]>
1 parent 4ec0f68 commit a9bde6b

35 files changed

+557
-83
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-scalar/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@ prost = { workspace = true }
2727
vortex-buffer = { workspace = true }
2828
vortex-dtype = { workspace = true, features = ["arrow"] }
2929
vortex-error = { workspace = true }
30+
vortex-mask = { workspace = true }
3031
vortex-proto = { workspace = true, features = ["scalar"] }
3132
vortex-utils = { workspace = true }
33+
vortex-vector = { workspace = true }
3234

3335
[dev-dependencies]
3436
rstest = { workspace = true }

vortex-scalar/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ mod struct_;
2828
#[cfg(test)]
2929
mod tests;
3030
mod utf8;
31+
mod vectors;
3132

3233
pub use binary::*;
3334
pub use bool::*;

vortex-scalar/src/vectors.rs

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Conversion logic from this "legacy" scalar crate to Vortex Vector scalars.
5+
6+
use std::sync::Arc;
7+
8+
use vortex_buffer::Buffer;
9+
use vortex_dtype::{
10+
DType, DecimalType, PrecisionScale, match_each_decimal_value_type, match_each_native_ptype,
11+
};
12+
use vortex_error::VortexExpect;
13+
use vortex_mask::Mask;
14+
use vortex_vector::binaryview::{BinaryScalar, StringScalar};
15+
use vortex_vector::bool::BoolScalar;
16+
use vortex_vector::decimal::DScalar;
17+
use vortex_vector::fixed_size_list::{FixedSizeListScalar, FixedSizeListVector};
18+
use vortex_vector::listview::{ListViewScalar, ListViewVector, ListViewVectorMut};
19+
use vortex_vector::null::NullScalar;
20+
use vortex_vector::primitive::{PScalar, PVector};
21+
use vortex_vector::struct_::{StructScalar, StructVector};
22+
use vortex_vector::{VectorMut, VectorMutOps};
23+
24+
use crate::Scalar;
25+
26+
impl Scalar {
27+
/// Convert the `vortex-scalar` [`Scalar`] into a `vortex-vector` [`vortex_vector::Scalar`].
28+
pub fn to_vector_scalar(&self) -> vortex_vector::Scalar {
29+
match self.dtype() {
30+
DType::Null => NullScalar.into(),
31+
DType::Bool(_) => BoolScalar::new(self.as_bool().value()).into(),
32+
DType::Primitive(ptype, _) => {
33+
match_each_native_ptype!(ptype, |T| {
34+
PScalar::new(self.as_primitive().typed_value::<T>()).into()
35+
})
36+
}
37+
DType::Decimal(dec_dtype, _) => {
38+
let dscalar = self.as_decimal();
39+
let dec_type = DecimalType::smallest_decimal_value_type(dec_dtype);
40+
match_each_decimal_value_type!(dec_type, |D| {
41+
let ps = PrecisionScale::<D>::new(dec_dtype.precision(), dec_dtype.scale());
42+
DScalar::maybe_new(
43+
ps,
44+
dscalar
45+
.decimal_value()
46+
.map(|d| d.cast::<D>().vortex_expect("Failed to cast decimal value")),
47+
)
48+
.vortex_expect("Failed to create decimal scalar")
49+
.into()
50+
})
51+
}
52+
DType::Utf8(_) => StringScalar::new(self.as_utf8().value()).into(),
53+
DType::Binary(_) => BinaryScalar::new(self.as_binary().value()).into(),
54+
DType::List(elems_dtype, _) => {
55+
let lscalar = self.as_list();
56+
match lscalar.elements() {
57+
None => {
58+
let mut list_view = ListViewVectorMut::with_capacity(elems_dtype, 1);
59+
list_view.append_nulls(1);
60+
ListViewScalar::new(list_view.freeze()).into()
61+
}
62+
Some(elements) => {
63+
// If the list elements are non-null, we convert each one accordingly
64+
// and append it to the new list view.
65+
let mut new_elements =
66+
VectorMut::with_capacity(elems_dtype, elements.len());
67+
for element in &elements {
68+
let element_scalar = element.to_vector_scalar();
69+
new_elements.append_scalars(&element_scalar, 1);
70+
}
71+
72+
let offsets =
73+
PVector::<u64>::new(Buffer::from_iter([0]), Mask::new_true(1));
74+
let sizes = PVector::<u64>::new(
75+
Buffer::from_iter([elements.len() as u64]),
76+
Mask::new_true(1),
77+
);
78+
79+
// Create the length-1 vector holding the list scalar.
80+
let list_view_vector = ListViewVector::new(
81+
Arc::new(new_elements.freeze()),
82+
offsets.into(),
83+
sizes.into(),
84+
Mask::new_true(1),
85+
);
86+
87+
ListViewScalar::new(list_view_vector).into()
88+
}
89+
}
90+
}
91+
DType::FixedSizeList(elems_dtype, size, _) => {
92+
let lscalar = self.as_list();
93+
match lscalar.elements() {
94+
None => {
95+
let mut elements = VectorMut::with_capacity(elems_dtype, *size as usize);
96+
elements.append_zeros(*size as usize);
97+
98+
FixedSizeListScalar::new(FixedSizeListVector::new(
99+
Arc::new(elements.freeze()),
100+
*size,
101+
Mask::new_false(1),
102+
))
103+
.into()
104+
}
105+
Some(element_scalars) => {
106+
let mut elements = VectorMut::with_capacity(elems_dtype, *size as usize);
107+
for element_scalar in &element_scalars {
108+
elements.append_scalars(&element_scalar.to_vector_scalar(), 1);
109+
}
110+
FixedSizeListScalar::new(FixedSizeListVector::new(
111+
Arc::new(elements.freeze()),
112+
*size,
113+
Mask::new_true(1),
114+
))
115+
.into()
116+
}
117+
}
118+
}
119+
DType::Struct(fields, _) => {
120+
let scalar = self.as_struct();
121+
122+
match scalar.fields() {
123+
None => {
124+
// Null struct scalar, we still need a length-1 vector for each field.
125+
let fields = fields
126+
.fields()
127+
.map(|dtype| {
128+
let mut field_vec = VectorMut::with_capacity(&dtype, 1);
129+
field_vec.append_zeros(1);
130+
field_vec.freeze()
131+
})
132+
.collect();
133+
StructScalar::new(StructVector::new(Arc::new(fields), Mask::new_false(1)))
134+
.into()
135+
}
136+
Some(field_scalars) => {
137+
let fields = field_scalars
138+
.map(|scalar| {
139+
let mut field_vec = VectorMut::with_capacity(scalar.dtype(), 1);
140+
field_vec.append_scalars(&scalar.to_vector_scalar(), 1);
141+
field_vec.freeze()
142+
})
143+
.collect();
144+
StructScalar::new(StructVector::new(Arc::new(fields), Mask::new_false(1)))
145+
.into()
146+
}
147+
}
148+
}
149+
DType::Extension(_) => self.as_extension().storage().to_vector_scalar(),
150+
}
151+
}
152+
}

vortex-vector/src/binaryview/scalar.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@ use crate::{Scalar, ScalarOps, VectorMutOps};
1010
#[derive(Debug)]
1111
pub struct BinaryViewScalar<T: BinaryViewType>(Option<T::Scalar>);
1212

13-
impl<T: BinaryViewType> From<Option<T::Scalar>> for BinaryViewScalar<T> {
14-
fn from(value: Option<T::Scalar>) -> Self {
13+
impl<T: BinaryViewType> BinaryViewScalar<T> {
14+
/// Creates a new binary view scalar with the given value.
15+
pub fn new(value: Option<T::Scalar>) -> Self {
1516
Self(value)
1617
}
1718
}

vortex-vector/src/binaryview/vector.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ use vortex_buffer::{Alignment, Buffer, ByteBuffer};
1111
use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
1212
use vortex_mask::Mask;
1313

14+
use crate::VectorOps;
1415
use crate::binaryview::vector_mut::BinaryViewVectorMut;
1516
use crate::binaryview::view::{BinaryView, validate_views};
1617
use crate::binaryview::{BinaryViewScalar, BinaryViewType};
17-
use crate::{Scalar, VectorOps};
1818

1919
/// A variable-length binary vector.
2020
///
@@ -193,6 +193,7 @@ impl<T: BinaryViewType> BinaryViewVector<T> {
193193

194194
impl<T: BinaryViewType> VectorOps for BinaryViewVector<T> {
195195
type Mutable = BinaryViewVectorMut<T>;
196+
type Scalar = BinaryViewScalar<T>;
196197

197198
fn len(&self) -> usize {
198199
self.views.len()
@@ -202,9 +203,9 @@ impl<T: BinaryViewType> VectorOps for BinaryViewVector<T> {
202203
&self.validity
203204
}
204205

205-
fn scalar_at(&self, index: usize) -> Scalar {
206+
fn scalar_at(&self, index: usize) -> BinaryViewScalar<T> {
206207
assert!(index < self.len());
207-
BinaryViewScalar::<T>::from(self.get(index)).into()
208+
BinaryViewScalar::<T>::new(self.get(index))
208209
}
209210

210211
fn slice(&self, _range: impl RangeBounds<usize> + Clone + Debug) -> Self {

vortex-vector/src/binaryview/vector_mut.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@ use vortex_buffer::{BufferMut, ByteBuffer, ByteBufferMut};
99
use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
1010
use vortex_mask::MaskMut;
1111

12-
use crate::binaryview::BinaryViewType;
1312
use crate::binaryview::vector::BinaryViewVector;
1413
use crate::binaryview::view::{BinaryView, validate_views};
14+
use crate::binaryview::{BinaryViewScalar, BinaryViewType};
1515
use crate::{VectorMutOps, VectorOps};
1616

1717
// Default capacity for new string data buffers of 2MiB.
@@ -264,6 +264,20 @@ impl<T: BinaryViewType> VectorMutOps for BinaryViewVectorMut<T> {
264264
self.validity.append_n(false, n);
265265
}
266266

267+
/// Appends `n` entries that are marked valid and backed by the empty view.
fn append_zeros(&mut self, n: usize) {
    // The validity bits and the views live in separate buffers, so the two
    // appends are independent of each other.
    self.validity.append_n(true, n);
    let empty = BinaryView::empty_view();
    self.views.push_n(empty, n);
}
271+
272+
/// Appends `n` copies of `scalar`: nulls when the scalar holds no value,
/// otherwise `n` copies of a clone of the held value.
fn append_scalars(&mut self, scalar: &BinaryViewScalar<T>, n: usize) {
    if let Some(held) = scalar.value() {
        self.append_owned_values(held.clone(), n);
    } else {
        self.append_nulls(n);
    }
}
280+
267281
fn freeze(mut self) -> BinaryViewVector<T> {
268282
// Freeze all components, close any in-progress views
269283
self.flush_open_buffer();

vortex-vector/src/bool/scalar.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,16 @@ use crate::{Scalar, ScalarOps, VectorMut, VectorMutOps};
88
#[derive(Debug)]
99
pub struct BoolScalar(Option<bool>);
1010

11-
impl From<Option<bool>> for BoolScalar {
12-
fn from(value: Option<bool>) -> Self {
11+
impl BoolScalar {
12+
/// Creates a new bool scalar with the given value.
13+
pub fn new(value: Option<bool>) -> Self {
1314
Self(value)
1415
}
16+
17+
/// Returns the value of the bool scalar, or `None` if the scalar is null.
18+
pub fn value(&self) -> Option<bool> {
19+
self.0
20+
}
1521
}
1622

1723
impl ScalarOps for BoolScalar {

vortex-vector/src/bool/vector.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ use vortex_buffer::BitBuffer;
1010
use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
1111
use vortex_mask::Mask;
1212

13-
use crate::bool::BoolVectorMut;
14-
use crate::{Scalar, VectorOps};
13+
use crate::VectorOps;
14+
use crate::bool::{BoolScalar, BoolVectorMut};
1515

1616
/// An immutable vector of boolean values.
1717
///
@@ -74,6 +74,7 @@ impl BoolVector {
7474

7575
impl VectorOps for BoolVector {
7676
type Mutable = BoolVectorMut;
77+
type Scalar = BoolScalar;
7778

7879
fn len(&self) -> usize {
7980
debug_assert!(self.validity.len() == self.bits.len());
@@ -84,13 +85,13 @@ impl VectorOps for BoolVector {
8485
&self.validity
8586
}
8687

87-
fn scalar_at(&self, index: usize) -> Scalar {
88+
fn scalar_at(&self, index: usize) -> BoolScalar {
8889
assert!(index < self.len());
8990

9091
let is_valid = self.validity.value(index);
9192
let value = is_valid.then(|| self.bits.value(index));
9293

93-
Scalar::Bool(value.into())
94+
BoolScalar::new(value)
9495
}
9596

9697
fn slice(&self, range: impl RangeBounds<usize> + Clone + Debug) -> Self {

vortex-vector/src/bool/vector_mut.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use vortex_buffer::BitBufferMut;
77
use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
88
use vortex_mask::MaskMut;
99

10-
use crate::bool::BoolVector;
10+
use crate::bool::{BoolScalar, BoolVector};
1111
use crate::{VectorMutOps, VectorOps};
1212

1313
/// A mutable vector of boolean values.
@@ -147,6 +147,18 @@ impl VectorMutOps for BoolVectorMut {
147147
self.validity.append_n(false, n);
148148
}
149149

150+
fn append_zeros(&mut self, n: usize) {
151+
self.bits.append_n(false, n);
152+
self.validity.append_n(true, n);
153+
}
154+
155+
/// Appends `n` copies of `scalar`: nulls when the scalar holds no value,
/// otherwise `n` copies of the held boolean.
fn append_scalars(&mut self, scalar: &BoolScalar, n: usize) {
    if let Some(bit) = scalar.value() {
        self.append_values(bit, n)
    } else {
        self.append_nulls(n)
    }
}
161+
150162
fn freeze(self) -> BoolVector {
151163
BoolVector {
152164
bits: self.bits.freeze(),

0 commit comments

Comments
 (0)