Skip to content

Commit 3e94087

Browse files
authored
Chore: Remove nullability from Vectors (#5029)
Tracking Issue: #5028 Removes explicit nullability in Vectors, meaning our physical vectors now behave even more like Arrow. --------- Signed-off-by: Connor Tsui <[email protected]>
1 parent 6317892 commit 3e94087

File tree

11 files changed

+116
-268
lines changed

11 files changed

+116
-268
lines changed

vortex-vector/src/bool/vector.rs

Lines changed: 16 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,42 +4,33 @@
44
//! Definition and implementation of [`BoolVector`].
55
66
use vortex_buffer::BitBuffer;
7-
use vortex_dtype::Nullability;
87
use vortex_mask::Mask;
98

109
use super::BoolVectorMut;
1110
use crate::VectorOps;
1211

1312
/// An immutable vector of boolean values.
1413
///
15-
/// Internally, the boolean values are stored as the bits of a [`BitBuffer`] plus an optional
16-
/// [`Mask`] for null booleans (where `true` represents a _valid_ boolean and `false` represents a
17-
/// `null` boolean).
18-
///
1914
/// The mutable equivalent of this type is [`BoolVectorMut`].
2015
#[derive(Debug, Clone)]
2116
pub struct BoolVector {
2217
pub(super) bits: BitBuffer,
23-
pub(super) validity: Option<Mask>,
18+
pub(super) validity: Mask,
2419
}
2520

2621
impl VectorOps for BoolVector {
2722
type Mutable = BoolVectorMut;
2823

29-
fn nullability(&self) -> Nullability {
30-
Nullability::from(self.validity.is_some())
31-
}
32-
3324
fn len(&self) -> usize {
34-
debug_assert!(
35-
self.validity
36-
.as_ref()
37-
.is_none_or(|mask| mask.len() == self.bits.len())
38-
);
25+
debug_assert!(self.validity.len() == self.bits.len());
3926

4027
self.bits.len()
4128
}
4229

30+
fn validity(&self) -> &Mask {
31+
&self.validity
32+
}
33+
4334
fn try_into_mut(self) -> Result<Self::Mutable, Self>
4435
where
4536
Self: Sized,
@@ -54,19 +45,15 @@ impl VectorOps for BoolVector {
5445
}
5546
};
5647

57-
let validity = match self.validity {
58-
Some(v) => match v.try_into_mut() {
59-
Ok(v) => Some(v),
60-
Err(v) => {
61-
return Err(BoolVector {
62-
bits: bits.freeze(),
63-
validity: Some(v),
64-
});
65-
}
66-
},
67-
None => None,
68-
};
69-
70-
Ok(BoolVectorMut { bits, validity })
48+
match self.validity.try_into_mut() {
49+
Ok(validity_mut) => Ok(BoolVectorMut {
50+
bits,
51+
validity: validity_mut,
52+
}),
53+
Err(validity) => Err(BoolVector {
54+
bits: bits.freeze(),
55+
validity,
56+
}),
57+
}
7158
}
7259
}

vortex-vector/src/bool/vector_mut.rs

Lines changed: 11 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -4,53 +4,35 @@
44
//! Definition and implementation of [`BoolVectorMut`].
55
66
use vortex_buffer::BitBufferMut;
7-
use vortex_dtype::Nullability;
8-
use vortex_error::vortex_panic;
97
use vortex_mask::MaskMut;
108

119
use super::BoolVector;
1210
use crate::{VectorMutOps, VectorOps};
1311

1412
/// A mutable vector of boolean values.
1513
///
16-
/// Internally, the boolean values are stored as the bits of a [`BitBufferMut`] plus an optional
17-
/// [`MaskMut`] for null booleans (where `true` represents a _valid_ boolean and `false` represents
18-
/// a `null` boolean).
19-
///
2014
/// The immutable equivalent of this type is [`BoolVector`].
2115
#[derive(Debug, Clone)]
2216
pub struct BoolVectorMut {
2317
pub(super) bits: BitBufferMut,
24-
pub(super) validity: Option<MaskMut>,
18+
pub(super) validity: MaskMut,
2519
}
2620

2721
impl BoolVectorMut {
28-
/// Creates a new mutable boolean vector with the given `capacity` and `nullability`.
29-
pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self {
30-
let validity = nullability
31-
.is_nullable()
32-
.then(|| MaskMut::with_capacity(capacity));
33-
22+
/// Creates a new mutable boolean vector with the given `capacity`.
23+
pub fn with_capacity(capacity: usize) -> Self {
3424
Self {
3525
bits: BitBufferMut::with_capacity(capacity),
36-
validity,
26+
validity: MaskMut::with_capacity(capacity),
3727
}
3828
}
3929
}
4030

4131
impl VectorMutOps for BoolVectorMut {
4232
type Immutable = BoolVector;
4333

44-
fn nullability(&self) -> Nullability {
45-
Nullability::from(self.validity.is_some())
46-
}
47-
4834
fn len(&self) -> usize {
49-
debug_assert!(
50-
self.validity
51-
.as_ref()
52-
.is_none_or(|mask| mask.len() == self.bits.len())
53-
);
35+
debug_assert!(self.validity.len() == self.bits.len());
5436

5537
self.bits.len()
5638
}
@@ -61,74 +43,35 @@ impl VectorMutOps for BoolVectorMut {
6143

6244
fn reserve(&mut self, additional: usize) {
6345
self.bits.reserve(additional);
64-
65-
if let Some(v) = self.validity.as_mut() {
66-
v.reserve(additional);
67-
}
46+
self.validity.reserve(additional);
6847
}
6948

7049
fn extend_from_vector(&mut self, other: &BoolVector) {
71-
assert_eq!(
72-
self.nullability(),
73-
other.nullability(),
74-
"tried to extend a vector with nullability {} with another vector with nullability {}",
75-
self.nullability(),
76-
other.nullability(),
77-
);
78-
7950
self.bits.append_buffer(&other.bits);
80-
81-
match (&mut self.validity, &other.validity) {
82-
(Some(self_v), Some(other_v)) => self_v.append_mask(other_v),
83-
(Some(self_v), None) => self_v.append_n(true, other.bits.len()),
84-
(None, Some(other_v)) => {
85-
let mut new_validity = MaskMut::new_true(self.bits.len() - other.bits.len());
86-
new_validity.append_mask(other_v);
87-
self.validity = Some(new_validity);
88-
}
89-
(None, None) => {}
90-
}
51+
self.validity.append_mask(other.validity());
9152
}
9253

9354
fn append_nulls(&mut self, n: usize) {
94-
let Some(mask) = &mut self.validity else {
95-
vortex_panic!("tried to append nulls to a non-nullable vector")
96-
};
97-
98-
mask.append_n(false, n);
99-
10055
self.bits.append_n(false, n);
56+
self.validity.append_n(false, n);
10157
}
10258

10359
fn freeze(self) -> Self::Immutable {
10460
BoolVector {
10561
bits: self.bits.freeze(),
106-
validity: self.validity.map(|v| v.freeze()),
62+
validity: self.validity.freeze(),
10763
}
10864
}
10965

11066
fn split_off(&mut self, at: usize) -> Self {
11167
BoolVectorMut {
11268
bits: self.bits.split_off(at),
113-
validity: self.validity.as_mut().map(|v| v.split_off(at)),
69+
validity: self.validity.split_off(at),
11470
}
11571
}
11672

11773
fn unsplit(&mut self, other: Self) {
118-
// TODO(connor): We must check `other`'s nullability in relation to `self`.
119-
120-
let other_len = other.bits.len();
12174
self.bits.unsplit(other.bits);
122-
123-
match (&mut self.validity, other.validity) {
124-
(Some(self_v), Some(other_v)) => self_v.unsplit(other_v),
125-
(Some(self_v), None) => self_v.append_n(true, other_len),
126-
(None, Some(other_v)) => {
127-
let mut new_validity = MaskMut::new_true(self.bits.len() - other_len);
128-
new_validity.unsplit(other_v);
129-
self.validity = Some(new_validity);
130-
}
131-
(None, None) => {}
132-
}
75+
self.validity.unsplit(other.validity);
13376
}
13477
}

vortex-vector/src/null/vector.rs

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
//! Definition and implementation of [`NullVector`].
55
6-
use vortex_dtype::Nullability;
6+
use vortex_mask::Mask;
77

88
use crate::{NullVectorMut, VectorOps};
99

@@ -13,29 +13,33 @@ use crate::{NullVectorMut, VectorOps};
1313
/// single `length` counter.
1414
///
1515
/// The mutable equivalent of this type is [`NullVectorMut`].
16-
#[derive(Debug, Clone, Copy)]
16+
#[derive(Debug, Clone)]
1717
pub struct NullVector {
1818
pub(super) len: usize,
19+
pub(super) validity: Mask,
1920
}
2021

2122
impl NullVector {
2223
/// Creates a new immutable vector of nulls with the given length.
2324
pub fn new(len: usize) -> Self {
24-
Self { len }
25+
Self {
26+
len,
27+
validity: Mask::AllFalse(len),
28+
}
2529
}
2630
}
2731

2832
impl VectorOps for NullVector {
2933
type Mutable = NullVectorMut;
3034

31-
fn nullability(&self) -> Nullability {
32-
Nullability::Nullable
33-
}
34-
3535
fn len(&self) -> usize {
3636
self.len
3737
}
3838

39+
fn validity(&self) -> &Mask {
40+
&self.validity
41+
}
42+
3943
fn try_into_mut(self) -> Result<Self::Mutable, Self>
4044
where
4145
Self: Sized,

vortex-vector/src/null/vector_mut.rs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33

44
//! Definition and implementation of [`NullVectorMut`].
55
6-
use vortex_dtype::Nullability;
7-
86
use super::NullVector;
97
use crate::VectorMutOps;
108

@@ -29,10 +27,6 @@ impl NullVectorMut {
2927
impl VectorMutOps for NullVectorMut {
3028
type Immutable = NullVector;
3129

32-
fn nullability(&self) -> Nullability {
33-
Nullability::Nullable
34-
}
35-
3630
fn len(&self) -> usize {
3731
self.len
3832
}

vortex-vector/src/ops.rs

Lines changed: 11 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
//! Definition and implementation of [`VectorOps`] and [`VectorMutOps`] for [`Vector`] and
55
//! [`VectorMut`], respectively.
66
7-
use vortex_dtype::Nullability;
7+
use vortex_mask::Mask;
88

99
use crate::{Vector, VectorMut, private};
1010

@@ -13,16 +13,6 @@ pub trait VectorOps: private::Sealed + Into<Vector> {
1313
/// The mutable equivalent of this immutable vector.
1414
type Mutable: VectorMutOps<Immutable = Self>;
1515

16-
/// Returns the [`Nullability`] of the vector.
17-
fn nullability(&self) -> Nullability;
18-
19-
/// Returns `true` if the nullability of this vector is [`Nullable`].
20-
///
21-
/// [`Nullable`]: Nullability::Nullable
22-
fn is_nullable(&self) -> bool {
23-
self.nullability().is_nullable()
24-
}
25-
2616
/// Returns the number of elements in the vector, also referred to as its "length".
2717
fn len(&self) -> usize;
2818

@@ -31,6 +21,14 @@ pub trait VectorOps: private::Sealed + Into<Vector> {
3121
self.len() == 0
3222
}
3323

24+
/// Returns the validity mask of the vector, where `true` represents a _valid_ element and
25+
/// `false` represents a `null` element.
26+
///
27+
/// Note that vectors are **always** considered nullable. "Non-nullable" data will simply have a
28+
/// [`Mask`] of [`AllTrue(len)`](Mask::AllTrue). It is on the caller to ensure that they do not
29+
/// add nullable data to a vector they want to keep as non-nullable.
30+
fn validity(&self) -> &Mask;
31+
3432
/// Tries to convert `self` into a mutable vector (implementing [`VectorMutOps`]).
3533
///
3634
/// This method will only succeed if `self` is the only unique strong reference (it effectively
@@ -50,16 +48,6 @@ pub trait VectorMutOps: private::Sealed + Into<VectorMut> {
5048
/// The immutable equivalent of this mutable vector.
5149
type Immutable: VectorOps<Mutable = Self>;
5250

53-
/// Returns the [`Nullability`] of the vector.
54-
fn nullability(&self) -> Nullability;
55-
56-
/// Returns `true` if the nullability of this vector is [`Nullable`].
57-
///
58-
/// [`Nullable`]: Nullability::Nullable
59-
fn is_nullable(&self) -> bool {
60-
self.nullability().is_nullable()
61-
}
62-
6351
/// Returns the number of elements in the vector, also referred to as its "length".
6452
fn len(&self) -> usize;
6553

@@ -82,20 +70,12 @@ pub trait VectorMutOps: private::Sealed + Into<VectorMut> {
8270
fn reserve(&mut self, additional: usize);
8371

8472
/// Extends the vector by appending elements from another vector.
85-
///
86-
/// # Panics
87-
///
88-
/// Panics if `other` does not have the same nullability as `self`.
8973
fn extend_from_vector(&mut self, other: &Self::Immutable);
9074

91-
/// Appends `n` null elements to the vector (if it is nullable).
75+
/// Appends `n` null elements to the vector.
9276
///
9377
/// Implementors should ensure that they correctly append "null" or garbage values to their
94-
/// elements in addition to adding nulls to their validity mask.
95-
///
96-
/// # Panics
97-
///
98-
/// If `self` is a non-nullable vector, implementors should ensure that this function panics.
78+
/// elements in addition to adding nulls to their validity mask.s.
9979
fn append_nulls(&mut self, n: usize);
10080

10181
/// Converts `self` into an immutable vector.
@@ -115,7 +95,6 @@ pub trait VectorMutOps: private::Sealed + Into<VectorMut> {
11595
/// `at > capacity`).
11696
fn split_off(&mut self, at: usize) -> Self;
11797

118-
// TODO(connor): Should this panic if other has a different nullability?
11998
/// Absorbs a mutable vector that was previously split off.
12099
///
121100
/// If the two vectors were previously contiguous and not mutated in a way that causes

0 commit comments

Comments
 (0)