Skip to content

Commit 54b7ee0

Browse files
authored
Fix BoolArray::new to take BitBuffer (#5690)
Signed-off-by: Nicholas Gates <[email protected]>
1 parent 4f802b6 commit 54b7ee0

File tree

4 files changed

+48
-85
lines changed

4 files changed

+48
-85
lines changed

vortex-array/src/arrays/bool/array.rs

Lines changed: 43 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
use arrow_array::BooleanArray;
55
use vortex_buffer::BitBuffer;
66
use vortex_buffer::BitBufferMut;
7-
use vortex_buffer::ByteBuffer;
87
use vortex_dtype::DType;
98
use vortex_error::VortexExpect;
109
use vortex_error::VortexResult;
@@ -49,12 +48,21 @@ use crate::validity::Validity;
4948
#[derive(Clone, Debug)]
5049
pub struct BoolArray {
5150
pub(super) dtype: DType,
52-
pub(super) buffer: BitBuffer,
51+
pub(super) bits: BitBuffer,
5352
pub(super) validity: Validity,
5453
pub(super) stats_set: ArrayStats,
5554
}
5655

5756
impl BoolArray {
57+
/// Constructs a new `BoolArray`.
58+
///
59+
/// # Panics
60+
///
61+
/// Panics if the validity length is not equal to the bit buffer length.
62+
pub fn new(bits: BitBuffer, validity: Validity) -> Self {
63+
Self::try_new(bits, validity).vortex_expect("Failed to create BoolArray")
64+
}
65+
5866
/// Constructs a new `BoolArray`.
5967
///
6068
/// See [`BoolArray::new_unchecked`] for more information.
@@ -63,80 +71,51 @@ impl BoolArray {
6371
///
6472
/// Returns an error if the provided components do not satisfy the invariants documented in
6573
/// [`BoolArray::new_unchecked`].
66-
pub fn try_new(
67-
buffer: ByteBuffer,
68-
offset: usize,
69-
len: usize,
70-
validity: Validity,
71-
) -> VortexResult<Self> {
72-
Self::validate(&buffer, offset, len, &validity)?;
73-
74-
// SAFETY: validate ensures all invariants are met.
75-
Ok(unsafe { Self::new_unchecked(buffer, offset, len, validity) })
74+
pub fn try_new(bits: BitBuffer, validity: Validity) -> VortexResult<Self> {
75+
let bits = bits.shrink_offset();
76+
Self::validate(&bits, &validity)?;
77+
Ok(Self {
78+
dtype: DType::Bool(validity.nullability()),
79+
bits,
80+
validity,
81+
stats_set: ArrayStats::default(),
82+
})
7683
}
7784

7885
/// Creates a new [`BoolArray`] without validation from these components:
7986
///
80-
/// * `buffer` is a raw [`ByteBuffer`] holding the packed bits.
81-
/// * `offset` is the number of bits in the start of the buffer that should be skipped when
82-
/// looking up the i-th value.
83-
/// * `len` is the length of the array, which should correspond to the number of bits.
84-
/// * `validity` holds the null values.
85-
///
8687
/// # Safety
8788
///
88-
/// The caller must ensure all of the following invariants are satisfied:
89-
///
90-
/// - `buffer` must contain at least `(offset + len).div_ceil(8)` bytes.
91-
/// - `offset` must be less than 8 (it represents the bit offset within the first byte).
92-
/// - If `validity` is `Validity::Array`, its length must exactly equal `len`.
93-
pub unsafe fn new_unchecked(
94-
buffer: ByteBuffer,
95-
offset: usize,
96-
len: usize,
97-
validity: Validity,
98-
) -> Self {
99-
#[cfg(debug_assertions)]
100-
Self::validate(&buffer, offset, len, &validity)
101-
.vortex_expect("[Debug Assertion]: Invalid `BoolArray` parameters");
102-
103-
let buffer = BitBuffer::new_with_offset(buffer, len, offset);
104-
let buffer = buffer.shrink_offset();
105-
Self {
106-
dtype: DType::Bool(validity.nullability()),
107-
buffer,
108-
validity,
109-
stats_set: ArrayStats::default(),
89+
/// The caller must ensure that the validity length is equal to the bit buffer length.
90+
pub unsafe fn new_unchecked(bits: BitBuffer, validity: Validity) -> Self {
91+
if cfg!(debug_assertions) {
92+
Self::new(bits, validity)
93+
} else {
94+
Self {
95+
dtype: DType::Bool(validity.nullability()),
96+
bits,
97+
validity,
98+
stats_set: ArrayStats::default(),
99+
}
110100
}
111101
}
112102

113103
/// Validates the components that would be used to create a [`BoolArray`].
114104
///
115105
/// This function checks all the invariants required by [`BoolArray::new_unchecked`].
116-
pub fn validate(
117-
buffer: &ByteBuffer,
118-
offset: usize,
119-
len: usize,
120-
validity: &Validity,
121-
) -> VortexResult<()> {
122-
vortex_ensure!(
123-
offset < 8,
124-
"offset must be less than whole byte, was {offset} bits"
125-
);
126-
127-
// Validate the buffer is large enough to hold all the bits
128-
let required_bytes = offset.saturating_add(len).div_ceil(8);
106+
pub fn validate(bits: &BitBuffer, validity: &Validity) -> VortexResult<()> {
129107
vortex_ensure!(
130-
buffer.len() >= required_bytes,
131-
"BoolArray with offset={offset} len={len} cannot be built from buffer of size {}",
132-
buffer.len()
108+
bits.offset() < 8,
109+
"BitBuffer offset must be <8, got {}",
110+
bits.offset()
133111
);
134112

135113
// Validate validity
136114
if let Some(validity_len) = validity.maybe_len() {
137115
vortex_ensure!(
138-
validity_len == len,
139-
"BoolArray of size {len} cannot be built with validity of size {validity_len}"
116+
validity_len == bits.len(),
117+
"BoolArray of size {} cannot be built with validity of size {validity_len}",
118+
bits.len()
140119
);
141120
}
142121

@@ -157,7 +136,7 @@ impl BoolArray {
157136
let buffer = buffer.shrink_offset();
158137
Self {
159138
dtype: DType::Bool(validity.nullability()),
160-
buffer,
139+
bits: buffer,
161140
validity,
162141
stats_set: ArrayStats::default(),
163142
}
@@ -179,16 +158,16 @@ impl BoolArray {
179158
/// Returns the underlying [`BitBuffer`] of the array.
180159
pub fn bit_buffer(&self) -> &BitBuffer {
181160
assert!(
182-
self.buffer.offset() < 8,
161+
self.bits.offset() < 8,
183162
"Offset must be <8, did we forget to call shrink_offset? Found {}",
184-
self.buffer.offset()
163+
self.bits.offset()
185164
);
186-
&self.buffer
165+
&self.bits
187166
}
188167

189168
/// Returns the underlying [`BitBuffer`] ofthe array
190169
pub fn into_bit_buffer(self) -> BitBuffer {
191-
self.buffer
170+
self.bits
192171
}
193172

194173
pub fn to_mask(&self) -> Mask {
@@ -247,10 +226,7 @@ impl FromIterator<Option<bool>> for BoolArray {
247226

248227
impl IntoArray for BitBuffer {
249228
fn into_array(self) -> ArrayRef {
250-
let (offset, len, buffer) = self.into_inner();
251-
BoolArray::try_new(buffer, offset, len, Validity::NonNullable)
252-
.vortex_expect("known correct")
253-
.into_array()
229+
BoolArray::new(self, Validity::NonNullable).into_array()
254230
}
255231
}
256232

vortex-array/src/arrays/bool/vtable/array.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use crate::vtable::BaseArrayVTable;
1515

1616
impl BaseArrayVTable<BoolVTable> for BoolVTable {
1717
fn len(array: &BoolArray) -> usize {
18-
array.buffer.len()
18+
array.bits.len()
1919
}
2020

2121
fn dtype(array: &BoolArray) -> &DType {

vortex-array/src/arrays/bool/vtable/mod.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
use vortex_buffer::BitBuffer;
45
use vortex_buffer::BufferHandle;
56
use vortex_dtype::DType;
67
use vortex_error::VortexExpect;
@@ -104,7 +105,9 @@ impl VTable for BoolVTable {
104105
};
105106

106107
let buffer = buffers[0].clone().try_to_bytes()?;
107-
BoolArray::try_new(buffer, metadata.offset as usize, len, validity)
108+
let bits = BitBuffer::new_with_offset(buffer, len, metadata.offset as usize);
109+
110+
BoolArray::try_new(bits, validity)
108111
}
109112

110113
fn bind_kernel(array: &Self::Array, _ctx: &mut BindCtx) -> VortexResult<KernelRef> {

vortex-array/src/arrays/validation_tests.rs

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,22 +22,6 @@ mod tests {
2222
use crate::arrays::*;
2323
use crate::validity::Validity;
2424

25-
#[test]
26-
fn test_bool_array_validation_success() {
27-
// Valid case: buffer has enough bytes for the array.
28-
let buffer = ByteBuffer::from(vec![0xff, 0xff]);
29-
let result = BoolArray::try_new(buffer, 0, 10, Validity::NonNullable);
30-
assert!(result.is_ok());
31-
}
32-
33-
#[test]
34-
fn test_bool_array_validation_failure_buffer_too_small() {
35-
// Invalid case: buffer too small.
36-
let buffer = ByteBuffer::from(vec![0xff]); // Only 1 byte.
37-
let result = BoolArray::try_new(buffer, 0, 16, Validity::NonNullable); // Needs 2 bytes.
38-
assert!(result.is_err());
39-
}
40-
4125
#[test]
4226
fn test_chunked_array_validation_success() {
4327
// Valid case: all chunks have the same dtype.

0 commit comments

Comments
 (0)