Skip to content

Commit fc3f11b

Browse files
robert3005, a10y, and gatesn
authored
Use BitBuffer instead of BooleanBuffer (#4940)
Replace usages of BooleanBuffer with our own BitBuffer. This lets us have our own backing Buffer for BitBuffer and implement additional optimisations for use in vortex. Previously: #2456. This PR requires resolving outstanding performance issues before merging: (1) Make iterators cheaper to construct; they likely can't be owned. (2) Improve append_packed_range performance; Arrow has a function that is faster than the bitvec crate. --------- Signed-off-by: Robert Kruszewski <[email protected]> Signed-off-by: Nicholas Gates <[email protected]> Co-authored-by: Andrew Duffy <[email protected]> Co-authored-by: Nicholas Gates <[email protected]>
1 parent a9fade4 commit fc3f11b

File tree

163 files changed

+1441
-1413
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

163 files changed

+1441
-1413
lines changed

Cargo.lock

Lines changed: 0 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

encodings/alp/src/alp/compute/between.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ mod tests {
104104
let res = between_impl(arr, lower, upper, Nullability::Nullable, options)
105105
.unwrap()
106106
.to_bool()
107-
.boolean_buffer()
107+
.bit_buffer()
108108
.iter()
109109
.collect_vec();
110110
assert_eq!(res.len(), 1);

encodings/alp/src/alp/compute/compare.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ mod tests {
148148
{
149149
alp_scalar_compare(alp, value, operator)
150150
.unwrap()
151-
.map(|a| a.to_bool().boolean_buffer().iter().collect())
151+
.map(|a| a.to_bool().bit_buffer().iter().collect())
152152
}
153153

154154
#[test]
@@ -166,7 +166,7 @@ mod tests {
166166
.unwrap()
167167
.to_bool();
168168

169-
for v in r.boolean_buffer().iter() {
169+
for v in r.bit_buffer().iter() {
170170
assert!(!v);
171171
}
172172

@@ -175,7 +175,7 @@ mod tests {
175175
.unwrap()
176176
.to_bool();
177177

178-
for v in r.boolean_buffer().iter() {
178+
for v in r.bit_buffer().iter() {
179179
assert!(v);
180180
}
181181
}
@@ -196,15 +196,15 @@ mod tests {
196196
.unwrap()
197197
.to_bool();
198198

199-
assert!(r_eq.boolean_buffer().iter().all(|v| !v));
199+
assert!(r_eq.bit_buffer().iter().all(|v| !v));
200200

201201
#[allow(clippy::excessive_precision)]
202202
let r_neq = alp_scalar_compare(&encoded, 1.234444f32, Operator::NotEq)
203203
.unwrap()
204204
.unwrap()
205205
.to_bool();
206206

207-
assert!(r_neq.boolean_buffer().iter().all(|v| v));
207+
assert!(r_neq.bit_buffer().iter().all(|v| v));
208208
}
209209

210210
#[test]
@@ -223,31 +223,31 @@ mod tests {
223223
.to_bool();
224224

225225
// !(0.0605_f32 >= 0.06051_f32);
226-
assert!(r_gte.boolean_buffer().iter().all(|v| !v));
226+
assert!(r_gte.bit_buffer().iter().all(|v| !v));
227227

228228
let r_gt = alp_scalar_compare(&encoded, 0.06051_f32, Operator::Gt)
229229
.unwrap()
230230
.unwrap()
231231
.to_bool();
232232

233233
// (0.0605_f32 > 0.06051_f32);
234-
assert!(r_gt.boolean_buffer().iter().all(|v| !v));
234+
assert!(r_gt.bit_buffer().iter().all(|v| !v));
235235

236236
let r_lte = alp_scalar_compare(&encoded, 0.06051_f32, Operator::Lte)
237237
.unwrap()
238238
.unwrap()
239239
.to_bool();
240240

241241
// 0.0605_f32 <= 0.06051_f32;
242-
assert!(r_lte.boolean_buffer().iter().all(|v| v));
242+
assert!(r_lte.bit_buffer().iter().all(|v| v));
243243

244244
let r_lt = alp_scalar_compare(&encoded, 0.06051_f32, Operator::Lt)
245245
.unwrap()
246246
.unwrap()
247247
.to_bool();
248248

249249
//0.0605_f32 < 0.06051_f32;
250-
assert!(r_lt.boolean_buffer().iter().all(|v| v));
250+
assert!(r_lt.bit_buffer().iter().all(|v| v));
251251
}
252252

253253
#[test]
@@ -311,7 +311,7 @@ mod tests {
311311
.unwrap()
312312
.to_bool();
313313

314-
for v in r.boolean_buffer().iter() {
314+
for v in r.bit_buffer().iter() {
315315
assert!(!v);
316316
}
317317
}

encodings/alp/src/alp/compute/filter.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ register_kernel!(FilterKernelAdapter(ALPVTable).lift());
3434
#[cfg(test)]
3535
mod test {
3636
use rstest::rstest;
37-
use vortex_array::IntoArray;
3837
use vortex_array::arrays::PrimitiveArray;
3938
use vortex_array::compute::conformance::filter::test_filter_conformance;
39+
use vortex_array::{ArrayRef, IntoArray};
4040
use vortex_buffer::buffer;
4141

4242
use crate::ALPEncoding;
@@ -50,7 +50,7 @@ mod test {
5050
1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
5151
11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0
5252
].into_array())]
53-
fn test_filter_alp_conformance(#[case] array: vortex_array::ArrayRef) {
53+
fn test_filter_alp_conformance(#[case] array: ArrayRef) {
5454
let alp = ALPEncoding
5555
.encode(&array.to_canonical(), None)
5656
.unwrap()

encodings/alp/src/alp/compute/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ mod tests {
3333
// Arrays with patterns
3434
#[case::repeating_pattern(alp_encode(&PrimitiveArray::from_iter([1.1f32, 2.2, 3.3, 1.1, 2.2, 3.3, 1.1, 2.2, 3.3]), None).unwrap())]
3535
#[case::close_values(alp_encode(&PrimitiveArray::from_iter([100.001f64, 100.002, 100.003, 100.004, 100.005]), None).unwrap())]
36-
3736
fn test_alp_consistency(#[case] array: ALPArray) {
3837
test_array_consistency(array.as_ref());
3938
}

encodings/alp/src/alp_rd/compute/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@ mod tests {
6464
let encoder = RDEncoder::new(&values);
6565
encoder.encode(&arr)
6666
})]
67-
6867
fn test_alp_rd_consistency(#[case] array: ALPRDArray) {
6968
test_array_consistency(array.as_ref());
7069
}

encodings/bytebool/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ version = { workspace = true }
1717
workspace = true
1818

1919
[dependencies]
20-
arrow-buffer = { workspace = true }
2120
num-traits = { workspace = true }
2221
vortex-array = { workspace = true }
2322
vortex-buffer = { workspace = true }

encodings/bytebool/src/array.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
use std::fmt::Debug;
55
use std::ops::Range;
66

7-
use arrow_buffer::BooleanBuffer;
87
use vortex_array::arrays::BoolArray;
98
use vortex_array::stats::{ArrayStats, StatsSetRef};
109
use vortex_array::validity::Validity;
@@ -13,7 +12,7 @@ use vortex_array::vtable::{
1312
ValidityVTableFromValidityHelper,
1413
};
1514
use vortex_array::{ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, vtable};
16-
use vortex_buffer::ByteBuffer;
15+
use vortex_buffer::{BitBuffer, ByteBuffer};
1716
use vortex_dtype::DType;
1817
use vortex_error::vortex_panic;
1918
use vortex_scalar::Scalar;
@@ -114,9 +113,9 @@ impl ArrayVTable<ByteBoolVTable> for ByteBoolVTable {
114113

115114
impl CanonicalVTable<ByteBoolVTable> for ByteBoolVTable {
116115
fn canonicalize(array: &ByteBoolArray) -> Canonical {
117-
let boolean_buffer = BooleanBuffer::from(array.as_slice());
116+
let boolean_buffer = BitBuffer::from(array.as_slice());
118117
let validity = array.validity().clone();
119-
Canonical::Bool(BoolArray::from_bool_buffer(boolean_buffer, validity))
118+
Canonical::Bool(BoolArray::from_bit_buffer(boolean_buffer, validity))
120119
}
121120
}
122121

encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ mod tests {
149149
let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap();
150150

151151
assert_eq!(
152-
res.to_bool().boolean_buffer().iter().collect::<Vec<_>>(),
152+
res.to_bool().bit_buffer().iter().collect::<Vec<_>>(),
153153
vec![false, false, true]
154154
);
155155
}
@@ -206,13 +206,13 @@ mod tests {
206206

207207
let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap();
208208

209-
assert_eq!(res.to_bool().bool_vec().unwrap(), vec![false, false, false]);
209+
assert_eq!(res.to_bool().bool_vec(), vec![false, false, false]);
210210

211211
let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Gt).unwrap();
212-
assert_eq!(res.to_bool().bool_vec().unwrap(), vec![true, true, true]);
212+
assert_eq!(res.to_bool().bool_vec(), vec![true, true, true]);
213213

214214
let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Lt).unwrap();
215-
assert_eq!(res.to_bool().bool_vec().unwrap(), vec![false, false, false]);
215+
assert_eq!(res.to_bool().bool_vec(), vec![false, false, false]);
216216

217217
// This cannot be converted to a i32.
218218
let rhs = ConstantArray::new(
@@ -222,12 +222,12 @@ mod tests {
222222

223223
let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap();
224224

225-
assert_eq!(res.to_bool().bool_vec().unwrap(), vec![false, false, false]);
225+
assert_eq!(res.to_bool().bool_vec(), vec![false, false, false]);
226226

227227
let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Gt).unwrap();
228-
assert_eq!(res.to_bool().bool_vec().unwrap(), vec![false, false, false]);
228+
assert_eq!(res.to_bool().bool_vec(), vec![false, false, false]);
229229

230230
let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Lt).unwrap();
231-
assert_eq!(res.to_bool().bool_vec().unwrap(), vec![true, true, true]);
231+
assert_eq!(res.to_bool().bool_vec(), vec![true, true, true]);
232232
}
233233
}

encodings/dict/src/array.rs

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33

44
use std::fmt::Debug;
55

6-
use arrow_buffer::BooleanBuffer;
76
use vortex_array::stats::{ArrayStats, StatsSetRef};
87
use vortex_array::vtable::{ArrayVTable, NotSupported, VTable, ValidityVTable};
98
use vortex_array::{Array, ArrayRef, EncodingId, EncodingRef, ToCanonical, vtable};
9+
use vortex_buffer::BitBuffer;
1010
use vortex_dtype::{DType, match_each_integer_ptype};
1111
use vortex_error::{VortexExpect as _, VortexResult, vortex_bail};
1212
use vortex_mask::{AllOr, Mask};
@@ -142,13 +142,13 @@ impl ValidityVTable<DictVTable> for DictVTable {
142142

143143
fn validity_mask(array: &DictArray) -> Mask {
144144
let codes_validity = array.codes().validity_mask();
145-
match codes_validity.boolean_buffer() {
145+
match codes_validity.bit_buffer() {
146146
AllOr::All => {
147147
let primitive_codes = array.codes().to_primitive();
148148
let values_mask = array.values().validity_mask();
149149
let is_valid_buffer = match_each_integer_ptype!(primitive_codes.ptype(), |P| {
150150
let codes_slice = primitive_codes.as_slice::<P>();
151-
BooleanBuffer::collect_bool(array.len(), |idx| {
151+
BitBuffer::collect_bool(array.len(), |idx| {
152152
#[allow(clippy::cast_possible_truncation)]
153153
values_mask.value(codes_slice[idx] as usize)
154154
})
@@ -162,7 +162,7 @@ impl ValidityVTable<DictVTable> for DictVTable {
162162
let is_valid_buffer = match_each_integer_ptype!(primitive_codes.ptype(), |P| {
163163
let codes_slice = primitive_codes.as_slice::<P>();
164164
#[allow(clippy::cast_possible_truncation)]
165-
BooleanBuffer::collect_bool(array.len(), |idx| {
165+
BitBuffer::collect_bool(array.len(), |idx| {
166166
validity_buff.value(idx) && values_mask.value(codes_slice[idx] as usize)
167167
})
168168
});
@@ -174,15 +174,14 @@ impl ValidityVTable<DictVTable> for DictVTable {
174174

175175
#[cfg(test)]
176176
mod test {
177-
use arrow_buffer::BooleanBuffer;
178177
use rand::distr::{Distribution, StandardUniform};
179178
use rand::prelude::StdRng;
180179
use rand::{Rng, SeedableRng};
181180
use vortex_array::arrays::{ChunkedArray, PrimitiveArray};
182181
use vortex_array::builders::builder_with_capacity;
183182
use vortex_array::validity::Validity;
184183
use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical};
185-
use vortex_buffer::buffer;
184+
use vortex_buffer::{BitBuffer, buffer};
186185
use vortex_dtype::Nullability::NonNullable;
187186
use vortex_dtype::{DType, NativePType, PType, UnsignedPType};
188187
use vortex_error::{VortexExpect, VortexUnwrap, vortex_panic};
@@ -195,7 +194,7 @@ mod test {
195194
let dict = DictArray::try_new(
196195
PrimitiveArray::new(
197196
buffer![0u32, 1, 2, 2, 1],
198-
Validity::from(BooleanBuffer::from(vec![true, false, true, false, true])),
197+
Validity::from(BitBuffer::from(vec![true, false, true, false, true])),
199198
)
200199
.into_array(),
201200
PrimitiveArray::new(buffer![3, 6, 9], Validity::AllValid).into_array(),
@@ -214,7 +213,7 @@ mod test {
214213
buffer![0u32, 1, 2, 2, 1].into_array(),
215214
PrimitiveArray::new(
216215
buffer![3, 6, 9],
217-
Validity::from(BooleanBuffer::from(vec![true, false, false])),
216+
Validity::from(BitBuffer::from(vec![true, false, false])),
218217
)
219218
.into_array(),
220219
)
@@ -231,12 +230,12 @@ mod test {
231230
let dict = DictArray::try_new(
232231
PrimitiveArray::new(
233232
buffer![0u32, 1, 2, 2, 1],
234-
Validity::from(BooleanBuffer::from(vec![true, false, true, false, true])),
233+
Validity::from(BitBuffer::from(vec![true, false, true, false, true])),
235234
)
236235
.into_array(),
237236
PrimitiveArray::new(
238237
buffer![3, 6, 9],
239-
Validity::from(BooleanBuffer::from(vec![false, true, true])),
238+
Validity::from(BitBuffer::from(vec![false, true, true])),
240239
)
241240
.into_array(),
242241
)
@@ -253,7 +252,7 @@ mod test {
253252
let dict = DictArray::try_new(
254253
PrimitiveArray::new(
255254
buffer![0u32, 1, 2, 2, 1],
256-
Validity::from(BooleanBuffer::from(vec![true, false, true, false, true])),
255+
Validity::from(BitBuffer::from(vec![true, false, true, false, true])),
257256
)
258257
.into_array(),
259258
PrimitiveArray::new(buffer![3, 6, 9], Validity::NonNullable).into_array(),
@@ -312,8 +311,8 @@ mod test {
312311

313312
assert_eq!(into_prim.as_slice::<u64>(), prim_into.as_slice::<u64>());
314313
assert_eq!(
315-
into_prim.validity_mask().boolean_buffer(),
316-
prim_into.validity_mask().boolean_buffer()
314+
into_prim.validity_mask().bit_buffer(),
315+
prim_into.validity_mask().bit_buffer()
317316
)
318317
}
319318
}

0 commit comments

Comments
 (0)