Skip to content

Commit 180a8b9

Browse files
authored
Vortex vector compute (#5054)
Signed-off-by: Nicholas Gates <[email protected]>
1 parent 6b8df0c commit 180a8b9

File tree

22 files changed

+957
-4
lines changed

22 files changed

+957
-4
lines changed

Cargo.lock

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ members = [
88
"vortex-array",
99
"vortex-btrblocks",
1010
"vortex-buffer",
11+
"vortex-compute",
1112
"vortex-cxx",
1213
"vortex-datafusion",
1314
"vortex-dtype",
@@ -112,6 +113,7 @@ dirs = "6.0.0"
112113
divan = { package = "codspeed-divan-compat", version = "4.0.4" }
113114
dyn-hash = "0.2.0"
114115
enum-iterator = "2.0.0"
116+
enum-map = "2.7.3"
115117
erased-serde = "0.4"
116118
fastlanes = "0.5"
117119
flatbuffers = "25.2.10"
@@ -216,6 +218,7 @@ vortex-array = { version = "0.1.0", path = "./vortex-array", default-features =
216218
vortex-btrblocks = { version = "0.1.0", path = "./vortex-btrblocks", default-features = false }
217219
vortex-buffer = { version = "0.1.0", path = "./vortex-buffer", default-features = false }
218220
vortex-bytebool = { version = "0.1.0", path = "./encodings/bytebool", default-features = false }
221+
vortex-compute = { version = "0.1.0", path = "./vortex-compute", default-features = false }
219222
vortex-datafusion = { version = "0.1.0", path = "./vortex-datafusion", default-features = false }
220223
vortex-datetime-parts = { version = "0.1.0", path = "./encodings/datetime-parts", default-features = false }
221224
vortex-decimal-byte-parts = { version = "0.1.0", path = "encodings/decimal-byte-parts", default-features = false }
@@ -242,6 +245,7 @@ vortex-sequence = { version = "0.1.0", path = "encodings/sequence", default-feat
242245
vortex-sparse = { version = "0.1.0", path = "./encodings/sparse", default-features = false }
243246
vortex-tui = { version = "0.1.0", path = "./vortex-tui", default-features = false }
244247
vortex-utils = { version = "0.1.0", path = "./vortex-utils", default-features = false }
248+
vortex-vector = { version = "0.1.0", path = "./vortex-vector", default-features = false }
245249
vortex-zigzag = { version = "0.1.0", path = "./encodings/zigzag", default-features = false }
246250
vortex-zstd = { version = "0.1.0", path = "./encodings/zstd", default-features = false }
247251
# END crates published by this project

vortex-buffer/benches/vortex_bitbuffer.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,15 @@ fn bitwise_not_vortex_buffer(bencher: Bencher, length: usize) {
229229
});
230230
}
231231

232+
#[divan::bench(args = INPUT_SIZE)]
233+
fn bitwise_not_vortex_buffer_mut(bencher: Bencher, length: usize) {
234+
bencher
235+
.with_inputs(|| BitBufferMut::from_iter((0..length).map(|i| i % 2 == 0)))
236+
.bench_values(|buffer| {
237+
divan::black_box(!buffer);
238+
});
239+
}
240+
232241
#[divan::bench(args = INPUT_SIZE)]
233242
fn bitwise_not_arrow_buffer(bencher: Bencher, length: usize) {
234243
bencher

vortex-buffer/src/bit/buf_mut.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
use std::ops::Not;
5+
46
use arrow_buffer::bit_chunk_iterator::BitChunks;
57
use bitvec::view::BitView;
68

7-
use crate::bit::{get_bit_unchecked, set_bit_unchecked, unset_bit_unchecked};
9+
use crate::bit::{get_bit_unchecked, ops, set_bit_unchecked, unset_bit_unchecked};
810
use crate::{BitBuffer, BufferMut, ByteBufferMut, buffer_mut};
911

1012
/// A mutable bitset buffer that allows random access to individual bits for set and get.
@@ -476,6 +478,16 @@ impl Default for BitBufferMut {
476478
}
477479
}
478480

481+
// Mutate-in-place implementation of bitwise NOT.
482+
impl Not for BitBufferMut {
483+
type Output = BitBufferMut;
484+
485+
fn not(mut self) -> Self::Output {
486+
ops::bitwise_unary_op_mut(&mut self, |b| !b);
487+
self
488+
}
489+
}
490+
479491
impl From<&[bool]> for BitBufferMut {
480492
fn from(value: &[bool]) -> Self {
481493
let mut buf = BitBufferMut::new_unset(value.len());

vortex-buffer/src/bit/macros.rs

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,22 @@ macro_rules! bitbuffer {
1717
() => (
1818
$crate::BitBuffer::empty()
1919
);
20+
21+
// We capture single-element 0/1 cases to avoid ambiguity with the
22+
// comma-separated expression case.
23+
(0) => {
24+
$crate::BitBuffer::from_iter([false])
25+
};
26+
(1) => {
27+
$crate::BitBuffer::from_iter([true])
28+
};
29+
2030
($elem:expr; $n:expr) => (
2131
$crate::BitBuffer::full($elem, $n)
2232
);
2333
($($x:expr),+ $(,)?) => (
2434
$crate::BitBuffer::from_iter([$($x),+])
2535
);
26-
// Match space-separated bit literals (0 or 1)
2736
($($bit:tt)+) => {
2837
$crate::BitBuffer::from_iter([$( $crate::bitbuffer!(@bit $bit) ),+])
2938
};
@@ -45,13 +54,22 @@ macro_rules! bitbuffer_mut {
4554
() => (
4655
$crate::BitBufferMut::empty()
4756
);
57+
58+
// We capture single-element 0/1 cases to avoid ambiguity with the
59+
// comma-separated expression case.
60+
(0) => {
    // Single-bit literal: build the *mutable* buffer type, like every other arm of
    // `bitbuffer_mut!` (the immutable `BitBuffer` here was a copy-paste slip).
    $crate::BitBufferMut::from_iter([false])
};
(1) => {
    // See the `(0)` arm: the result must be a `BitBufferMut`.
    $crate::BitBufferMut::from_iter([true])
};
66+
4867
($elem:expr; $n:expr) => (
4968
$crate::BitBufferMut::full($elem, $n)
5069
);
5170
($($x:expr),+ $(,)?) => (
5271
$crate::BitBufferMut::from_iter([$($x),+])
5372
);
54-
// Match space-separated bit literals (0 or 1)
5573
($($bit:tt)+) => {
5674
$crate::BitBufferMut::from_iter([$( $crate::bitbuffer_mut!(@bit $bit) ),+])
5775
};

vortex-buffer/src/bit/ops.rs

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use crate::trusted_len::TrustedLenExt;
5-
use crate::{BitBuffer, Buffer};
5+
use crate::{BitBuffer, BitBufferMut, Buffer};
66

77
pub(super) fn bitwise_unary_op<F: FnMut(u64) -> u64>(buffer: &BitBuffer, op: F) -> BitBuffer {
88
let iter = buffer.chunks().iter_padded().map(op);
@@ -13,6 +13,38 @@ pub(super) fn bitwise_unary_op<F: FnMut(u64) -> u64>(buffer: &BitBuffer, op: F)
1313
BitBuffer::new(result, buffer.len())
1414
}
1515

16+
/// Applies `op` in place to the bytes backing `buffer`, one 64-bit word at a time.
///
/// The byte slice returned by `as_mut_slice` is walked in 8-byte groups. Each group is decoded
/// as a little-endian `u64`, passed through `op`, and the result is stored back over the same
/// eight bytes. No alignment is required of the underlying storage.
///
/// Any trailing bytes (fewer than eight) are zero-extended into one final word, passed through
/// `op`, and only as many low-order bytes as actually exist are written back. `op` is invoked
/// for this tail word even when there are no trailing bytes, in which case nothing is stored.
///
/// Both the full words and the tail use little-endian byte order, so `op` sees the same
/// bit layout regardless of the target's native endianness.
pub(super) fn bitwise_unary_op_mut<F: FnMut(u64) -> u64>(buffer: &mut BitBufferMut, mut op: F) {
    const WORD_BYTES: usize = std::mem::size_of::<u64>();

    let bytes = buffer.as_mut_slice();

    // Complete 8-byte words. `chunks_exact_mut` guarantees every chunk is exactly
    // `WORD_BYTES` long, so the fixed-size copies below cannot panic.
    let mut words = bytes.chunks_exact_mut(WORD_BYTES);
    for word in words.by_ref() {
        let mut raw = [0u8; WORD_BYTES];
        raw.copy_from_slice(word);
        let updated = op(u64::from_le_bytes(raw));
        word.copy_from_slice(&updated.to_le_bytes());
    }

    // Trailing bytes that do not fill a whole word: zero-pad up to a u64, transform, and
    // write back only the bytes that belong to the buffer.
    let tail = words.into_remainder();
    let tail_len = tail.len();
    let mut raw = [0u8; WORD_BYTES];
    raw[..tail_len].copy_from_slice(tail);
    let updated = op(u64::from_le_bytes(raw)).to_le_bytes();
    tail.copy_from_slice(&updated[..tail_len]);
}
47+
1648
pub(super) fn bitwise_binary_op<F: FnMut(u64, u64) -> u64>(
1749
left: &BitBuffer,
1850
right: &BitBuffer,

vortex-compute/Cargo.toml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
[package]
2+
name = "vortex-compute"
3+
authors = { workspace = true }
4+
categories = { workspace = true }
5+
description = "Compute functions that operate over Vortex vectors, buffers, and masks"
6+
edition = { workspace = true }
7+
homepage = { workspace = true }
8+
include = { workspace = true }
9+
keywords = { workspace = true }
10+
license = { workspace = true }
11+
readme = { workspace = true }
12+
repository = { workspace = true }
13+
rust-version = { workspace = true }
14+
version = { workspace = true }
15+
16+
[package.metadata.docs.rs]
17+
all-features = true
18+
19+
[lints]
20+
workspace = true
21+
22+
[dependencies]
23+
vortex-buffer = { workspace = true }
24+
vortex-error = { workspace = true }
25+
vortex-mask = { workspace = true }
26+
vortex-vector = { workspace = true }
27+
28+
[features]
29+
default = ["filter", "logical"]
30+
31+
filter = []
32+
logical = []
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_buffer::{BitBuffer, BitBufferMut, get_bit};
5+
use vortex_mask::{Mask, MaskIter};
6+
7+
use crate::filter::Filter;
8+
9+
/// If the filter density is above 80%, we use slices to filter the array instead of indices.
10+
// TODO(ngates): we need more experimentation to determine the best threshold here.
11+
const FILTER_SLICES_DENSITY_THRESHOLD: f64 = 0.8;
12+
13+
impl Filter for BitBuffer {
14+
fn filter(&self, mask: &Mask) -> Self {
15+
assert_eq!(mask.len(), self.len());
16+
match mask {
17+
Mask::AllTrue(_) => self.clone(),
18+
Mask::AllFalse(_) => Self::empty(),
19+
Mask::Values(v) => match v.threshold_iter(FILTER_SLICES_DENSITY_THRESHOLD) {
20+
MaskIter::Indices(indices) => filter_indices(self, indices),
21+
MaskIter::Slices(slices) => filter_slices(self, mask.true_count(), slices),
22+
},
23+
}
24+
}
25+
}
26+
27+
fn filter_indices(bools: &BitBuffer, indices: &[usize]) -> BitBuffer {
28+
let buffer = bools.inner().as_ref();
29+
BitBuffer::collect_bool(indices.len(), |idx| {
30+
let idx = *unsafe { indices.get_unchecked(idx) };
31+
get_bit(buffer, bools.offset() + idx)
32+
})
33+
}
34+
35+
/// Concatenates the `start..end` ranges of `buffer` listed in `slices` into a new `BitBuffer`.
///
/// `output_len` is used only to size the builder's initial capacity.
fn filter_slices(buffer: &BitBuffer, output_len: usize, slices: &[(usize, usize)]) -> BitBuffer {
    let mut out = BitBufferMut::with_capacity(output_len);
    for &(start, end) in slices {
        // TODO(ngates): we probably want a borrowed slice for things like this.
        let run = buffer.slice(start..end);
        out.append_buffer(&run);
    }
    out.freeze()
}
43+
44+
#[cfg(test)]
mod test {
    use vortex_buffer::bitbuffer;

    use super::*;

    /// Keeping the ranges `0..1` and `2..3` of `1 1 0` yields `1 0`.
    #[test]
    fn filter_bool_by_slice_test() {
        let input = bitbuffer![1 1 0];
        let kept_ranges = [(0, 1), (2, 3)];

        let output = filter_slices(&input, 2, &kept_ranges);

        assert_eq!(2, output.len());
        assert_eq!(output, bitbuffer![1 0])
    }

    /// Keeping positions `0` and `2` of `1 1 0` yields `1 0`.
    #[test]
    fn filter_bool_by_index_test() {
        let input = bitbuffer![1 1 0];
        let kept_positions = [0, 2];

        let output = filter_indices(&input, &kept_positions);

        assert_eq!(2, output.len());
        assert_eq!(output, bitbuffer![1 0])
    }
}

vortex-compute/src/filter/bool.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_mask::Mask;
5+
use vortex_vector::{BoolVector, VectorOps};
6+
7+
use crate::filter::Filter;
8+
9+
impl Filter for BoolVector {
10+
fn filter(&self, mask: &Mask) -> Self {
11+
Self::new(self.bits().filter(mask), self.validity().filter(mask))
12+
}
13+
}
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_buffer::{Buffer, BufferMut};
5+
use vortex_mask::{Mask, MaskIter};
6+
7+
use crate::filter::Filter;
8+
9+
// This is modeled after the constant with the equivalent name in arrow-rs.
10+
const FILTER_SLICES_SELECTIVITY_THRESHOLD: f64 = 0.8;
11+
12+
impl<T: Copy> Filter for Buffer<T> {
13+
fn filter(&self, mask: &Mask) -> Self {
14+
assert_eq!(mask.len(), self.len());
15+
match mask {
16+
Mask::AllTrue(_) => self.clone(),
17+
Mask::AllFalse(_) => Self::empty(),
18+
Mask::Values(v) => match v.threshold_iter(FILTER_SLICES_SELECTIVITY_THRESHOLD) {
19+
MaskIter::Indices(indices) => filter_indices(self.as_slice(), indices),
20+
MaskIter::Slices(slices) => {
21+
filter_slices(self.as_slice(), mask.true_count(), slices)
22+
}
23+
},
24+
}
25+
}
26+
}
27+
28+
fn filter_indices<T: Copy>(values: &[T], indices: &[usize]) -> Buffer<T> {
29+
Buffer::<T>::from_trusted_len_iter(indices.iter().map(|&idx| values[idx]))
30+
}
31+
32+
/// Concatenates the `start..end` ranges of `values` listed in `slices` into a new `Buffer`.
///
/// `output_len` is used only as the initial capacity of the output. Panics if a range is out
/// of bounds for `values`.
fn filter_slices<T>(values: &[T], output_len: usize, slices: &[(usize, usize)]) -> Buffer<T> {
    let mut collected = BufferMut::<T>::with_capacity(output_len);
    for &(start, end) in slices {
        let run = &values[start..end];
        collected.extend_from_slice(run);
    }
    collected.freeze()
}
39+
40+
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;
    use vortex_mask::Mask;

    use super::*;

    /// A mixed mask keeps exactly the elements at the `true` positions.
    #[test]
    fn test_filter_buffer_by_indices() {
        let input = buffer![10u32, 20, 30, 40, 50];
        let keep = Mask::from_iter([true, false, true, false, true]);

        let output = input.filter(&keep);

        assert_eq!(output, buffer![10u32, 30, 50]);
    }

    /// An all-true mask returns every element.
    #[test]
    fn test_filter_buffer_all_true() {
        let input = buffer![1u64, 2, 3];
        let keep = Mask::new_true(3);

        let output = input.filter(&keep);

        assert_eq!(output, buffer![1u64, 2, 3]);
    }

    /// An all-false mask returns an empty buffer.
    #[test]
    fn test_filter_buffer_all_false() {
        let input = buffer![1i32, 2, 3, 4];
        let keep = Mask::new_false(4);

        let output = input.filter(&keep);

        assert!(output.is_empty());
    }

    /// Exercises the index-based helper directly.
    #[test]
    fn test_filter_indices_direct() {
        let input = buffer![100u32, 200, 300, 400];
        let positions = [0, 2, 3];

        let output = filter_indices(input.as_slice(), &positions);

        assert_eq!(output, buffer![100u32, 300, 400]);
    }

    /// Exercises the slice-based helper directly.
    #[test]
    fn test_filter_slices_direct() {
        let input = buffer![1u32, 2, 3, 4, 5];
        let ranges = [(0, 2), (4, 5)];

        let output = filter_slices(input.as_slice(), 3, &ranges);

        assert_eq!(output, buffer![1u32, 2, 5]);
    }
}

0 commit comments

Comments
 (0)