Skip to content

Commit 1ae589a

Browse files
committed
BitView Filter
Signed-off-by: Nicholas Gates <[email protected]>
1 parent 33a5553 commit 1ae589a

File tree

9 files changed

+168
-14
lines changed

9 files changed

+168
-14
lines changed

Cargo.lock

Lines changed: 12 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-buffer/Cargo.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ all-features = true
1818

1919
[features]
2020
arrow = []
21+
bench = []
2122
memmap2 = ["dep:memmap2"]
2223
serde = ["dep:serde"]
2324
warn-copy = ["dep:log"]
@@ -43,7 +44,13 @@ workspace = true
4344
[dev-dependencies]
4445
arrow-buffer = { workspace = true }
4546
divan = { workspace = true }
47+
rand = { workspace = true }
4648
rstest = { workspace = true }
49+
vortex-buffer = { path = ".", features = ["bench"] }
50+
51+
[[bench]]
52+
name = "bitview_filter"
53+
harness = false
4754

4855
[[bench]]
4956
name = "vortex_buffer"
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
#![allow(clippy::unwrap_used)]
5+
6+
use std::hint::black_box;
7+
use std::iter::Iterator;
8+
9+
use divan::Bencher;
10+
use rand::prelude::StdRng;
11+
use rand::{Rng, SeedableRng};
12+
use vortex_buffer::bench::bench_filter_in_place_scalar;
13+
use vortex_buffer::{buffer_mut, BitBuffer};
14+
15+
fn main() {
16+
divan::main();
17+
}
18+
19+
// Focus on benchmarking for our known vector length.
20+
const N: usize = 1024;
21+
type BitView<'a> = vortex_buffer::BitView<'a, 128>;
22+
const MASK_DENSITY: &[f64] = &[
23+
0.0, 0.01, 0.05, 0.1, 0.25, // 0.3,
24+
// 0.4,
25+
0.5, // 0.6,
26+
0.75, // 0.85,
27+
0.9, // 0.95,
28+
0.99, 1.00,
29+
];
30+
31+
#[divan::bench(
32+
types = [u8, u16, u32, u64, u128],
33+
args = MASK_DENSITY,
34+
)]
35+
fn filter_scalar_in_place<T: Default + Copy>(bencher: Bencher, mask_density: f64) {
36+
let mut buffer = buffer_mut![T::default(); N];
37+
38+
let mut rng = StdRng::seed_from_u64(0);
39+
let mask = (0..N)
40+
.map(|_| rng.random_bool(mask_density))
41+
.collect::<BitBuffer>();
42+
43+
bencher.bench_local(|| {
44+
let view = BitView::new(mask.inner().as_ref().try_into().unwrap());
45+
bench_filter_in_place_scalar(&view, &mut buffer);
46+
black_box(&mut buffer);
47+
});
48+
}

vortex-buffer/src/bit/view.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,10 @@ impl<const NB: usize> BitView<'static, NB> {
5252
}
5353

5454
impl<'a, const NB: usize> BitView<'a, NB> {
55-
const N: usize = NB * 8;
56-
const N_WORDS: usize = NB * 8 / (usize::BITS as usize);
55+
/// The number of bits in the view.
56+
pub const N: usize = NB * 8;
57+
/// The number of machine words in the view.
58+
pub const N_WORDS: usize = NB * 8 / (usize::BITS as usize);
5759

5860
const _ASSERT_MULTIPLE_OF_8: () = assert!(
5961
NB % 8 == 0,
@@ -148,7 +150,7 @@ impl<'a, const NB: usize> BitView<'a, NB> {
148150
/// The words are loaded using unaligned loads to ensure correct bit ordering.
149151
/// For example, bit 0 is located in `word & 1 << 0`, bit 63 is located in `word & 1 << 63`,
150152
/// assuming the word size is 64 bits.
151-
fn iter_words(&self) -> impl Iterator<Item = usize> + '_ {
153+
pub fn iter_words(&self) -> impl Iterator<Item = usize> + '_ {
152154
let ptr = self.bits.as_ptr().cast::<usize>();
153155
// We use constant N_WORDS to trigger loop unrolling.
154156
(0..Self::N_WORDS).map(move |idx| unsafe { ptr.add(idx).read_unaligned() })

vortex-buffer/src/filter/mod.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Implementations for filtering data using a [`BitView`] selection mask.
5+
//!
6+
//! These implementations are highly optimized as filtering is such a core operation within Vortex.
7+
8+
pub(crate) mod scalar_in_place;
9+
10+
use crate::BitView;
11+
12+
impl<'a, const NB: usize> BitView<'a, NB> {
13+
/// Filters the given slice of items in place.
14+
///
15+
/// After calling this method, the first `self.true_count()` elements of `items`
16+
/// will contain the filtered items. The remaining elements beyond that point are undefined.
17+
pub fn filter_in_place<T: Copy>(&self, items: &mut [T]) {
18+
match self.true_count() {
19+
0 => {
20+
// No items to keep; do nothing.
21+
}
22+
n if n == items.len() => {
23+
// All items to keep; do nothing.
24+
}
25+
_ => {
26+
// Some items to keep; do the filtering.
27+
scalar_in_place::filter_in_place_scalar(self, items);
28+
}
29+
}
30+
}
31+
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use crate::BitView;
5+
use std::ptr;
6+
7+
/// Benchmark-only entry point that forwards to [`filter_in_place_scalar`].
///
/// The real implementation is `pub(crate)`; this wrapper is compiled only with the `bench`
/// feature and is hidden from the generated documentation.
#[doc(hidden)]
#[cfg(feature = "bench")]
pub fn bench_filter_in_place_scalar<const NB: usize, T: Copy>(
    bit_view: &BitView<'_, NB>,
    items: &mut [T],
) {
    filter_in_place_scalar(bit_view, items)
}
16+
17+
/// Filters the given slice of items in place according to the provided BitView using scalar
18+
/// (non-SIMD) code.
19+
///
20+
/// The caller *should* handle where the BitView has zero or full true counts to avoid unnecessary
21+
/// work.
22+
pub(crate) fn filter_in_place_scalar<const NB: usize, T: Copy>(
23+
bit_view: &BitView<NB>,
24+
items: &mut [T],
25+
) {
26+
let mut read_ptr = items.as_ptr();
27+
let mut write_ptr = items.as_mut_ptr();
28+
29+
for mut word in bit_view.iter_words() {
30+
match word {
31+
usize::MAX => {
32+
// All items => copy usize::BITS items.
33+
unsafe {
34+
// We cannot guarantee non-overlapping, so use ptr::copy rather than
35+
// ptr::copy_nonoverlapping.
36+
ptr::copy(read_ptr, write_ptr, usize::BITS as usize);
37+
read_ptr = read_ptr.add(usize::BITS as usize);
38+
write_ptr = write_ptr.add(usize::BITS as usize);
39+
}
40+
}
41+
_ => {
42+
// Note this also handles word == 0 case by skipping the loop entirely.
43+
while word != 0 {
44+
let bit_pos = word.trailing_zeros();
45+
unsafe {
46+
ptr::copy(read_ptr.add(bit_pos as usize), write_ptr, 1);
47+
write_ptr = write_ptr.add(1);
48+
}
49+
word &= word - 1; // Clear the bit at `bit_pos`
50+
}
51+
unsafe { read_ptr = read_ptr.add(usize::BITS as usize) };
52+
}
53+
}
54+
}
55+
}

vortex-buffer/src/lib.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ mod r#const;
6767
#[cfg(gpu_unstable)]
6868
mod cuda;
6969
mod debug;
70+
mod filter;
7071
mod macros;
7172
#[cfg(feature = "memmap2")]
7273
mod memmap2;
@@ -83,3 +84,9 @@ pub type ByteBufferMut = BufferMut<u8>;
8384

8485
/// A const-aligned buffer of u8.
8586
pub type ConstByteBuffer<const A: usize> = ConstBuffer<u8, A>;
87+
88+
/// Functions exported for benchmarking purposes.
///
/// Only compiled when the `bench` feature is enabled.
#[cfg(feature = "bench")]
pub mod bench {
    pub use crate::filter::scalar_in_place::bench_filter_in_place_scalar;
}

vortex-compute/src/filter/buffer.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
mod scalar;
5+
46
use vortex_buffer::{Buffer, BufferMut};
57
use vortex_mask::{Mask, MaskIter};
68

@@ -202,7 +204,7 @@ mod tests {
202204
assert_eq!(result, buffer![1u32, 2, 5]);
203205
}
204206

205-
use vortex_buffer::{BufferMut, buffer_mut};
207+
use vortex_buffer::{buffer_mut, BufferMut};
206208

207209
#[test]
208210
fn test_filter_all_true() {

vortex-compute/src/filter/slice.rs

Whitespace-only changes.

0 commit comments

Comments
 (0)