Skip to content

Commit b1f94ab

Browse files
committed
BitView Filter
Signed-off-by: Nicholas Gates <[email protected]>
1 parent 1ae589a commit b1f94ab

File tree

13 files changed

+779
-71
lines changed

13 files changed

+779
-71
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-buffer/Cargo.toml

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ all-features = true
1818

1919
[features]
2020
arrow = []
21-
bench = []
2221
memmap2 = ["dep:memmap2"]
2322
serde = ["dep:serde"]
2423
warn-copy = ["dep:log"]
@@ -44,13 +43,7 @@ workspace = true
4443
[dev-dependencies]
4544
arrow-buffer = { workspace = true }
4645
divan = { workspace = true }
47-
rand = { workspace = true }
4846
rstest = { workspace = true }
49-
vortex-buffer = { path = ".", features = ["bench"] }
50-
51-
[[bench]]
52-
name = "bitview_filter"
53-
harness = false
5447

5548
[[bench]]
5649
name = "vortex_buffer"

vortex-buffer/src/bit/view.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,17 @@ impl<'a, const NB: usize> BitView<'a, NB> {
156156
(0..Self::N_WORDS).map(move |idx| unsafe { ptr.add(idx).read_unaligned() })
157157
}
158158

159+
/// Iterate the [`BitView`] in fixed-size words.
160+
///
161+
/// The words are loaded using unaligned loads to ensure correct bit ordering.
162+
/// For example, bit 0 is located in `word & (1 << 0)`, bit 63 is located in `word & (1 << 63)`,
163+
/// assuming the word size is 64 bits.
164+
pub fn iter_sized<W: 'static>(&self) -> impl Iterator<Item = W> + '_ {
165+
let ptr = self.bits.as_ptr().cast::<W>();
166+
// The word count `NB / size_of::<W>()` is a compile-time constant, intended to trigger loop unrolling.
// Any trailing bytes that do not fill a whole `W` are not yielded.
// NOTE(review): `W` is only bounded by `'static`; a zero-sized `W` would divide by zero here, and a `W`
// with invalid bit patterns would make the unaligned read unsound — confirm callers only use integer words.
167+
(0..(NB / size_of::<W>())).map(move |idx| unsafe { ptr.add(idx).read_unaligned() })
168+
}
169+
159170
/// Runs the provided function `f` for each index of a `true` bit in the view.
160171
pub fn iter_ones<F>(&self, mut f: F)
161172
where

vortex-buffer/src/filter/mod.rs

Lines changed: 0 additions & 31 deletions
This file was deleted.

vortex-buffer/src/lib.rs

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ mod r#const;
6767
#[cfg(gpu_unstable)]
6868
mod cuda;
6969
mod debug;
70-
mod filter;
7170
mod macros;
7271
#[cfg(feature = "memmap2")]
7372
mod memmap2;
@@ -84,9 +83,3 @@ pub type ByteBufferMut = BufferMut<u8>;
8483

8584
/// A const-aligned buffer of u8.
8685
pub type ConstByteBuffer<const A: usize> = ConstBuffer<u8, A>;
87-
88-
/// Functions exported for benchmarking purposes.
89-
#[cfg(feature = "bench")]
90-
pub mod bench {
91-
pub use super::filter::scalar_in_place::bench_filter_in_place_scalar;
92-
}

vortex-compute/Cargo.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,16 @@ num-traits = { workspace = true }
3434
[features]
3535
default = ["arrow"]
3636
arrow = ["dep:arrow-array", "dep:arrow-buffer", "dep:arrow-schema"]
37+
bench = []
3738

3839
[dev-dependencies]
3940
divan = { workspace = true }
41+
rand = { workspace = true }
42+
vortex-compute = { path = ".", features = ["bench"] }
43+
44+
[[bench]]
45+
name = "filter_bitview"
46+
harness = false
4047

4148
[[bench]]
4249
name = "filter_buffer_mut"
Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ use std::iter::Iterator;
99
use divan::Bencher;
1010
use rand::prelude::StdRng;
1111
use rand::{Rng, SeedableRng};
12-
use vortex_buffer::bench::bench_filter_in_place_scalar;
1312
use vortex_buffer::{buffer_mut, BitBuffer};
13+
use vortex_compute::bench;
1414

1515
fn main() {
1616
divan::main();
@@ -19,6 +19,7 @@ fn main() {
1919
// Focus on benchmarking for our known vector length.
2020
const N: usize = 1024;
2121
type BitView<'a> = vortex_buffer::BitView<'a, 128>;
22+
2223
const MASK_DENSITY: &[f64] = &[
2324
0.0, 0.01, 0.05, 0.1, 0.25, // 0.3,
2425
// 0.4,
@@ -28,11 +29,23 @@ const MASK_DENSITY: &[f64] = &[
2829
0.99, 1.00,
2930
];
3031

31-
#[divan::bench(
32-
types = [u8, u16, u32, u64, u128],
33-
args = MASK_DENSITY,
34-
)]
35-
fn filter_scalar_in_place<T: Default + Copy>(bencher: Bencher, mask_density: f64) {
32+
// Benchmark the scalar filter kernel for each listed element type and each mask density.
#[divan::bench(types = [u8, u16, u32, u64, u128],args = MASK_DENSITY)]
33+
fn filter_scalar<T: Default + Copy>(bencher: Bencher, mask_density: f64) {
34+
bench_filter_fn(bencher, mask_density, bench::bench_filter_scalar::<_, T>)
35+
}
36+
37+
// Benchmark the NEON filter kernel for each listed element type and each mask density.
// Only compiled on aarch64 targets.
#[cfg(target_arch = "aarch64")]
38+
#[divan::bench(types = [u8, u16, u32, u64, u128],args = MASK_DENSITY)]
39+
fn filter_neon<T: Default + Copy>(bencher: Bencher, mask_density: f64) {
40+
// Nothing is measured when NEON is not detected at runtime.
if std::arch::is_aarch64_feature_detected!("neon") {
41+
bench_filter_fn(bencher, mask_density, bench::bench_filter_neon::<_, T>)
42+
}
43+
}
44+
45+
fn bench_filter_fn<T: Default + Copy, F>(bencher: Bencher, mask_density: f64, f: F)
46+
where
47+
F: Fn(&BitView, &mut [T]),
48+
{
3649
let mut buffer = buffer_mut![T::default(); N];
3750

3851
let mut rng = StdRng::seed_from_u64(0);
@@ -42,7 +55,7 @@ fn filter_scalar_in_place<T: Default + Copy>(bencher: Bencher, mask_density: f64
4255

4356
bencher.bench_local(|| {
4457
let view = BitView::new(mask.inner().as_ref().try_into().unwrap());
45-
bench_filter_in_place_scalar(&view, &mut buffer);
58+
f(&view, &mut buffer);
4659
black_box(&mut buffer);
4760
});
4861
}

vortex-compute/src/filter/buffer.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4-
mod scalar;
5-
64
use vortex_buffer::{Buffer, BufferMut};
75
use vortex_mask::{Mask, MaskIter};
86

vortex-compute/src/filter/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use std::ops::Deref;
88
mod bitbuffer;
99
mod buffer;
1010
mod mask;
11+
pub(crate) mod slice;
1112
mod vector;
1213

1314
/// Function for filtering based on a selection mask.
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use crate::filter::Filter;
5+
use std::arch::is_aarch64_feature_detected;
6+
use vortex_buffer::BitView;
7+
8+
#[cfg(target_arch = "aarch64")]
9+
pub(crate) mod neon;
10+
pub(crate) mod scalar;
11+
12+
impl<'a, const NB: usize, T: Copy> Filter<BitView<'a, NB>> for &mut [T] {
13+
type Output = ();
14+
15+
fn filter(self, mask: &BitView<'a, NB>) -> Self::Output {
16+
#[cfg(target_arch = "aarch64")]
17+
{
18+
if is_aarch64_feature_detected!("neon") {
19+
return unsafe { neon::filter_neon(self, mask) };
20+
}
21+
}
22+
23+
// Otherwise, fall back to scalar implementation
24+
scalar::filter_scalar(self, mask);
25+
}
26+
}

0 commit comments

Comments
 (0)