Skip to content

Commit 76018d9

Browse files
committed
BitView Filter
Signed-off-by: Nicholas Gates <[email protected]>
1 parent b1f94ab commit 76018d9

File tree

9 files changed

+462
-716
lines changed

9 files changed

+462
-716
lines changed

vortex-compute/benches/filter_bitview.rs

Lines changed: 53 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#![allow(clippy::unwrap_used)]
55

6+
use std::arch;
67
use std::hint::black_box;
78
use std::iter::Iterator;
89

@@ -11,6 +12,7 @@ use rand::prelude::StdRng;
1112
use rand::{Rng, SeedableRng};
1213
use vortex_buffer::{buffer_mut, BitBuffer};
1314
use vortex_compute::bench;
15+
use vortex_compute::filter::Filter;
1416

1517
fn main() {
1618
divan::main();
@@ -20,6 +22,34 @@ fn main() {
2022
const N: usize = 1024;
2123
type BitView<'a> = vortex_buffer::BitView<'a, 128>;
2224

25+
trait FilterImpl {
26+
fn filter<T: Copy>(bitview: &BitView, slice: &mut [T]);
27+
}
28+
29+
/// The main entry point for the filter function that performs all the dispatch.
30+
struct ActualFilter;
31+
impl FilterImpl for ActualFilter {
32+
fn filter<T: Copy>(bitview: &BitView, slice: &mut [T]) {
33+
slice.filter(bitview)
34+
}
35+
}
36+
37+
struct ScalarFilter;
38+
impl FilterImpl for ScalarFilter {
39+
fn filter<T: Copy>(bitview: &BitView, slice: &mut [T]) {
40+
bench::bench_filter_scalar::<_, T>(bitview, slice)
41+
}
42+
}
43+
44+
struct NeonFilter;
45+
impl FilterImpl for NeonFilter {
46+
fn filter<T: Copy>(bitview: &BitView, slice: &mut [T]) {
47+
if arch::is_aarch64_feature_detected!("neon") {
48+
bench::bench_filter_neon::<_, T>(bitview, slice)
49+
}
50+
}
51+
}
52+
2353
const MASK_DENSITY: &[f64] = &[
2454
0.0, 0.01, 0.05, 0.1, 0.25, // 0.3,
2555
// 0.4,
@@ -29,23 +59,32 @@ const MASK_DENSITY: &[f64] = &[
2959
0.99, 1.00,
3060
];
3161

32-
#[divan::bench(types = [u8, u16, u32, u64, u128],args = MASK_DENSITY)]
33-
fn filter_scalar<T: Default + Copy>(bencher: Bencher, mask_density: f64) {
34-
bench_filter_fn(bencher, mask_density, bench::bench_filter_scalar::<_, T>)
62+
#[divan::bench(types = [ScalarFilter, NeonFilter, ActualFilter], args = MASK_DENSITY)]
63+
fn filter_u8<F: FilterImpl>(bencher: Bencher, mask_density: f64) {
64+
bench_filter_fn::<F, u8>(bencher, mask_density)
3565
}
3666

37-
#[cfg(target_arch = "aarch64")]
38-
#[divan::bench(types = [u8, u16, u32, u64, u128],args = MASK_DENSITY)]
39-
fn filter_neon<T: Default + Copy>(bencher: Bencher, mask_density: f64) {
40-
if std::arch::is_aarch64_feature_detected!("neon") {
41-
bench_filter_fn(bencher, mask_density, bench::bench_filter_neon::<_, T>)
42-
}
67+
#[divan::bench(types = [ScalarFilter, NeonFilter, ActualFilter], args = MASK_DENSITY)]
68+
fn filter_u16<F: FilterImpl>(bencher: Bencher, mask_density: f64) {
69+
bench_filter_fn::<F, u16>(bencher, mask_density)
70+
}
71+
72+
#[divan::bench(types = [ScalarFilter, NeonFilter, ActualFilter], args = MASK_DENSITY)]
73+
fn filter_u32<F: FilterImpl>(bencher: Bencher, mask_density: f64) {
74+
bench_filter_fn::<F, u32>(bencher, mask_density)
75+
}
76+
77+
#[divan::bench(types = [ScalarFilter, NeonFilter, ActualFilter], args = MASK_DENSITY)]
78+
fn filter_u64<F: FilterImpl>(bencher: Bencher, mask_density: f64) {
79+
bench_filter_fn::<F, u64>(bencher, mask_density)
80+
}
81+
82+
#[divan::bench(types = [ScalarFilter, NeonFilter, ActualFilter], args = MASK_DENSITY)]
83+
fn filter_u128<F: FilterImpl>(bencher: Bencher, mask_density: f64) {
84+
bench_filter_fn::<F, u128>(bencher, mask_density)
4385
}
4486

45-
fn bench_filter_fn<T: Default + Copy, F>(bencher: Bencher, mask_density: f64, f: F)
46-
where
47-
F: Fn(&BitView, &mut [T]),
48-
{
87+
fn bench_filter_fn<F: FilterImpl, T: Default + Copy>(bencher: Bencher, mask_density: f64) {
4988
let mut buffer = buffer_mut![T::default(); N];
5089

5190
let mut rng = StdRng::seed_from_u64(0);
@@ -55,7 +94,7 @@ where
5594

5695
bencher.bench_local(|| {
5796
let view = BitView::new(mask.inner().as_ref().try_into().unwrap());
58-
f(&view, &mut buffer);
97+
F::filter(&view, &mut buffer);
5998
black_box(&mut buffer);
6099
});
61100
}

vortex-compute/src/filter/buffer.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ mod tests {
202202
assert_eq!(result, buffer![1u32, 2, 5]);
203203
}
204204

205-
use vortex_buffer::{buffer_mut, BufferMut};
205+
use vortex_buffer::{BufferMut, buffer_mut};
206206

207207
#[test]
208208
fn test_filter_all_true() {

vortex-compute/src/filter/slice/mod.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4-
use crate::filter::Filter;
54
use std::arch::is_aarch64_feature_detected;
5+
66
use vortex_buffer::BitView;
77

8+
use crate::filter::Filter;
9+
810
#[cfg(target_arch = "aarch64")]
911
pub(crate) mod neon;
1012
pub(crate) mod scalar;
@@ -16,6 +18,19 @@ impl<'a, const NB: usize, T: Copy> Filter<BitView<'a, NB>> for &mut [T] {
1618
#[cfg(target_arch = "aarch64")]
1719
{
1820
if is_aarch64_feature_detected!("neon") {
21+
// NEON is only faster for sufficiently dense masks.
22+
match size_of::<T>() {
23+
1 | 2 if mask.true_count() < (BitView::<NB>::N / 4) => {
24+
// For 1- and 2-byte elements (e.g. u8, u16), the mask-density threshold is ~0.25
25+
return scalar::filter_scalar(self, mask);
26+
}
27+
4 if mask.true_count() < (3 * BitView::<NB>::N / 4) => {
28+
// For 4-byte elements (e.g. u32), the mask-density threshold is ~0.75
29+
return scalar::filter_scalar(self, mask);
30+
}
31+
_ => {}
32+
}
33+
1934
return unsafe { neon::filter_neon(self, mask) };
2035
}
2136
}

0 commit comments

Comments
 (0)