Skip to content

Commit ee634d4

Browse files
committed
BitView Filter
Signed-off-by: Nicholas Gates <[email protected]>
1 parent 76018d9 commit ee634d4

File tree

6 files changed

+24
-18
lines changed

6 files changed

+24
-18
lines changed

vortex-compute/benches/filter_bitview.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ use std::iter::Iterator;
1010
use divan::Bencher;
1111
use rand::prelude::StdRng;
1212
use rand::{Rng, SeedableRng};
13-
use vortex_buffer::{buffer_mut, BitBuffer};
13+
use vortex_buffer::{BitBuffer, buffer_mut};
1414
use vortex_compute::bench;
1515
use vortex_compute::filter::Filter;
1616

vortex-compute/src/filter/slice/neon/mod.rs

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,19 +12,19 @@ mod neon_u32;
1212
mod neon_u8;
1313

1414
use std::arch::is_aarch64_feature_detected;
15+
1516
use vortex_buffer::BitView;
1617
use vortex_error::vortex_panic;
1718

1819
/// Benchmark wrapper for [`filter_neon`].
// NOTE(review): in the added (`+`) lines below, `vortex_panic!` is no longer guarded by the
// feature check. It now follows the `if` block unconditionally, so this wrapper appears to panic
// with "NEON not detected on this CPU" even after `filter_neon` has run. The removed (`-`) lines
// panicked only when NEON was absent. Likely needs an `else` branch or an early `return` - confirm.
// NOTE(review): the `#[cfg(target_arch = "aarch64")]` gate is also removed in this hunk; confirm
// the enclosing module is already compiled for aarch64 only, since this function relies on
// `is_aarch64_feature_detected!`.
1920
#[doc(hidden)]
2021
#[cfg(feature = "bench")]
21-
#[cfg(target_arch = "aarch64")]
2222
#[inline(never)]
2323
pub fn bench_filter_neon<const NB: usize, T: Copy>(bit_view: &BitView<NB>, slice: &mut [T]) {
24-
if !is_aarch64_feature_detected!("neon") {
25-
vortex_panic!("NEON not detected on this CPU");
24+
if is_aarch64_feature_detected!("neon") {
25+
unsafe { filter_neon(slice, bit_view) }
2626
}
27-
unsafe { filter_neon(slice, bit_view) }
27+
vortex_panic!("NEON not detected on this CPU");
2828
}
2929

3030
/// Filters the given slice of items in place according to the provided BitView using neon
@@ -52,6 +52,7 @@ pub(super) unsafe fn filter_neon<const NB: usize, T: Copy>(slice: &mut [T], mask
5252
}
5353

5454
#[cfg(test)]
55+
#[allow(clippy::cast_possible_truncation)]
5556
mod tests {
5657
use super::*;
5758

vortex-compute/src/filter/slice/neon/neon_u16.rs

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,13 @@
33

44
#![allow(unsafe_op_in_unsafe_fn)]
55

6-
use crate::filter::slice::neon::neon_u8::SHUFFLE_MASKS;
76
use std::arch::aarch64::*;
87
use std::ptr;
8+
99
use vortex_buffer::BitView;
1010

11+
use crate::filter::slice::neon::neon_u8::SHUFFLE_MASKS;
12+
1113
/// For u16 types, we perform a similar strategy to u8 with a few key differences.
1214
///
1315
/// When it comes to shuffling u16 elements, we load u16x8 values into a uint8x8x2 vector. This
@@ -57,10 +59,10 @@ pub(super) unsafe fn filter_neon_u16<const NB: usize>(data: *mut u16, mask: &Bit
5759
let count = byte.count_ones() as usize;
5860
let shuffle_vec = vld1_u8(SHUFFLE_MASKS[byte as usize].as_ptr());
5961
// Shuffle both lower and higher byte vectors separately.
60-
let compressed = uint8x8x2_t {
61-
0: vtbl1_u8(values.0, shuffle_vec),
62-
1: vtbl1_u8(values.1, shuffle_vec),
63-
};
62+
let compressed = uint8x8x2_t(
63+
vtbl1_u8(values.0, shuffle_vec),
64+
vtbl1_u8(values.1, shuffle_vec),
65+
);
6466

6567
// Store all compressed values, and only increment write_ptr by count.
6668
vst2_u8(write_ptr.cast(), compressed);

vortex-compute/src/filter/slice/neon/neon_u32.rs

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,13 @@
33

44
#![allow(unsafe_op_in_unsafe_fn)]
55

6-
use crate::filter::slice::neon::neon_u8::SHUFFLE_MASKS;
76
use std::arch::aarch64::*;
87
use std::ptr;
8+
99
use vortex_buffer::BitView;
1010

11+
use crate::filter::slice::neon::neon_u8::SHUFFLE_MASKS;
12+
1113
/// For u32 values we can only look at 4 values at a time (128 bits).
1214
/// Therefore, we have a very manageable 16 possible bitmask combinations (0..15) and therefore
1315
/// avoid the need for large lookup tables.
@@ -43,12 +45,12 @@ pub(super) unsafe fn filter_neon_u32<const NB: usize>(data: *mut u32, mask: &Bit
4345
let shuffle_vec = vld1_u8(SHUFFLE_MASKS[byte as usize].as_ptr());
4446

4547
// Shuffle all four byte vectors separately.
46-
let compressed = uint8x8x4_t {
47-
0: vtbl1_u8(values.0, shuffle_vec),
48-
1: vtbl1_u8(values.1, shuffle_vec),
49-
2: vtbl1_u8(values.2, shuffle_vec),
50-
3: vtbl1_u8(values.3, shuffle_vec),
51-
};
48+
let compressed = uint8x8x4_t(
49+
vtbl1_u8(values.0, shuffle_vec),
50+
vtbl1_u8(values.1, shuffle_vec),
51+
vtbl1_u8(values.2, shuffle_vec),
52+
vtbl1_u8(values.3, shuffle_vec),
53+
);
5254

5355
// Store all compressed values, and only increment write_ptr by count.
5456
vst4_u8(write_ptr.cast(), compressed);

vortex-compute/src/filter/slice/neon/neon_u8.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55

66
use std::arch::aarch64::*;
77
use std::ptr;
8+
89
use vortex_buffer::BitView;
910

1011
/// For u8 types, we use NEON's tbl lookup instruction to perform a shuffle based on a pre-computed

vortex-compute/src/filter/slice/scalar.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ pub(super) fn filter_scalar<const NB: usize, T: Copy>(slice: &mut [T], mask: &Bi
4545
let bit_pos = word.trailing_zeros();
4646
word &= word - 1; // Clear the bit at `bit_pos`
4747
let span = word.trailing_ones();
48-
word = word >> span;
48+
word >>= span;
4949
unsafe {
5050
ptr::copy(read_ptr.add(bit_pos as usize), write_ptr, span as usize);
5151
write_ptr = write_ptr.add(span as usize);

0 commit comments

Comments
 (0)