Skip to content

Commit 9f20bd0

Browse files
committed
Replace intrinsics::cttz_nonzero with NonZero::trailing_zeros
1 parent d677fd4 commit 9f20bd0

File tree

5 files changed

+42
-37
lines changed

5 files changed

+42
-37
lines changed

src/raw/bitmask.rs

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
use super::imp::{BitMaskWord, BITMASK_ITER_MASK, BITMASK_MASK, BITMASK_STRIDE};
2-
#[cfg(feature = "nightly")]
3-
use core::intrinsics;
1+
use super::imp::{
2+
BitMaskWord, NonZeroBitMaskWord, BITMASK_ITER_MASK, BITMASK_MASK, BITMASK_STRIDE,
3+
};
44

55
/// A bit mask which contains the result of a `Match` operation on a `Group` and
66
/// allows iterating through them.
@@ -47,26 +47,13 @@ impl BitMask {
4747
/// Returns the first set bit in the `BitMask`, if there is one.
4848
#[inline]
4949
pub(crate) fn lowest_set_bit(self) -> Option<usize> {
50-
if self.0 == 0 {
51-
None
50+
if let Some(nonzero) = NonZeroBitMaskWord::new(self.0) {
51+
Some(Self::nonzero_trailing_zeros(nonzero))
5252
} else {
53-
Some(unsafe { self.lowest_set_bit_nonzero() })
53+
None
5454
}
5555
}
5656

57-
/// Returns the first set bit in the `BitMask`, if there is one. The
58-
/// bitmask must not be empty.
59-
#[inline]
60-
#[cfg(feature = "nightly")]
61-
pub(crate) unsafe fn lowest_set_bit_nonzero(self) -> usize {
62-
intrinsics::cttz_nonzero(self.0) as usize / BITMASK_STRIDE
63-
}
64-
#[inline]
65-
#[cfg(not(feature = "nightly"))]
66-
pub(crate) unsafe fn lowest_set_bit_nonzero(self) -> usize {
67-
self.trailing_zeros()
68-
}
69-
7057
/// Returns the number of trailing zeroes in the `BitMask`.
7158
#[inline]
7259
pub(crate) fn trailing_zeros(self) -> usize {
@@ -82,6 +69,18 @@ impl BitMask {
8269
}
8370
}
8471

72+
/// Same as `trailing_zeros` above, but takes a `NonZeroBitMaskWord`.
73+
#[inline]
74+
fn nonzero_trailing_zeros(nonzero: NonZeroBitMaskWord) -> usize {
75+
if cfg!(target_arch = "arm") && BITMASK_STRIDE % 8 == 0 {
76+
// SAFETY: A byte-swapped non-zero value is still non-zero.
77+
let swapped = unsafe { NonZeroBitMaskWord::new_unchecked(nonzero.get().swap_bytes()) };
78+
swapped.leading_zeros() as usize / BITMASK_STRIDE
79+
} else {
80+
nonzero.trailing_zeros() as usize / BITMASK_STRIDE
81+
}
82+
}
83+
8584
/// Returns the number of leading zeroes in the `BitMask`.
8685
#[inline]
8786
pub(crate) fn leading_zeros(self) -> usize {

src/raw/generic.rs

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,24 @@ use core::{mem, ptr};
55
// Use the native word size as the group size. Using a 64-bit group size on
66
// a 32-bit architecture will just end up being more expensive because
77
// shifts and multiplies will need to be emulated.
8-
#[cfg(any(
9-
target_pointer_width = "64",
10-
target_arch = "aarch64",
11-
target_arch = "x86_64",
12-
target_arch = "wasm32",
13-
))]
14-
type GroupWord = u64;
15-
#[cfg(all(
16-
any(target_pointer_width = "32", target_pointer_width = "16"),
17-
not(target_arch = "aarch64"),
18-
not(target_arch = "x86_64"),
19-
not(target_arch = "wasm32"),
20-
))]
21-
type GroupWord = u32;
8+
9+
cfg_if! {
10+
if #[cfg(any(
11+
target_pointer_width = "64",
12+
target_arch = "aarch64",
13+
target_arch = "x86_64",
14+
target_arch = "wasm32",
15+
))] {
16+
type GroupWord = u64;
17+
type NonZeroGroupWord = core::num::NonZeroU64;
18+
} else {
19+
type GroupWord = u32;
20+
type NonZeroGroupWord = core::num::NonZeroU32;
21+
}
22+
}
2223

2324
pub(crate) type BitMaskWord = GroupWord;
25+
pub(crate) type NonZeroBitMaskWord = NonZeroGroupWord;
2426
pub(crate) const BITMASK_STRIDE: usize = 8;
2527
// We only care about the highest bit of each byte for the mask.
2628
#[allow(clippy::cast_possible_truncation, clippy::unnecessary_cast)]

src/raw/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1648,14 +1648,14 @@ impl<A: Allocator + Clone> RawTableInner<A> {
16481648
// we will never end up in the given branch, since
16491649
// `(probe_seq.pos + bit) & self.bucket_mask` in `find_insert_slot_in_group` cannot
16501650
// return a full bucket index. For tables smaller than the group width, calling the
1651-
// `lowest_set_bit_nonzero` function (when `nightly` feature enabled) is also
1651+
// `unwrap_unchecked` method on the `Option` returned by `lowest_set_bit` is also
16521652
// safe, as the trailing control bytes outside the range of the table are filled
16531653
// with EMPTY bytes, so this second scan either finds an empty slot (due to the
1654-
// load factor) or hits the trailing control bytes (containing EMPTY). See
1655-
// `intrinsics::cttz_nonzero` for more information.
1654+
// load factor) or hits the trailing control bytes (containing EMPTY).
16561655
index = Group::load_aligned(self.ctrl(0))
16571656
.match_empty_or_deleted()
1658-
.lowest_set_bit_nonzero();
1657+
.lowest_set_bit()
1658+
.unwrap_unchecked();
16591659
}
16601660
InsertSlot { index }
16611661
}

src/raw/neon.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@ use super::bitmask::BitMask;
22
use super::EMPTY;
33
use core::arch::aarch64 as neon;
44
use core::mem;
5+
use core::num::NonZeroU64;
56

67
pub(crate) type BitMaskWord = u64;
8+
pub(crate) type NonZeroBitMaskWord = NonZeroU64;
79
pub(crate) const BITMASK_STRIDE: usize = 8;
810
pub(crate) const BITMASK_MASK: BitMaskWord = !0;
911
pub(crate) const BITMASK_ITER_MASK: BitMaskWord = 0x8080_8080_8080_8080;

src/raw/sse2.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
use super::bitmask::BitMask;
22
use super::EMPTY;
33
use core::mem;
4+
use core::num::NonZeroU16;
45

56
#[cfg(target_arch = "x86")]
67
use core::arch::x86;
78
#[cfg(target_arch = "x86_64")]
89
use core::arch::x86_64 as x86;
910

1011
pub(crate) type BitMaskWord = u16;
12+
pub(crate) type NonZeroBitMaskWord = NonZeroU16;
1113
pub(crate) const BITMASK_STRIDE: usize = 1;
1214
pub(crate) const BITMASK_MASK: BitMaskWord = 0xffff;
1315
pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0;

0 commit comments

Comments
 (0)