diff --git a/src/control/group/sse2.rs b/src/control/group/sse2.rs index 0d4b10822..2107a07ca 100644 --- a/src/control/group/sse2.rs +++ b/src/control/group/sse2.rs @@ -73,7 +73,7 @@ impl Group { /// Returns a `BitMask` indicating all tags in the group which have /// the given value. #[inline] - pub(crate) fn match_tag(self, tag: Tag) -> BitMask { + pub(crate) fn match_tag(self, tag: i32) -> BitMask { #[allow( clippy::cast_possible_wrap, // tag.0: Tag as i8 // tag: i32 as u16 @@ -83,7 +83,7 @@ impl Group { clippy::cast_possible_truncation )] unsafe { - let cmp = x86::_mm_cmpeq_epi8(self.0, x86::_mm_set1_epi8(tag.0 as i8)); + let cmp = x86::_mm_cmpeq_epi8(self.0, x86::_mm_set1_epi32(tag)); BitMask(x86::_mm_movemask_epi8(cmp) as u16) } } @@ -92,7 +92,7 @@ impl Group { /// `EMPTY`. #[inline] pub(crate) fn match_empty(self) -> BitMask { - self.match_tag(Tag::EMPTY) + self.match_tag(Tag::EMPTY32) } /// Returns a `BitMask` indicating all tags in the group which are @@ -107,15 +107,25 @@ impl Group { clippy::cast_possible_truncation )] unsafe { - // A tag is EMPTY or DELETED iff the high bit is set - BitMask(x86::_mm_movemask_epi8(self.0) as u16) + let cmp = x86::_mm_cmpgt_epi8(self.0, x86::_mm_set1_epi32(Tag::MAX_TAG32)); + BitMask(x86::_mm_movemask_epi8(cmp) as u16) } } /// Returns a `BitMask` indicating all tags in the group which are full. #[inline] pub(crate) fn match_full(&self) -> BitMask { - self.match_empty_or_deleted().invert() + #[allow( + // tag: i32 as u16 + // note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the + // upper 16-bits of the i32 are zeroed: + clippy::cast_sign_loss, + clippy::cast_possible_truncation + )] + unsafe { + let cmp = x86::_mm_cmplt_epi8(self.0, x86::_mm_set1_epi32(Tag::DELETED32)); + BitMask(x86::_mm_movemask_epi8(cmp) as u16) + } } /// Performs the following transformation on all tags in the group: @@ -124,22 +134,16 @@ impl Group { /// - `FULL => DELETED` #[inline] pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self { - // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111 - // and high_bit = 0 (FULL) to 1000_0000 - // - // Here's this logic expanded to concrete values: - // let special = 0 > tag = 1111_1111 (true) or 0000_0000 (false) - // 1111_1111 | 1000_0000 = 1111_1111 - // 0000_0000 | 1000_0000 = 1000_0000 - #[allow( - clippy::cast_possible_wrap, // tag: Tag::DELETED.0 as i8 - )] + debug_assert_eq!(127, Tag::EMPTY.0); + debug_assert_eq!(126, Tag::DELETED.0); + + #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)] unsafe { - let zero = x86::_mm_setzero_si128(); - let special = x86::_mm_cmpgt_epi8(zero, self.0); + let is_special = x86::_mm_cmpgt_epi8(self.0, x86::_mm_set1_epi32(Tag::MAX_TAG32)); + Group(x86::_mm_or_si128( - special, - x86::_mm_set1_epi8(Tag::DELETED.0 as i8), + x86::_mm_and_si128(is_special, x86::_mm_set1_epi32(Tag::EMPTY32)), // EMPTY if special + x86::_mm_set1_epi32(Tag::DELETED32), // else DELETED )) } } diff --git a/src/control/tag.rs b/src/control/tag.rs index 817dd55cd..6179bba7f 100644 --- a/src/control/tag.rs +++ b/src/control/tag.rs @@ -3,50 +3,79 @@ use core::{fmt, mem}; /// Single tag in a control group. #[derive(Copy, Clone, PartialEq, Eq)] #[repr(transparent)] -pub(crate) struct Tag(pub(super) u8); +pub(crate) struct Tag(pub(super) i8); impl Tag { /// Control tag value for an empty bucket. - pub(crate) const EMPTY: Tag = Tag(0b1111_1111); + pub(crate) const EMPTY: Tag = Tag(0b0111_1111); // 127 + pub(crate) const EMPTY32: i32 = 0x7F7F7F7F; /// Control tag value for a deleted bucket. - pub(crate) const DELETED: Tag = Tag(0b1000_0000); + pub(crate) const DELETED: Tag = Tag(0b0111_1110); // 126 + pub(crate) const DELETED32: i32 = 0x7E7E7E7E; + + pub(crate) const MAX_TAG32: i32 = 0x7D7D7D7D; // 4*125 /// Checks whether a control tag represents a full bucket (top bit is clear). #[inline] pub(crate) const fn is_full(self) -> bool { - self.0 & 0x80 == 0 + self.0 < Tag::DELETED.0 } /// Checks whether a control tag represents a special value (top bit is set). #[inline] pub(crate) const fn is_special(self) -> bool { - self.0 & 0x80 != 0 + self.0 >= Tag::DELETED.0 } /// Checks whether a special control value is EMPTY (just check 1 bit). #[inline] pub(crate) const fn special_is_empty(self) -> bool { debug_assert!(self.is_special()); - self.0 & 0x01 != 0 + self.0 == Tag::EMPTY.0 } /// Creates a control tag representing a full bucket with the given hash. - #[inline] + #[inline(always)] #[allow(clippy::cast_possible_truncation)] pub(crate) const fn full(hash: u64) -> Tag { - // Constant for function that grabs the top 7 bits of the hash. + Tag(Self::full32(hash) as i8) + } + + #[inline] + #[allow(clippy::cast_possible_truncation)] + pub(crate) const fn full32(hash: u64) -> i32 { + // Constant for function that grabs the top 8 bits * 4 of the hash. const MIN_HASH_LEN: usize = if mem::size_of::() < mem::size_of::() { mem::size_of::() } else { mem::size_of::() }; - // Grab the top 7 bits of the hash. While the hash is normally a full 64-bit + // Constant array of 8 bits duplicated as 32 bits + // with re-mapping of special values. + const fn compute_control() -> [i32; 256] { + let mut result = [0; 256]; + + let mut i: u32 = 0; + while i < 256 { + result[i as usize] = (i | (i << 8) | (i << 16) | (i << 24)) as i32; + i += 1; + } + + // Avoid overlap with special values. + result[Tag::EMPTY.0 as usize] = 0x29292929; + result[Tag::DELETED.0 as usize] = 0x53535353; + + result + } + const CONTROL: [i32; 256] = compute_control(); + + // Grab the top 8 bits of the hash. While the hash is normally a full 64-bit // value, some hash functions (such as FxHash) produce a usize result // instead, which means that the top 32 bits are 0 on 32-bit platforms. // So we use MIN_HASH_LEN constant to handle this. - let top7 = hash >> (MIN_HASH_LEN * 8 - 7); - Tag((top7 & 0x7f) as u8) // truncation + let top8 = (hash >> (MIN_HASH_LEN * 8 - 8)) as usize; + CONTROL[top8] // truncation } } impl fmt::Debug for Tag { @@ -58,7 +87,7 @@ impl fmt::Debug for Tag { f.pad("DELETED") } } else { - f.debug_tuple("full").field(&(self.0 & 0x7F)).finish() + f.debug_tuple("full").field(&self.0).finish() } } } @@ -78,6 +107,6 @@ impl TagSliceExt for [Tag] { #[inline] fn fill_tag(&mut self, tag: Tag) { // SAFETY: We have access to the entire slice, so, we can write to the entire slice. - unsafe { self.as_mut_ptr().write_bytes(tag.0, self.len()) } + unsafe { self.as_mut_ptr().write_bytes(tag.0 as u8, self.len()) } } } diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 6a8d37d82..0b52e40fd 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1326,7 +1326,7 @@ impl RawTable { /// Returns an iterator over occupied buckets that could match a given hash. /// - /// `RawTable` only stores 7 bits of the hash value, so this iterator may + /// `RawTable` only stores 8 bits of the hash value, so this iterator may /// return items that have a hash value different than the one provided. You /// should always validate the returned values before using them. /// @@ -1683,7 +1683,7 @@ impl RawTableInner { ) -> Result { let mut insert_slot = None; - let tag_hash = Tag::full(hash); + let tag_hash = Tag::full32(hash); let mut probe_seq = self.probe_seq(hash); loop { @@ -1893,7 +1893,7 @@ impl RawTableInner { /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline(always)] unsafe fn find_inner(&self, hash: u64, eq: &mut dyn FnMut(usize) -> bool) -> Option { - let tag_hash = Tag::full(hash); + let tag_hash = Tag::full32(hash); let mut probe_seq = self.probe_seq(hash); loop { @@ -4006,7 +4006,7 @@ impl FusedIterator for RawDrain<'_, T, A> {} /// Iterator over occupied buckets that could match a given hash. /// -/// `RawTable` only stores 7 bits of the hash value, so this iterator may return +/// `RawTable` only stores 8 bits of the hash value, so this iterator may return /// items that have a hash value different than the one provided. You should /// always validate the returned values before using them. /// @@ -4033,8 +4033,8 @@ struct RawIterHashInner { bucket_mask: usize, ctrl: NonNull, - // The top 7 bits of the hash. - tag_hash: Tag, + // The top 8 bits of the hash. + tag_hash: i32, // The sequence of groups to probe in the search. probe_seq: ProbeSeq, @@ -4079,7 +4079,7 @@ impl Default for RawIterHash { impl RawIterHashInner { #[cfg_attr(feature = "inline-more", inline)] unsafe fn new(table: &RawTableInner, hash: u64) -> Self { - let tag_hash = Tag::full(hash); + let tag_hash = Tag::full32(hash); let probe_seq = table.probe_seq(hash); let group = Group::load(table.ctrl(probe_seq.pos)); let bitmask = group.match_tag(tag_hash).into_iter();