Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 24 additions & 20 deletions src/control/group/sse2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ impl Group {
/// Returns a `BitMask` indicating all tags in the group which have
/// the given value.
#[inline]
pub(crate) fn match_tag(self, tag: Tag) -> BitMask {
pub(crate) fn match_tag(self, tag: i32) -> BitMask {
#[allow(
clippy::cast_possible_wrap, // tag.0: Tag as i8
// tag: i32 as u16
Expand All @@ -83,7 +83,7 @@ impl Group {
clippy::cast_possible_truncation
)]
unsafe {
let cmp = x86::_mm_cmpeq_epi8(self.0, x86::_mm_set1_epi8(tag.0 as i8));
let cmp = x86::_mm_cmpeq_epi8(self.0, x86::_mm_set1_epi32(tag));
BitMask(x86::_mm_movemask_epi8(cmp) as u16)
}
}
Expand All @@ -92,7 +92,7 @@ impl Group {
/// `EMPTY`.
#[inline]
pub(crate) fn match_empty(self) -> BitMask {
self.match_tag(Tag::EMPTY)
self.match_tag(Tag::EMPTY32)
}

/// Returns a `BitMask` indicating all tags in the group which are
Expand All @@ -107,15 +107,25 @@ impl Group {
clippy::cast_possible_truncation
)]
unsafe {
// A tag is EMPTY or DELETED iff the high bit is set
BitMask(x86::_mm_movemask_epi8(self.0) as u16)
let cmp = x86::_mm_cmpgt_epi8(self.0, x86::_mm_set1_epi32(Tag::MAX_TAG32));
BitMask(x86::_mm_movemask_epi8(cmp) as u16)
}
}

/// Returns a `BitMask` indicating all tags in the group which are full.
#[inline]
pub(crate) fn match_full(&self) -> BitMask {
self.match_empty_or_deleted().invert()
#[allow(
// tag: i32 as u16
// note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the
// upper 16-bits of the i32 are zeroed:
clippy::cast_sign_loss,
clippy::cast_possible_truncation
)]
unsafe {
let cmp = x86::_mm_cmplt_epi8(self.0, x86::_mm_set1_epi32(Tag::DELETED32));
BitMask(x86::_mm_movemask_epi8(cmp) as u16)
}
}

/// Performs the following transformation on all tags in the group:
Expand All @@ -124,22 +134,16 @@ impl Group {
/// - `FULL => DELETED`
#[inline]
pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self {
// Map high_bit = 1 (EMPTY or DELETED) to 1111_1111
// and high_bit = 0 (FULL) to 1000_0000
//
// Here's this logic expanded to concrete values:
// let special = 0 > tag = 1111_1111 (true) or 0000_0000 (false)
// 1111_1111 | 1000_0000 = 1111_1111
// 0000_0000 | 1000_0000 = 1000_0000
#[allow(
clippy::cast_possible_wrap, // tag: Tag::DELETED.0 as i8
)]
debug_assert_eq!(127, Tag::EMPTY.0);
debug_assert_eq!(126, Tag::DELETED.0);

#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
unsafe {
let zero = x86::_mm_setzero_si128();
let special = x86::_mm_cmpgt_epi8(zero, self.0);
let is_special = x86::_mm_cmpgt_epi8(self.0, x86::_mm_set1_epi32(Tag::MAX_TAG32));

Group(x86::_mm_or_si128(
special,
x86::_mm_set1_epi8(Tag::DELETED.0 as i8),
x86::_mm_and_si128(is_special, x86::_mm_set1_epi32(Tag::EMPTY32)), // EMPTY if special
x86::_mm_set1_epi32(Tag::DELETED32), // else DELETED
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

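You can simplify this: the compare gives you 0 and -1, to which you can just add 127 to get EMPTY and DELETED. Note that for this to work the compare has to select the full tags rather than the special ones (e.g. `_mm_cmplt_epi8` against `DELETED32`), so that full gives -1 + 127 = 126 (DELETED) and special gives 0 + 127 = 127 (EMPTY).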

))
}
}
Expand Down
55 changes: 42 additions & 13 deletions src/control/tag.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,50 +3,79 @@ use core::{fmt, mem};
/// Single tag in a control group.
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub(crate) struct Tag(pub(super) u8);
pub(crate) struct Tag(pub(super) i8);
impl Tag {
/// Control tag value for an empty bucket.
pub(crate) const EMPTY: Tag = Tag(0b1111_1111);
pub(crate) const EMPTY: Tag = Tag(0b0111_1111); // 127
pub(crate) const EMPTY32: i32 = 0x7F7F7F7F;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There should probably be a separate ExpandedTag type for this rather than a plain i32.


/// Control tag value for a deleted bucket.
pub(crate) const DELETED: Tag = Tag(0b1000_0000);
pub(crate) const DELETED: Tag = Tag(0b0111_1110); // 126
pub(crate) const DELETED32: i32 = 0x7E7E7E7E;

pub(crate) const MAX_TAG32: i32 = 0x7D7D7D7D; // 125 (0x7D) in each of the 4 bytes

/// Checks whether a control tag represents a full bucket (value is below `DELETED`).
#[inline]
pub(crate) const fn is_full(self) -> bool {
self.0 & 0x80 == 0
self.0 < Tag::DELETED.0
}

/// Checks whether a control tag represents a special value (`EMPTY` or `DELETED`).
#[inline]
pub(crate) const fn is_special(self) -> bool {
self.0 & 0x80 != 0
self.0 >= Tag::DELETED.0
}

/// Checks whether a special control value is EMPTY (compares against `Tag::EMPTY`).
#[inline]
pub(crate) const fn special_is_empty(self) -> bool {
debug_assert!(self.is_special());
self.0 & 0x01 != 0
self.0 == Tag::EMPTY.0
}

/// Creates a control tag representing a full bucket with the given hash.
#[inline]
#[inline(always)]
#[allow(clippy::cast_possible_truncation)]
pub(crate) const fn full(hash: u64) -> Tag {
// Constant for function that grabs the top 7 bits of the hash.
Tag(Self::full32(hash) as i8)
}

#[inline]
#[allow(clippy::cast_possible_truncation)]
pub(crate) const fn full32(hash: u64) -> i32 {
// Byte width (the smaller of `usize` and `u64`) used to locate the top 8 bits of the hash.
const MIN_HASH_LEN: usize = if mem::size_of::<usize>() < mem::size_of::<u64>() {
mem::size_of::<usize>()
} else {
mem::size_of::<u64>()
};

// Grab the top 7 bits of the hash. While the hash is normally a full 64-bit
// Constant array of 8 bits duplicated as 32 bits
// with re-mapping of special values.
const fn compute_control() -> [i32; 256] {
let mut result = [0; 256];

let mut i: u32 = 0;
while i < 256 {
result[i as usize] = (i | (i << 8) | (i << 16) | (i << 24)) as i32;
i += 1;
}

// Avoid overlap with special values.
result[Tag::EMPTY.0 as usize] = 0x29292929;
result[Tag::DELETED.0 as usize] = 0x53535353;

result
}
const CONTROL: [i32; 256] = compute_control();

// Grab the top 8 bits of the hash. While the hash is normally a full 64-bit
// value, some hash functions (such as FxHash) produce a usize result
// instead, which means that the top 32 bits are 0 on 32-bit platforms.
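// So we use MIN_HASH_LEN constant to handle this.
// NOTE(review): the previous code masked the shifted value (`& 0x7f`); the
// value below is not masked to 8 bits, so a full 64-bit hash on a 32-bit
// platform would index past the end of CONTROL — confirm and add `& 0xFF`.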
let top7 = hash >> (MIN_HASH_LEN * 8 - 7);
Tag((top7 & 0x7f) as u8) // truncation
let top8 = (hash >> (MIN_HASH_LEN * 8 - 8)) as usize;
CONTROL[top8] // truncation
}
}
impl fmt::Debug for Tag {
Expand All @@ -58,7 +87,7 @@ impl fmt::Debug for Tag {
f.pad("DELETED")
}
} else {
f.debug_tuple("full").field(&(self.0 & 0x7F)).finish()
f.debug_tuple("full").field(&self.0).finish()
}
}
}
Expand All @@ -78,6 +107,6 @@ impl TagSliceExt for [Tag] {
#[inline]
fn fill_tag(&mut self, tag: Tag) {
// SAFETY: We have access to the entire slice, so, we can write to the entire slice.
unsafe { self.as_mut_ptr().write_bytes(tag.0, self.len()) }
unsafe { self.as_mut_ptr().write_bytes(tag.0 as u8, self.len()) }
}
}
14 changes: 7 additions & 7 deletions src/raw/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1326,7 +1326,7 @@ impl<T, A: Allocator> RawTable<T, A> {

/// Returns an iterator over occupied buckets that could match a given hash.
///
/// `RawTable` only stores 7 bits of the hash value, so this iterator may
/// `RawTable` only stores 8 bits of the hash value, so this iterator may
/// return items that have a hash value different than the one provided. You
/// should always validate the returned values before using them.
///
Expand Down Expand Up @@ -1683,7 +1683,7 @@ impl RawTableInner {
) -> Result<usize, InsertSlot> {
let mut insert_slot = None;

let tag_hash = Tag::full(hash);
let tag_hash = Tag::full32(hash);
let mut probe_seq = self.probe_seq(hash);

loop {
Expand Down Expand Up @@ -1893,7 +1893,7 @@ impl RawTableInner {
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline(always)]
unsafe fn find_inner(&self, hash: u64, eq: &mut dyn FnMut(usize) -> bool) -> Option<usize> {
let tag_hash = Tag::full(hash);
let tag_hash = Tag::full32(hash);
let mut probe_seq = self.probe_seq(hash);

loop {
Expand Down Expand Up @@ -4006,7 +4006,7 @@ impl<T, A: Allocator> FusedIterator for RawDrain<'_, T, A> {}

/// Iterator over occupied buckets that could match a given hash.
///
/// `RawTable` only stores 7 bits of the hash value, so this iterator may return
/// `RawTable` only stores 8 bits of the hash value, so this iterator may return
/// items that have a hash value different than the one provided. You should
/// always validate the returned values before using them.
///
Expand All @@ -4033,8 +4033,8 @@ struct RawIterHashInner {
bucket_mask: usize,
ctrl: NonNull<u8>,

// The top 7 bits of the hash.
tag_hash: Tag,
// The top 8 bits of the hash.
tag_hash: i32,

// The sequence of groups to probe in the search.
probe_seq: ProbeSeq,
Expand Down Expand Up @@ -4079,7 +4079,7 @@ impl<T> Default for RawIterHash<T> {
impl RawIterHashInner {
#[cfg_attr(feature = "inline-more", inline)]
unsafe fn new(table: &RawTableInner, hash: u64) -> Self {
let tag_hash = Tag::full(hash);
let tag_hash = Tag::full32(hash);
let probe_seq = table.probe_seq(hash);
let group = Group::load(table.ctrl(probe_seq.pos));
let bitmask = group.match_tag(tag_hash).into_iter();
Expand Down
Loading