
Commit 50c4d35

Doc erase and prepare_rehash_in_place
1 parent 7685fcd commit 50c4d35

File tree

1 file changed: +135 −19 lines changed


src/raw/mod.rs

Lines changed: 135 additions & 19 deletions
@@ -342,9 +342,9 @@ impl<T> Bucket<T> {
 /// [`<*mut T>::sub`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.sub-1
 /// [`NonNull::new_unchecked`]: https://doc.rust-lang.org/stable/std/ptr/struct.NonNull.html#method.new_unchecked
 /// [`RawTable::data_end`]: crate::raw::RawTable::data_end
-/// [`RawTableInner::data_end<T>`]: crate::raw::RawTableInner::data_end<T>
+/// [`RawTableInner::data_end<T>`]: RawTableInner::data_end<T>
 /// [`RawTable::buckets`]: crate::raw::RawTable::buckets
-/// [`RawTableInner::buckets`]: crate::raw::RawTableInner::buckets
+/// [`RawTableInner::buckets`]: RawTableInner::buckets
 #[inline]
 unsafe fn from_base_index(base: NonNull<T>, index: usize) -> Self {
 // If mem::size_of::<T>() != 0 then return a pointer to an `element` in
@@ -414,9 +414,9 @@ impl<T> Bucket<T> {
 /// [`Bucket`]: crate::raw::Bucket
 /// [`from_base_index`]: crate::raw::Bucket::from_base_index
 /// [`RawTable::data_end`]: crate::raw::RawTable::data_end
-/// [`RawTableInner::data_end<T>`]: crate::raw::RawTableInner::data_end<T>
+/// [`RawTableInner::data_end<T>`]: RawTableInner::data_end<T>
 /// [`RawTable`]: crate::raw::RawTable
-/// [`RawTableInner`]: crate::raw::RawTableInner
+/// [`RawTableInner`]: RawTableInner
 /// [`<*const T>::offset_from`]: https://doc.rust-lang.org/nightly/core/primitive.pointer.html#method.offset_from
 #[inline]
 unsafe fn to_base_index(&self, base: NonNull<T>) -> usize {
@@ -549,7 +549,7 @@ impl<T> Bucket<T> {
 /// [`<*mut T>::sub`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.sub-1
 /// [`NonNull::new_unchecked`]: https://doc.rust-lang.org/stable/std/ptr/struct.NonNull.html#method.new_unchecked
 /// [`RawTable::buckets`]: crate::raw::RawTable::buckets
-/// [`RawTableInner::buckets`]: crate::raw::RawTableInner::buckets
+/// [`RawTableInner::buckets`]: RawTableInner::buckets
 #[inline]
 unsafe fn next_n(&self, offset: usize) -> Self {
 let ptr = if Self::IS_ZERO_SIZED_TYPE {
@@ -1630,7 +1630,8 @@ impl<A: Allocator + Clone> RawTableInner<A> {
 // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
 let result = (probe_seq.pos + bit) & self.bucket_mask;

-// In tables smaller than the group width, trailing control
+// In tables smaller than the group width
+// (self.buckets() < Group::WIDTH), trailing control
 // bytes outside the range of the table are filled with
 // EMPTY entries. These will unfortunately trigger a
 // match, but once masked may point to a full bucket that
@@ -1651,8 +1652,9 @@ impl<A: Allocator + Clone> RawTableInner<A> {
 // and properly aligned, because the table is already allocated
 // (see `TableLayout::calculate_layout_for` and `ptr::read`);
 //
-// * For tables larger than the group width, we will never end up in the given
-// branch, since `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
+// * For tables larger than the group width (self.buckets() >= Group::WIDTH),
+// we will never end up in the given branch, since
+// `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
 // full bucket index. For tables smaller than the group width, calling the
 // `lowest_set_bit_nonzero` function (when `nightly` feature enabled) is also
 // safe, as the trailing control bytes outside the range of the table are filled
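
The small-table case these comments describe can be made concrete with a minimal standalone sketch. The concrete numbers (a 16-byte group width and a 4-bucket table) are assumptions for illustration only:

fn main() {
    // Assumed values for illustration only: a 4-bucket table probed with 16-byte groups.
    const GROUP_WIDTH: usize = 16;
    let buckets: usize = 4;
    let bucket_mask = buckets - 1; // 3, since the bucket count is a power of two

    // A group load at probe position 0 covers control bytes 0..GROUP_WIDTH. Bytes
    // 4..16 are the trailing EMPTY bytes, so a match there (e.g. bit = 5) lies
    // outside the real table, but masking wraps it back into the range 0..buckets:
    let probe_pos: usize = 0;
    let bit: usize = 5;
    let result = (probe_pos + bit) & bucket_mask;
    assert!(bit >= buckets && bit < GROUP_WIDTH); // the match came from a trailing byte
    assert_eq!(result, 1); // yet the masked index is in range and may be a FULL bucket
}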
@@ -1719,12 +1721,49 @@ impl<A: Allocator + Clone> RawTableInner<A> {
 }
 }

+/// Prepares for rehashing data in place (that is, without allocating new memory).
+/// Converts all full control bytes to `DELETED` and all `DELETED` control
+/// bytes to `EMPTY`, i.e. performs the following conversion:
+///
+/// - `EMPTY` control bytes -> `EMPTY`;
+/// - `DELETED` control bytes -> `EMPTY`;
+/// - `FULL` control bytes -> `DELETED`.
+///
+/// This function does not make any changes to the `data` parts of the table,
+/// or any changes to the `items` or `growth_left` fields of the table.
+///
+/// # Safety
+///
+/// You must observe the following safety rules when calling this function:
+///
+/// * The [`RawTableInner`] has already been allocated;
+///
+/// * The caller of this function must convert the `DELETED` bytes back to `FULL`
+/// bytes when re-inserting them into their ideal position (which was impossible
+/// to do during the first insert due to tombstones). If the caller does not do
+/// this, then calling this function may result in a memory leak.
+///
+/// Calling this function on a table that has not been allocated results in
+/// [`undefined behavior`].
+///
+/// See also the [`Bucket::as_ptr`] method, for more information about properly removing
+/// or saving a `data element` from / into the [`RawTable`] / [`RawTableInner`].
+///
+/// [`Bucket::as_ptr`]: Bucket::as_ptr
+/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
 #[allow(clippy::mut_mut)]
 #[inline]
 unsafe fn prepare_rehash_in_place(&mut self) {
-// Bulk convert all full control bytes to DELETED, and all DELETED
-// control bytes to EMPTY. This effectively frees up all buckets
-// containing a DELETED entry.
+// Bulk convert all full control bytes to DELETED, and all DELETED control bytes to EMPTY.
+// This effectively frees up all buckets containing a DELETED entry.
+//
+// SAFETY:
+// 1. `i` is guaranteed to be within bounds since we are iterating from zero to `buckets - 1`;
+// 2. Even if `i == self.bucket_mask`, it is safe to call `Group::load_aligned`
+// due to the extended control bytes range, which is `self.bucket_mask + 1 + Group::WIDTH`;
+// 3. The caller of this function guarantees that [`RawTableInner`] has already been allocated;
+// 4. We can use `Group::load_aligned` and `Group::store_aligned` here since we start from 0
+// and go to the end with a step equal to `Group::WIDTH` (see `TableLayout::calculate_layout_for`).
 for i in (0..self.buckets()).step_by(Group::WIDTH) {
 let group = Group::load_aligned(self.ctrl(i));
 let group = group.convert_special_to_empty_and_full_to_deleted();
@@ -1733,10 +1772,19 @@ impl<A: Allocator + Clone> RawTableInner<A> {

 // Fix up the trailing control bytes. See the comments in set_ctrl
 // for the handling of tables smaller than the group width.
-if self.buckets() < Group::WIDTH {
+//
+// SAFETY: The caller of this function guarantees that [`RawTableInner`]
+// has already been allocated.
+if unlikely(self.buckets() < Group::WIDTH) {
+// SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of control bytes,
+// so copying `self.buckets() == self.bucket_mask + 1` bytes with offset equal to
+// `Group::WIDTH` is safe.
 self.ctrl(0)
 .copy_to(self.ctrl(Group::WIDTH), self.buckets());
 } else {
+// SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of
+// control bytes, so copying `Group::WIDTH` bytes with offset equal
+// to `self.buckets() == self.bucket_mask + 1` is safe.
 self.ctrl(0)
 .copy_to(self.ctrl(self.buckets()), Group::WIDTH);
 }
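
For readers new to the control-byte encoding, the conversion this loop performs can be sketched per byte. This is a minimal sketch assuming the usual encoding (EMPTY = 0xFF, DELETED = 0x80, FULL = a byte with the high bit clear); `convert_one` is a hypothetical helper that only mirrors the group-wide `convert_special_to_empty_and_full_to_deleted`:

// Control-byte values as commonly defined in hashbrown (assumption for this sketch).
const EMPTY: u8 = 0b1111_1111;
const DELETED: u8 = 0b1000_0000;

// Per-byte sketch of what the group-wide conversion does:
// EMPTY -> EMPTY, DELETED -> EMPTY, FULL (high bit clear) -> DELETED.
fn convert_one(ctrl: u8) -> u8 {
    if ctrl & 0x80 != 0 {
        EMPTY // the byte was EMPTY or DELETED (a "special" byte)
    } else {
        DELETED // the byte was FULL (it held the top 7 bits of a hash)
    }
}

fn main() {
    assert_eq!(convert_one(EMPTY), EMPTY);
    assert_eq!(convert_one(DELETED), EMPTY);
    assert_eq!(convert_one(0x2a), DELETED); // a FULL byte becomes a tombstone
}

After this conversion the caller re-inserts every element and turns its `DELETED` byte back into `FULL`, as required by the safety contract documented above.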
@@ -2236,27 +2284,95 @@ impl<A: Allocator + Clone> RawTableInner<A> {
 self.growth_left = bucket_mask_to_capacity(self.bucket_mask);
 }

+/// Erases the [`Bucket`]'s control byte at the given index so that it is not
+/// triggered as full, decreases the `items` of the table and, if possible,
+/// increases `self.growth_left`.
+///
+/// This function does not actually erase / drop the [`Bucket`] itself, i.e. it
+/// does not make any changes to the `data` parts of the table. The caller of this
+/// function must take care to properly drop the `data`, otherwise calling this
+/// function may result in a memory leak.
+///
+/// # Safety
+///
+/// You must observe the following safety rules when calling this function:
+///
+/// * The [`RawTableInner`] has already been allocated;
+///
+/// * The control byte at the given `index` must be full;
+///
+/// * The `index` must not be greater than `RawTableInner.bucket_mask`, i.e.
+/// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must
+/// be no greater than the number returned by the function [`RawTableInner::buckets`].
+///
+/// Calling this function on a table that has not been allocated results in [`undefined behavior`].
+///
+/// Calling this function on a table with no elements is unspecified, but calling subsequent
+/// functions is likely to result in [`undefined behavior`] due to overflowing subtraction
+/// (`self.items -= 1` causes overflow when `self.items == 0`).
+///
+/// See also the [`Bucket::as_ptr`] method, for more information about properly removing
+/// or saving a `data element` from / into the [`RawTable`] / [`RawTableInner`].
+///
+/// [`RawTableInner::buckets`]: RawTableInner::buckets
+/// [`Bucket::as_ptr`]: Bucket::as_ptr
+/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
 #[inline]
 unsafe fn erase(&mut self, index: usize) {
 debug_assert!(self.is_bucket_full(index));
+
+// This is the same as `index.wrapping_sub(Group::WIDTH) % self.buckets()` because
+// the number of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
 let index_before = index.wrapping_sub(Group::WIDTH) & self.bucket_mask;
+// SAFETY:
+// - The caller must uphold the safety contract for the `erase` method;
+// - `index_before` is guaranteed to be in range due to masking with `self.bucket_mask`.
 let empty_before = Group::load(self.ctrl(index_before)).match_empty();
 let empty_after = Group::load(self.ctrl(index)).match_empty();

-// If we are inside a continuous block of Group::WIDTH full or deleted
-// cells then a probe window may have seen a full block when trying to
-// insert. We therefore need to keep that block non-empty so that
-// lookups will continue searching to the next probe window.
+// Inserting and searching in the map is performed by two key functions:
+//
+// - The `find_insert_slot` function that looks up the index of any `EMPTY` or `DELETED`
+// slot in a group to be able to insert. If it doesn't find an `EMPTY` or `DELETED`
+// slot immediately in the first group, it jumps to the next `Group` looking for it,
+// and so on until it has gone through all the groups in the control bytes.
+//
+// - The `find_inner` function that looks for the index of the desired element by looking
+// at all the `FULL` bytes in the group. If it did not find the element right away, and
+// there is no `EMPTY` byte in the group, then this means that the `find_insert_slot`
+// function may have found a suitable slot in the next group. Therefore, `find_inner`
+// jumps further, and if it does not find the desired element and again there is no `EMPTY`
+// byte, then it jumps further, and so on. The search stops only if the `find_inner` function
+// finds the desired element or hits an `EMPTY` slot/byte.
+//
+// Accordingly, this leads to two consequences:
+//
+// - The map must have `EMPTY` slots (bytes);
+//
+// - You can't just mark the byte to be erased as `EMPTY`, because otherwise the `find_inner`
+// function may stumble upon an `EMPTY` byte before finding the desired element and stop
+// searching.
+//
+// Thus it is necessary to check all bytes after and before the erased element. If we are in
+// a contiguous `Group` of `FULL` or `DELETED` bytes (the number of `FULL` or `DELETED` bytes
+// before and after is greater than or equal to `Group::WIDTH`), then we must mark our byte as
+// `DELETED` in order for the `find_inner` function to go further. On the other hand, if there
+// is at least one `EMPTY` slot in the `Group`, then the `find_inner` function will still stumble
+// upon an `EMPTY` byte, so we can safely mark our erased byte as `EMPTY` as well.
+//
+// Finally, since `index_before == (index.wrapping_sub(Group::WIDTH) & self.bucket_mask) == index`
+// and given all of the above, tables smaller than the group width (self.buckets() < Group::WIDTH)
+// cannot have `DELETED` bytes.
 //
-// Note that in this context `leading_zeros` refers to the bytes at the
-// end of a group, while `trailing_zeros` refers to the bytes at the
-// beginning of a group.
+// Note that in this context `leading_zeros` refers to the bytes at the end of a group, while
+// `trailing_zeros` refers to the bytes at the beginning of a group.
 let ctrl = if empty_before.leading_zeros() + empty_after.trailing_zeros() >= Group::WIDTH {
 DELETED
 } else {
 self.growth_left += 1;
 EMPTY
 };
+// SAFETY: the caller must uphold the safety contract for the `erase` method.
 self.set_ctrl(index, ctrl);
 self.items -= 1;
 }
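
The tombstone decision at the end of `erase` can also be viewed in isolation. Below is a minimal sketch, assuming a 16-byte group width and treating the two zero counts as plain integers; `erased_ctrl` is a hypothetical helper, not part of the crate:

// Control-byte values as commonly defined in hashbrown (assumption for this sketch).
const EMPTY: u8 = 0b1111_1111;
const DELETED: u8 = 0b1000_0000;
const GROUP_WIDTH: usize = 16; // assumed group width

// `non_empty_before`: consecutive non-EMPTY bytes at the end of the group loaded just
// before the erased index (empty_before.leading_zeros() in the real code).
// `non_empty_after`: consecutive non-EMPTY bytes at the start of the group loaded at
// the erased index (empty_after.trailing_zeros() in the real code).
fn erased_ctrl(non_empty_before: usize, non_empty_after: usize) -> u8 {
    if non_empty_before + non_empty_after >= GROUP_WIDTH {
        // A full window of GROUP_WIDTH non-EMPTY bytes covers the erased slot, so an
        // earlier insert may have probed past it; keep a tombstone so lookups continue.
        DELETED
    } else {
        // Every probe window covering this slot still contains an EMPTY byte, so
        // searches stop there anyway and the slot can be fully reclaimed.
        EMPTY
    }
}

fn main() {
    assert_eq!(erased_ctrl(16, 16), DELETED); // densely occupied neighborhood: tombstone
    assert_eq!(erased_ctrl(3, 5), EMPTY);     // an EMPTY byte is nearby: reclaim the slot
}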
