@@ -352,9 +352,9 @@ impl<T> Bucket<T> {
/// [`<*mut T>::sub`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.sub-1
/// [`NonNull::new_unchecked`]: https://doc.rust-lang.org/stable/std/ptr/struct.NonNull.html#method.new_unchecked
/// [`RawTable::data_end`]: crate::raw::RawTable::data_end
- /// [`RawTableInner::data_end<T>`]: crate::raw::RawTableInner::data_end<T>
+ /// [`RawTableInner::data_end<T>`]: RawTableInner::data_end<T>
/// [`RawTable::buckets`]: crate::raw::RawTable::buckets
- /// [`RawTableInner::buckets`]: crate::raw::RawTableInner::buckets
+ /// [`RawTableInner::buckets`]: RawTableInner::buckets
#[inline]
unsafe fn from_base_index(base: NonNull<T>, index: usize) -> Self {
// If mem::size_of::<T>() != 0 then return a pointer to an `element` in
@@ -424,9 +424,9 @@ impl<T> Bucket<T> {
/// [`Bucket`]: crate::raw::Bucket
/// [`from_base_index`]: crate::raw::Bucket::from_base_index
/// [`RawTable::data_end`]: crate::raw::RawTable::data_end
- /// [`RawTableInner::data_end<T>`]: crate::raw::RawTableInner::data_end<T>
+ /// [`RawTableInner::data_end<T>`]: RawTableInner::data_end<T>
/// [`RawTable`]: crate::raw::RawTable
- /// [`RawTableInner`]: crate::raw::RawTableInner
+ /// [`RawTableInner`]: RawTableInner
/// [`<*const T>::offset_from`]: https://doc.rust-lang.org/nightly/core/primitive.pointer.html#method.offset_from
#[inline]
unsafe fn to_base_index(&self, base: NonNull<T>) -> usize {
@@ -560,7 +560,7 @@ impl<T> Bucket<T> {
/// [`<*mut T>::sub`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.sub-1
/// [`NonNull::new_unchecked`]: https://doc.rust-lang.org/stable/std/ptr/struct.NonNull.html#method.new_unchecked
/// [`RawTable::buckets`]: crate::raw::RawTable::buckets
- /// [`RawTableInner::buckets`]: crate::raw::RawTableInner::buckets
+ /// [`RawTableInner::buckets`]: RawTableInner::buckets
#[inline]
unsafe fn next_n(&self, offset: usize) -> Self {
let ptr = if Self::IS_ZERO_SIZED_TYPE {
@@ -1642,7 +1642,8 @@ impl<A: Allocator + Clone> RawTableInner<A> {
// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
let result = (probe_seq.pos + bit) & self.bucket_mask;

- // In tables smaller than the group width, trailing control
+ // In tables smaller than the group width
+ // (self.buckets() < Group::WIDTH), trailing control
// bytes outside the range of the table are filled with
// EMPTY entries. These will unfortunately trigger a
// match, but once masked may point to a full bucket that
@@ -1663,8 +1664,9 @@ impl<A: Allocator + Clone> RawTableInner<A> {
// and properly aligned, because the table is already allocated
// (see `TableLayout::calculate_layout_for` and `ptr::read`);
//
- // * For tables larger than the group width, we will never end up in the given
- // branch, since `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
+ // * For tables larger than the group width (self.buckets() >= Group::WIDTH),
+ // we will never end up in the given branch, since
+ // `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
// full bucket index. For tables smaller than the group width, calling the
// `lowest_set_bit_nonzero` function (when `nightly` feature enabled) is also
// safe, as the trailing control bytes outside the range of the table are filled
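As a rough, standalone illustration of the masking discussed in this hunk (the sizes and values below are assumptions for the sketch, not hashbrown's real types): in a table smaller than the group width, a match reported on a trailing (mirrored) control byte yields a position outside the table, and `& bucket_mask` wraps it back inside, where it may name a full bucket that must be re-checked.

```rust
// Standalone sketch: a table of 4 buckets probed with a 16-wide group.
// A match on bit 6 of the group corresponds to a trailing control byte;
// masking wraps the index back into the table.
fn main() {
    let buckets: usize = 4;            // power of two, smaller than the group width
    let bucket_mask = buckets - 1;     // 0b11
    let probe_pos: usize = 0;

    let bit: usize = 6;                // bit reported for a trailing control byte
    let result = (probe_pos + bit) & bucket_mask;

    // The wrapped index lands back inside the table and may point at a FULL
    // bucket, which is why the real code re-checks the control byte.
    assert_eq!(result, 2);
}
```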
@@ -1731,12 +1733,49 @@ impl<A: Allocator + Clone> RawTableInner<A> {
}
}

+ /// Prepares for rehashing data in place (that is, without allocating new memory).
+ /// Converts all `FULL` control bytes to `DELETED` and all `DELETED` control
+ /// bytes to `EMPTY`, i.e. performs the following conversion:
+ ///
+ /// - `EMPTY` control bytes -> `EMPTY`;
+ /// - `DELETED` control bytes -> `EMPTY`;
+ /// - `FULL` control bytes -> `DELETED`.
+ ///
+ /// This function does not make any changes to the `data` parts of the table,
+ /// nor to the `items` or `growth_left` fields of the table.
+ ///
+ /// # Safety
+ ///
+ /// You must observe the following safety rules when calling this function:
+ ///
+ /// * The [`RawTableInner`] has already been allocated;
+ ///
+ /// * The caller of this function must convert the `DELETED` bytes back to `FULL`
+ /// bytes when re-inserting them into their ideal position (which was impossible
+ /// to do during the first insert due to tombstones). If the caller does not do
+ /// this, then calling this function may result in a memory leak.
+ ///
+ /// Calling this function on a table that has not been allocated results in
+ /// [`undefined behavior`].
+ ///
+ /// See also the [`Bucket::as_ptr`] method for more information about properly removing
+ /// or saving a `data element` from / into the [`RawTable`] / [`RawTableInner`].
+ ///
+ /// [`Bucket::as_ptr`]: Bucket::as_ptr
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[allow(clippy::mut_mut)]
#[inline]
unsafe fn prepare_rehash_in_place(&mut self) {
- // Bulk convert all full control bytes to DELETED, and all DELETED
- // control bytes to EMPTY. This effectively frees up all buckets
- // containing a DELETED entry.
+ // Bulk convert all full control bytes to DELETED, and all DELETED control bytes to EMPTY.
+ // This effectively frees up all buckets containing a DELETED entry.
+ //
+ // SAFETY:
+ // 1. `i` is guaranteed to be within bounds since we are iterating from zero to `buckets - 1`;
+ // 2. Even if `i == self.bucket_mask`, it is safe to call `Group::load_aligned`
+ // due to the extended control bytes range, which is `self.bucket_mask + 1 + Group::WIDTH`;
+ // 3. The caller of this function guarantees that [`RawTableInner`] has already been allocated;
+ // 4. We can use `Group::load_aligned` and `Group::store_aligned` here since we start from 0
+ // and go to the end with a step equal to `Group::WIDTH` (see `TableLayout::calculate_layout_for`).
for i in (0..self.buckets()).step_by(Group::WIDTH) {
let group = Group::load_aligned(self.ctrl(i));
let group = group.convert_special_to_empty_and_full_to_deleted();
@@ -1745,10 +1784,19 @@ impl<A: Allocator + Clone> RawTableInner<A> {
// Fix up the trailing control bytes. See the comments in set_ctrl
// for the handling of tables smaller than the group width.
- if self.buckets() < Group::WIDTH {
+ //
+ // SAFETY: The caller of this function guarantees that [`RawTableInner`]
+ // has already been allocated.
+ if unlikely(self.buckets() < Group::WIDTH) {
+ // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of control bytes,
+ // so copying `self.buckets() == self.bucket_mask + 1` bytes with an offset equal to
+ // `Group::WIDTH` is safe.
self.ctrl(0)
    .copy_to(self.ctrl(Group::WIDTH), self.buckets());
} else {
+ // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of
+ // control bytes, so copying `Group::WIDTH` bytes with an offset equal
+ // to `self.buckets() == self.bucket_mask + 1` is safe.
self.ctrl(0)
    .copy_to(self.ctrl(self.buckets()), Group::WIDTH);
}
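A minimal per-byte sketch of the conversion that `prepare_rehash_in_place` documents above. The `EMPTY` and `DELETED` values follow hashbrown's control-byte encoding (`EMPTY = 0xFF`, `DELETED = 0x80`, `FULL` bytes have the high bit clear); `convert_byte_for_rehash` is a hypothetical helper used only for illustration, since the real code converts a whole `Group` at once via `convert_special_to_empty_and_full_to_deleted`.

```rust
// Control-byte values as used by hashbrown's SwissTable layout.
const EMPTY: u8 = 0b1111_1111;
const DELETED: u8 = 0b1000_0000;

// Hypothetical per-byte version of the group-wide conversion:
// EMPTY -> EMPTY, DELETED -> EMPTY, FULL -> DELETED.
fn convert_byte_for_rehash(ctrl: u8) -> u8 {
    if ctrl & 0x80 != 0 {
        EMPTY // special byte (EMPTY or DELETED) becomes EMPTY
    } else {
        DELETED // FULL byte (top bit clear, low bits carry a hash fragment) becomes DELETED
    }
}

fn main() {
    assert_eq!(convert_byte_for_rehash(EMPTY), EMPTY);
    assert_eq!(convert_byte_for_rehash(DELETED), EMPTY);
    assert_eq!(convert_byte_for_rehash(0x2a), DELETED); // a FULL byte is tombstoned
}
```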
@@ -2248,27 +2296,95 @@ impl<A: Allocator + Clone> RawTableInner<A> {
self.growth_left = bucket_mask_to_capacity(self.bucket_mask);
}

+ /// Erases the [`Bucket`]'s control byte at the given index so that it is no longer
+ /// marked as full, decreases the `items` count of the table and, if possible,
+ /// increases `self.growth_left`.
+ ///
+ /// This function does not actually erase / drop the [`Bucket`] itself, i.e. it
+ /// does not make any changes to the `data` parts of the table. The caller of this
+ /// function must take care to properly drop the `data`, otherwise calling this
+ /// function may result in a memory leak.
+ ///
+ /// # Safety
+ ///
+ /// You must observe the following safety rules when calling this function:
+ ///
+ /// * The [`RawTableInner`] has already been allocated;
+ ///
+ /// * The control byte at the given position must be a full control byte;
+ ///
+ /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e.
+ /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must
+ /// be no greater than the number returned by the function [`RawTableInner::buckets`].
+ ///
+ /// Calling this function on a table that has not been allocated results in [`undefined behavior`].
+ ///
+ /// Calling this function on a table with no elements is unspecified, but calling subsequent
+ /// functions is likely to result in [`undefined behavior`] due to a subtraction overflow
+ /// (`self.items -= 1` overflows when `self.items == 0`).
+ ///
+ /// See also the [`Bucket::as_ptr`] method for more information about properly removing
+ /// or saving a `data element` from / into the [`RawTable`] / [`RawTableInner`].
+ ///
+ /// [`RawTableInner::buckets`]: RawTableInner::buckets
+ /// [`Bucket::as_ptr`]: Bucket::as_ptr
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
unsafe fn erase(&mut self, index: usize) {
debug_assert!(self.is_bucket_full(index));
+
+ // This is the same as `index.wrapping_sub(Group::WIDTH) % self.buckets()` because
+ // the number of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
let index_before = index.wrapping_sub(Group::WIDTH) & self.bucket_mask;
+ // SAFETY:
+ // - The caller must uphold the safety contract for the `erase` method;
+ // - `index_before` is guaranteed to be in range due to masking with `self.bucket_mask`.
let empty_before = Group::load(self.ctrl(index_before)).match_empty();
let empty_after = Group::load(self.ctrl(index)).match_empty();
- // If we are inside a continuous block of Group::WIDTH full or deleted
- // cells then a probe window may have seen a full block when trying to
- // insert. We therefore need to keep that block non-empty so that
- // lookups will continue searching to the next probe window.
+ // Inserting and searching in the map is performed by two key functions:
+ //
+ // - The `find_insert_slot` function that looks up the index of any `EMPTY` or `DELETED`
+ // slot in a group to be able to insert. If it doesn't find an `EMPTY` or `DELETED`
+ // slot immediately in the first group, it jumps to the next `Group` looking for it,
+ // and so on until it has gone through all the groups in the control bytes.
+ //
+ // - The `find_inner` function that looks for the index of the desired element by looking
+ // at all the `FULL` bytes in the group. If it did not find the element right away, and
+ // there is no `EMPTY` byte in the group, then this means that the `find_insert_slot`
+ // function may have found a suitable slot in the next group. Therefore, `find_inner`
+ // jumps further, and if it does not find the desired element and again there is no `EMPTY`
+ // byte, then it jumps further, and so on. The search stops only if the `find_inner` function
+ // finds the desired element or hits an `EMPTY` slot/byte.
+ //
+ // Accordingly, this leads to two consequences:
+ //
+ // - The map must have `EMPTY` slots (bytes);
+ //
+ // - You can't just mark the byte to be erased as `EMPTY`, because otherwise the `find_inner`
+ // function may stumble upon an `EMPTY` byte before finding the desired element and stop
+ // searching.
+ //
+ // Thus it is necessary to check all bytes after and before the erased element. If we are in
+ // a contiguous `Group` of `FULL` or `DELETED` bytes (the number of `FULL` or `DELETED` bytes
+ // before and after is greater than or equal to `Group::WIDTH`), then we must mark our byte as
+ // `DELETED` in order for the `find_inner` function to go further. On the other hand, if there
+ // is at least one `EMPTY` slot in the `Group`, then the `find_inner` function will still stumble
+ // upon an `EMPTY` byte, so we can safely mark our erased byte as `EMPTY` as well.
+ //
+ // Finally, since `index_before == (index.wrapping_sub(Group::WIDTH) & self.bucket_mask) == index`
+ // and given all of the above, tables smaller than the group width (self.buckets() < Group::WIDTH)
+ // cannot have `DELETED` bytes.
//
- // Note that in this context `leading_zeros` refers to the bytes at the
- // end of a group, while `trailing_zeros` refers to the bytes at the
- // beginning of a group.
+ // Note that in this context `leading_zeros` refers to the bytes at the end of a group, while
+ // `trailing_zeros` refers to the bytes at the beginning of a group.
let ctrl = if empty_before.leading_zeros() + empty_after.trailing_zeros() >= Group::WIDTH {
    DELETED
} else {
    self.growth_left += 1;
    EMPTY
};
+ // SAFETY: the caller must uphold the safety contract for the `erase` method.
self.set_ctrl(index, ctrl);
self.items -= 1;
}
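A small sketch of the `DELETED`-vs-`EMPTY` decision explained in the comment above, using plain `u8` bitmasks in place of hashbrown's `Group`/`BitMask` types (an assumption for illustration; the group width is fixed at 8 so that the masks fit in a `u8`).

```rust
// Hypothetical group width for the sketch; the real width depends on the SIMD backend.
const GROUP_WIDTH: u32 = 8;

// `empty_before`: bitmask of EMPTY slots in the group ending just before `index`
// (bit 7 = the slot immediately before `index`).
// `empty_after`: bitmask of EMPTY slots in the group starting at `index`
// (bit 0 = `index` itself, which is FULL before erasure).
fn ctrl_after_erase(empty_before: u8, empty_after: u8) -> &'static str {
    if empty_before.leading_zeros() + empty_after.trailing_zeros() >= GROUP_WIDTH {
        // A probe window could have seen GROUP_WIDTH non-EMPTY bytes in a row,
        // so the slot must stay non-EMPTY (a tombstone) for lookups to keep probing.
        "DELETED"
    } else {
        // Any probe window covering this slot already contains an EMPTY byte,
        // so the slot can be reused immediately.
        "EMPTY"
    }
}

fn main() {
    // No EMPTY bytes anywhere near the erased slot: keep a tombstone.
    assert_eq!(ctrl_after_erase(0b0000_0000, 0b0000_0000), "DELETED");
    // EMPTY just before the slot and another EMPTY two slots after it: safe to mark EMPTY.
    assert_eq!(ctrl_after_erase(0b1000_0000, 0b0000_0100), "EMPTY");
}
```

Keeping the byte as `DELETED` whenever the contiguous non-`EMPTY` run around it reaches the group width is exactly what lets the search keep probing past the erased slot instead of stopping early.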