@@ -342,9 +342,9 @@ impl<T> Bucket<T> {
/// [`<*mut T>::sub`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.sub-1
/// [`NonNull::new_unchecked`]: https://doc.rust-lang.org/stable/std/ptr/struct.NonNull.html#method.new_unchecked
/// [`RawTable::data_end`]: crate::raw::RawTable::data_end
- /// [`RawTableInner::data_end<T>`]: crate::raw:: RawTableInner::data_end<T>
+ /// [`RawTableInner::data_end<T>`]: RawTableInner::data_end<T>
/// [`RawTable::buckets`]: crate::raw::RawTable::buckets
- /// [`RawTableInner::buckets`]: crate::raw:: RawTableInner::buckets
+ /// [`RawTableInner::buckets`]: RawTableInner::buckets
#[inline]
unsafe fn from_base_index(base: NonNull<T>, index: usize) -> Self {
// If mem::size_of::<T>() != 0 then return a pointer to an `element` in
@@ -414,9 +414,9 @@ impl<T> Bucket<T> {
/// [`Bucket`]: crate::raw::Bucket
/// [`from_base_index`]: crate::raw::Bucket::from_base_index
/// [`RawTable::data_end`]: crate::raw::RawTable::data_end
- /// [`RawTableInner::data_end<T>`]: crate::raw:: RawTableInner::data_end<T>
+ /// [`RawTableInner::data_end<T>`]: RawTableInner::data_end<T>
/// [`RawTable`]: crate::raw::RawTable
- /// [`RawTableInner`]: crate::raw:: RawTableInner
+ /// [`RawTableInner`]: RawTableInner
/// [`<*const T>::offset_from`]: https://doc.rust-lang.org/nightly/core/primitive.pointer.html#method.offset_from
#[inline]
unsafe fn to_base_index(&self, base: NonNull<T>) -> usize {
@@ -549,7 +549,7 @@ impl<T> Bucket<T> {
/// [`<*mut T>::sub`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.sub-1
/// [`NonNull::new_unchecked`]: https://doc.rust-lang.org/stable/std/ptr/struct.NonNull.html#method.new_unchecked
/// [`RawTable::buckets`]: crate::raw::RawTable::buckets
- /// [`RawTableInner::buckets`]: crate::raw:: RawTableInner::buckets
+ /// [`RawTableInner::buckets`]: RawTableInner::buckets
#[inline]
unsafe fn next_n(&self, offset: usize) -> Self {
let ptr = if Self::IS_ZERO_SIZED_TYPE {
@@ -1630,7 +1630,8 @@ impl<A: Allocator + Clone> RawTableInner<A> {
// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
let result = (probe_seq.pos + bit) & self.bucket_mask;

- // In tables smaller than the group width, trailing control
+ // In tables smaller than the group width
+ // (self.buckets() < Group::WIDTH), trailing control
// bytes outside the range of the table are filled with
// EMPTY entries. These will unfortunately trigger a
// match, but once masked may point to a full bucket that
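The masking trick in the hunk above is worth spelling out: because the number of buckets is a power of two, `pos & self.bucket_mask` and `pos % self.buckets()` agree, and in tables smaller than the group width a match found in the trailing EMPTY control bytes wraps back into the real bucket range once masked. A small self-contained sketch (the constants and variable names are illustrative, not hashbrown's API):

fn main() {
    let buckets: usize = 4; // always a power of two
    let group_width: usize = 8; // stand-in for Group::WIDTH, so buckets < group_width
    let bucket_mask = buckets - 1; // `self.bucket_mask = self.buckets() - 1`

    // Power-of-two trick: masking is the same as reducing modulo `buckets`.
    for pos in 0..4 * group_width {
        assert_eq!(pos & bucket_mask, pos % buckets);
    }

    // A group load still inspects `group_width` control bytes, so a match bit
    // found in the trailing bytes (indices 4..8 here, all EMPTY) yields an index
    // that wraps back into 0..4 once masked and may name an already-full bucket.
    let probe_pos = 0;
    let bit = 6; // match outside the table's real range
    let result = (probe_pos + bit) & bucket_mask;
    assert_eq!(result, 2); // wrapped back into the real bucket range
}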
@@ -1651,8 +1652,9 @@ impl<A: Allocator + Clone> RawTableInner<A> {
// and properly aligned, because the table is already allocated
// (see `TableLayout::calculate_layout_for` and `ptr::read`);
//
- // * For tables larger than the group width, we will never end up in the given
- // branch, since `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
+ // * For tables larger than the group width (self.buckets() >= Group::WIDTH),
+ // we will never end up in the given branch, since
+ // `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
// full bucket index. For tables smaller than the group width, calling the
// `lowest_set_bit_nonzero` function (when `nightly` feature enabled) is also
// safe, as the trailing control bytes outside the range of the table are filled
@@ -1719,12 +1721,49 @@ impl<A: Allocator + Clone> RawTableInner<A> {
}
}

+ /// Prepares for rehashing data in place (that is, without allocating new memory).
+ /// Converts all full `control bytes` to `DELETED` and all `DELETED` control
+ /// bytes to `EMPTY`, i.e. performs the following conversion:
+ ///
+ /// - `EMPTY` control bytes -> `EMPTY`;
+ /// - `DELETED` control bytes -> `EMPTY`;
+ /// - `FULL` control bytes -> `DELETED`.
+ ///
+ /// This function does not make any changes to the `data` parts of the table,
+ /// nor to the `items` or `growth_left` fields of the table.
+ ///
+ /// # Safety
+ ///
+ /// You must observe the following safety rules when calling this function:
+ ///
+ /// * The [`RawTableInner`] has already been allocated;
+ ///
+ /// * The caller of this function must convert the `DELETED` bytes back to `FULL`
+ /// bytes when re-inserting them into their ideal position (which was impossible
+ /// to do during the first insert due to tombstones). If the caller does not do
+ /// this, then calling this function may result in a memory leak.
+ ///
+ /// Calling this function on a table that has not been allocated results in
+ /// [`undefined behavior`].
+ ///
+ /// See also the [`Bucket::as_ptr`] method, for more information about properly removing
+ /// or saving a `data element` from / into the [`RawTable`] / [`RawTableInner`].
+ ///
+ /// [`Bucket::as_ptr`]: Bucket::as_ptr
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[allow(clippy::mut_mut)]
#[inline]
unsafe fn prepare_rehash_in_place(&mut self) {
- // Bulk convert all full control bytes to DELETED, and all DELETED
- // control bytes to EMPTY. This effectively frees up all buckets
- // containing a DELETED entry.
+ // Bulk convert all full control bytes to DELETED, and all DELETED control bytes to EMPTY.
+ // This effectively frees up all buckets containing a DELETED entry.
+ //
+ // SAFETY:
+ // 1. `i` is guaranteed to be within bounds since we are iterating from zero to `buckets - 1`;
+ // 2. Even if `i == self.bucket_mask`, it is safe to call `Group::load_aligned` due to the
+ // extended control bytes range, which is `self.bucket_mask + 1 + Group::WIDTH`;
+ // 3. The caller of this function guarantees that [`RawTableInner`] has already been allocated;
+ // 4. We can use `Group::load_aligned` and `Group::store_aligned` here since we start from 0
+ // and go to the end with a step equal to `Group::WIDTH` (see `TableLayout::calculate_layout_for`).
for i in (0..self.buckets()).step_by(Group::WIDTH) {
let group = Group::load_aligned(self.ctrl(i));
let group = group.convert_special_to_empty_and_full_to_deleted();
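For readers unfamiliar with the control-byte encoding referenced in the new doc comment, a scalar sketch of the per-byte effect of `convert_special_to_empty_and_full_to_deleted` may help. The real method operates on a whole `Group` at once with word or SIMD operations; the constants below mirror hashbrown's encoding, in which the top bit marks a special (EMPTY or DELETED) byte, and the function here is only an illustration of the stated semantics:

const EMPTY: u8 = 0b1111_1111;   // special byte: slot is free
const DELETED: u8 = 0b1000_0000; // special byte: tombstone
// FULL control bytes have the top bit clear and carry 7 bits of the hash.

// Per-byte semantics of the conversion:
// EMPTY -> EMPTY, DELETED -> EMPTY, FULL -> DELETED.
fn convert_byte(ctrl: u8) -> u8 {
    if ctrl & 0x80 != 0 { EMPTY } else { DELETED }
}

fn main() {
    assert_eq!(convert_byte(EMPTY), EMPTY);
    assert_eq!(convert_byte(DELETED), EMPTY);
    assert_eq!(convert_byte(0x27), DELETED); // an arbitrary FULL byte
}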
@@ -1733,10 +1772,19 @@ impl<A: Allocator + Clone> RawTableInner<A> {
// Fix up the trailing control bytes. See the comments in set_ctrl
// for the handling of tables smaller than the group width.
- if self.buckets() < Group::WIDTH {
+ //
+ // SAFETY: The caller of this function guarantees that [`RawTableInner`]
+ // has already been allocated.
+ if unlikely(self.buckets() < Group::WIDTH) {
+ // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of control bytes,
+ // so copying `self.buckets() == self.bucket_mask + 1` bytes with offset equal to
+ // `Group::WIDTH` is safe
self.ctrl(0)
.copy_to(self.ctrl(Group::WIDTH), self.buckets());
} else {
+ // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of
+ // control bytes, so copying `Group::WIDTH` bytes with offset equal
+ // to `self.buckets() == self.bucket_mask + 1` is safe
self.ctrl(0)
.copy_to(self.ctrl(self.buckets()), Group::WIDTH);
}
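The two `copy_to` calls above maintain the mirrored trailing control bytes. Below is a hedged sketch of the same fix-up on a plain byte slice, assuming (as the SAFETY comments state) that the control-byte allocation is `self.bucket_mask + 1 + Group::WIDTH` bytes long; the function and constant names are made up for illustration:

const GROUP_WIDTH: usize = 8; // stand-in for Group::WIDTH

// `ctrl` models the control bytes: `buckets` real bytes followed by
// GROUP_WIDTH trailing bytes that mirror the head of the table so that
// group loads near the end never read past the allocation.
fn fix_trailing_ctrl(ctrl: &mut [u8], buckets: usize) {
    assert_eq!(ctrl.len(), buckets + GROUP_WIDTH);
    if buckets < GROUP_WIDTH {
        // Small table: mirror all `buckets` real bytes at offset GROUP_WIDTH.
        ctrl.copy_within(0..buckets, GROUP_WIDTH);
    } else {
        // Large table: mirror the first GROUP_WIDTH bytes at offset `buckets`.
        ctrl.copy_within(0..GROUP_WIDTH, buckets);
    }
}

fn main() {
    // A small table: 4 buckets plus 8 trailing bytes, all trailing bytes EMPTY (0xFF).
    let mut ctrl = vec![0x01, 0x02, 0x03, 0x04, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF];
    fix_trailing_ctrl(&mut ctrl, 4);
    assert_eq!(ctrl[8..], [0x01, 0x02, 0x03, 0x04]);
}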
@@ -2236,27 +2284,95 @@ impl<A: Allocator + Clone> RawTableInner<A> {
self.growth_left = bucket_mask_to_capacity(self.bucket_mask);
}

+ /// Erases the [`Bucket`]'s control byte at the given index so that it is no longer
+ /// triggered as full, decreases the `items` count of the table and, if it can be done,
+ /// increases `self.growth_left`.
+ ///
+ /// This function does not actually erase / drop the [`Bucket`] itself, i.e. it
+ /// does not make any changes to the `data` parts of the table. The caller of this
+ /// function must take care to properly drop the `data`, otherwise calling this
+ /// function may result in a memory leak.
+ ///
+ /// # Safety
+ ///
+ /// You must observe the following safety rules when calling this function:
+ ///
+ /// * The [`RawTableInner`] has already been allocated;
+ ///
+ /// * The control byte at the given position must be a full control byte;
+ ///
+ /// * The `index` must not be greater than `RawTableInner.bucket_mask`, i.e.
+ /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must
+ /// be no greater than the number returned by the function [`RawTableInner::buckets`].
+ ///
+ /// Calling this function on a table that has not been allocated results in [`undefined behavior`].
+ ///
+ /// Calling this function on a table with no elements is unspecified, but calling subsequent
+ /// functions is likely to result in [`undefined behavior`] due to overflow subtraction
+ /// (`self.items -= 1` causes overflow when `self.items == 0`).
+ ///
+ /// See also the [`Bucket::as_ptr`] method, for more information about properly removing
+ /// or saving a `data element` from / into the [`RawTable`] / [`RawTableInner`].
+ ///
+ /// [`RawTableInner::buckets`]: RawTableInner::buckets
+ /// [`Bucket::as_ptr`]: Bucket::as_ptr
+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
unsafe fn erase(&mut self, index: usize) {
debug_assert!(self.is_bucket_full(index));
+
+ // This is the same as `index.wrapping_sub(Group::WIDTH) % self.buckets()` because
+ // the number of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
let index_before = index.wrapping_sub(Group::WIDTH) & self.bucket_mask;
+ // SAFETY:
+ // - The caller must uphold the safety contract for the `erase` method;
+ // - `index_before` is guaranteed to be in range due to masking with `self.bucket_mask`
let empty_before = Group::load(self.ctrl(index_before)).match_empty();
let empty_after = Group::load(self.ctrl(index)).match_empty();

- // If we are inside a continuous block of Group::WIDTH full or deleted
- // cells then a probe window may have seen a full block when trying to
- // insert. We therefore need to keep that block non-empty so that
- // lookups will continue searching to the next probe window.
+ // Inserting and searching in the map is performed by two key functions:
+ //
+ // - The `find_insert_slot` function that looks up the index of any `EMPTY` or `DELETED`
+ // slot in a group to be able to insert. If it doesn't find an `EMPTY` or `DELETED`
+ // slot immediately in the first group, it jumps to the next `Group` looking for it,
+ // and so on until it has gone through all the groups in the control bytes.
+ //
+ // - The `find_inner` function that looks for the index of the desired element by looking
+ // at all the `FULL` bytes in the group. If it did not find the element right away, and
+ // there is no `EMPTY` byte in the group, then this means that the `find_insert_slot`
+ // function may have found a suitable slot in the next group. Therefore, `find_inner`
+ // jumps further, and if it does not find the desired element and again there is no `EMPTY`
+ // byte, then it jumps further, and so on. The search stops only if the `find_inner` function
+ // finds the desired element or hits an `EMPTY` slot/byte.
+ //
+ // Accordingly, this leads to two consequences:
+ //
+ // - The map must have `EMPTY` slots (bytes);
+ //
+ // - You can't just mark the byte to be erased as `EMPTY`, because otherwise the `find_inner`
+ // function may stumble upon an `EMPTY` byte before finding the desired element and stop
+ // searching.
+ //
+ // Thus it is necessary to check all bytes after and before the erased element. If we are in
+ // a contiguous `Group` of `FULL` or `DELETED` bytes (the number of `FULL` or `DELETED` bytes
+ // before and after is greater than or equal to `Group::WIDTH`), then we must mark our byte as
+ // `DELETED` in order for the `find_inner` function to go further. On the other hand, if there
+ // is at least one `EMPTY` slot in the `Group`, then the `find_inner` function will still stumble
+ // upon an `EMPTY` byte, so we can safely mark our erased byte as `EMPTY` as well.
+ //
+ // Finally, for tables smaller than the group width (self.buckets() < Group::WIDTH) we have
+ // `index_before == (index.wrapping_sub(Group::WIDTH) & self.bucket_mask) == index`, so,
+ // given all of the above, such tables cannot have `DELETED` bytes.
//
- // Note that in this context `leading_zeros` refers to the bytes at the
- // end of a group, while `trailing_zeros` refers to the bytes at the
- // beginning of a group.
+ // Note that in this context `leading_zeros` refers to the bytes at the end of a group, while
+ // `trailing_zeros` refers to the bytes at the beginning of a group.
let ctrl = if empty_before.leading_zeros() + empty_after.trailing_zeros() >= Group::WIDTH {
DELETED
} else {
self.growth_left += 1;
EMPTY
};
+ // SAFETY: the caller must uphold the safety contract for the `erase` method.
self.set_ctrl(index, ctrl);
self.items -= 1;
}
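To make the `DELETED`-versus-`EMPTY` decision above concrete, here is a scalar model of the same rule, assuming (as the comment states) that `leading_zeros` of the empty match counts the non-EMPTY bytes at the end of the group loaded before the slot, and `trailing_zeros` counts the non-EMPTY bytes at the start of the group loaded at the slot; the width and byte values are illustrative, not hashbrown's API:

const WIDTH: usize = 4; // stand-in for Group::WIDTH
const EMPTY: u8 = 0xFF;
const DELETED: u8 = 0x80;

// `before` is the group ending right where the erased slot begins; `after` is
// the group starting at the erased slot (whose byte is still FULL at this point).
fn erased_ctrl(before: [u8; WIDTH], after: [u8; WIDTH]) -> u8 {
    // What `empty_before.leading_zeros()` measures: non-EMPTY bytes at the end of `before`.
    let full_before = before.iter().rev().take_while(|&&b| b != EMPTY).count();
    // What `empty_after.trailing_zeros()` measures: non-EMPTY bytes at the start of `after`.
    let full_after = after.iter().take_while(|&&b| b != EMPTY).count();
    if full_before + full_after >= WIDTH {
        // Some probe window covering this slot may have looked completely full,
        // so lookups must keep probing past it: leave a tombstone.
        DELETED
    } else {
        // Every probe window covering this slot already sees an EMPTY byte,
        // so the slot can be freed outright (and `growth_left` gets the slot back).
        EMPTY
    }
}

fn main() {
    // Full bytes on both sides: the slot becomes a tombstone.
    assert_eq!(erased_ctrl([1, 2, 3, 4], [5, 6, 7, 8]), DELETED);
    // An EMPTY byte nearby interrupts every window: the slot becomes EMPTY again.
    assert_eq!(erased_ctrl([1, 2, EMPTY, 4], [5, EMPTY, 7, 8]), EMPTY);
}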