2 | 2 | //! allows bidirectional lookup; i.e., given a value, one can easily find the |
3 | 3 | //! type, and vice versa. |
4 | 4 |
5 | | -use std::hash::{Hash, Hasher}; |
| 5 | +use std::cell::SyncUnsafeCell; |
| 6 | +use std::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher}; |
| 7 | +use std::sync::atomic::{AtomicU32, Ordering}; |
| 8 | +use std::sync::{LazyLock, Mutex}; |
6 | 9 | use std::{fmt, str}; |
7 | 10 |
8 | | -use rustc_arena::DroplessArena; |
9 | | -use rustc_data_structures::fx::FxIndexSet; |
| 11 | +use rustc_data_structures::fx::FxHasher; |
10 | 12 | use rustc_data_structures::stable_hasher::{ |
11 | 13 | HashStable, StableCompare, StableHasher, ToStableHashKey, |
12 | 14 | }; |
@@ -2475,18 +2477,9 @@ impl Symbol { |
2475 | 2477 | with_session_globals(|session_globals| session_globals.symbol_interner.intern(string)) |
2476 | 2478 | } |
2477 | 2479 |
2478 | | - /// Access the underlying string. This is a slowish operation because it |
2479 | | - /// requires locking the symbol interner. |
2480 | | - /// |
2481 | | - /// Note that the lifetime of the return value is a lie. It's not the same |
2482 | | - /// as `&self`, but actually tied to the lifetime of the underlying |
2483 | | - /// interner. Interners are long-lived, and there are very few of them, and |
2484 | | - /// this function is typically used for short-lived things, so in practice |
2485 | | - /// it works out ok. |
| 2480 | + /// Access the underlying string. |
2486 | 2481 | pub fn as_str(&self) -> &str { |
2487 | | - with_session_globals(|session_globals| unsafe { |
2488 | | - std::mem::transmute::<&str, &str>(session_globals.symbol_interner.get(*self)) |
2489 | | - }) |
| 2482 | + with_session_globals(|session_globals| session_globals.symbol_interner.get(*self)) |
2490 | 2483 | } |
2491 | 2484 |
2492 | 2485 | pub fn as_u32(self) -> u32 { |
@@ -2541,53 +2534,212 @@ impl StableCompare for Symbol { |
2541 | 2534 | } |
2542 | 2535 | } |
2543 | 2536 |
2544 | | -pub(crate) struct Interner(Lock<InternerInner>); |
| 2537 | +// This is never de-initialized; it holds the interned strings in static storage. |
| 2538 | +// Each str is stored length-prefixed (u32), so a u32 byte offset into the arena gives lock-free |
| 2539 | +// O(1) random access. Indices < 2^16 are reserved for pre-interned symbols, which live in a |
| 2540 | +// separate dense table (their indices aren't byte offsets, so the same lookup trick doesn't apply). |
| 2541 | +static GLOBAL_ARENA: LazyLock<StringArena> = LazyLock::new(StringArena::new); |
2545 | 2542 |
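For orientation, here is a minimal, hypothetical sketch of the indexing scheme described above, with a plain `Vec<u8>` and a little-endian length prefix standing in for the chunked, lock-free static arena; the point is just that the `u32` offset returned by `alloc` is all that `get` needs:

```rust
// Hypothetical, simplified model of the arena layout: one append-only byte
// buffer, each string stored as a 4-byte length prefix followed by its bytes.
// (The real arena is chunked, static, lock-free, and uses an ASCII-safe
// length encoding; none of that is modeled here.)
struct FlatStrings {
    buf: Vec<u8>,
}

impl FlatStrings {
    fn alloc(&mut self, s: &str) -> u32 {
        let start = u32::try_from(self.buf.len()).expect("arena full");
        self.buf.extend_from_slice(&(s.len() as u32).to_le_bytes());
        self.buf.extend_from_slice(s.as_bytes());
        start
    }

    fn get(&self, idx: u32) -> &str {
        let idx = idx as usize;
        let len = u32::from_le_bytes(self.buf[idx..idx + 4].try_into().unwrap()) as usize;
        // Everything after the prefix was copied from a &str, so it is valid UTF-8.
        std::str::from_utf8(&self.buf[idx + 4..idx + 4 + len]).unwrap()
    }
}

fn main() {
    let mut arena = FlatStrings { buf: Vec::new() };
    let a = arena.alloc("foo");
    let b = arena.alloc("a longer string");
    assert_eq!(arena.get(a), "foo");
    assert_eq!(arena.get(b), "a longer string");
}
```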
2546 | | -// The `&'static str`s in this type actually point into the arena. |
2547 | | -// |
2548 | | -// This type is private to prevent accidentally constructing more than one |
2549 | | -// `Interner` on the same thread, which makes it easy to mix up `Symbol`s |
2550 | | -// between `Interner`s. |
2551 | | -struct InternerInner { |
2552 | | - arena: DroplessArena, |
2553 | | - strings: FxIndexSet<&'static str>, |
| 2543 | +const CHUNK_SIZE: usize = 4 * 1024 * 1024; |
| 2544 | +const CHUNKS: usize = (u32::MAX as usize).div_ceil(CHUNK_SIZE); |
| 2545 | + |
| 2546 | +struct StringChunk { |
| 2547 | + array: LazyLock<Box<[SyncUnsafeCell<u8>; CHUNK_SIZE]>>, |
2554 | 2548 | } |
2555 | 2549 |
2556 | | -impl Interner { |
2557 | | - fn prefill(init: &[&'static str]) -> Self { |
2558 | | - Interner(Lock::new(InternerInner { |
2559 | | - arena: Default::default(), |
2560 | | - strings: init.iter().copied().collect(), |
2561 | | - })) |
| 2550 | +impl Default for StringChunk { |
| 2551 | + fn default() -> Self { |
| 2552 | + Self { |
| 2553 | + array: LazyLock::new(|| unsafe { |
| 2554 | + // SAFETY: Zero-init'd UnsafeCell<u8> is initialized and has no other invariants to |
| 2555 | + // worry about. |
| 2556 | + Box::new_zeroed().assume_init() |
| 2557 | + }), |
| 2558 | + } |
2562 | 2559 | } |
| 2560 | +} |
2563 | 2561 |
2564 | | - #[inline] |
2565 | | - fn intern(&self, string: &str) -> Symbol { |
2566 | | - let mut inner = self.0.lock(); |
2567 | | - if let Some(idx) = inner.strings.get_index_of(string) { |
2568 | | - return Symbol::new(idx as u32); |
| 2562 | +struct StringArena { |
| 2563 | + chunks: [StringChunk; CHUNKS], |
| 2564 | + // This guards writes to `write_at`, but not reads, which proceed lock-free. write_at only |
| 2565 | + // ever moves forward, which is what keeps the lock-free reads sound. |
| 2566 | + writer: std::sync::Mutex<()>, |
| 2567 | + write_at: AtomicU32, |
| 2568 | +} |
| 2569 | + |
| 2570 | +impl StringArena { |
| 2571 | + fn new() -> Self { |
| 2572 | + StringArena { |
| 2573 | + chunks: std::array::from_fn(|_| StringChunk::default()), |
| 2574 | + // Reserve 2^16 u32 indices -- these will be used for pre-filled interning where we |
| 2575 | + // have a dense SymbolIndex space. We could make this exact but it doesn't really |
| 2576 | + // matter for this initial test anyway. |
| 2577 | + write_at: AtomicU32::new(u32::from(u16::MAX)), |
| 2578 | + writer: Mutex::new(()), |
2569 | 2579 | } |
| 2580 | + } |
2570 | 2581 |
2571 | | - let string: &str = inner.arena.alloc_str(string); |
| 2582 | + /// Copy the passed &str into this buffer. Returns an index that can be passed to `get` to |
| 2583 | + /// retrieve the &str back. |
| 2584 | + /// |
| 2585 | + /// The returned index is always at least u16::MAX; smaller indices are reserved for prefill. |
| 2586 | + #[allow(clippy::erasing_op)] |
| 2587 | + fn alloc(&self, s: &str) -> u32 { |
| 2588 | + let _guard = self.writer.lock().unwrap(); |
| 2589 | + // Allocate a chunk of the region, and fill it with the &str's length and bytes. |
| 2590 | + let start = self.write_at.load(Ordering::Acquire) as usize; |
| 2591 | + |
| 2592 | + // If we have too large a string it won't fit into a single chunk, and we currently |
| 2593 | + // can't support storing it. 4MB+ Symbols feel unlikely, but if we hit issues in |
| 2594 | + // practice we can adjust the data structure to allow variable-sized chunks which would |
| 2595 | + // let us grow near-arbitrarily large for individual symbols. |
| 2596 | + assert!(s.len() <= CHUNK_SIZE); |
| 2597 | + |
| 2598 | + // Assert we're in-bounds. Summing across chunks we have exactly u32::MAX bytes, so this |
| 2599 | + // implicitly checks we haven't exceeded our capacity overall. |
| 2600 | + // |
| 2601 | + // This holds because an allocation never straddles chunks: if it would cross a chunk |
| 2602 | + // boundary we skip ahead to the next chunk entirely. |
| 2603 | + let possible_end = start.checked_add(4).unwrap().checked_add(s.len()).unwrap(); |
| 2604 | + |
| 2605 | + // We fit into the current chunk. |
| 2606 | + let len = s.len(); |
| 2607 | + |
| 2608 | + // Make sure we can fit in our "utf-8 encoded" length. |
| 2609 | + // |
| 2610 | + // If all bytes we write are utf-8, we can treat the whole region of memory as just |
| 2611 | + // &str, which avoids needing to check that it's still utf-8 when indexing arbitrarily. |
| 2612 | + assert!(len < (1 << 28)); |
| 2613 | + let encoded_len: u32 = ((len as u32 & (0x7f << (7 * 3))) << 3) |
| 2614 | + | ((len as u32 & (0x7f << (7 * 2))) << 2) |
| 2615 | + | ((len as u32 & (0x7f << (7 * 1))) << 1) |
| 2616 | + | (len as u32 & (0x7f << (7 * 0))); |
| 2617 | + |
| 2618 | + // The string goes either into the chunk `start` falls in, or into the next one. |
| 2619 | + let (chunk, start_absolute, offset, end) = |
| 2620 | + if start / CHUNK_SIZE != possible_end / CHUNK_SIZE { |
| 2621 | + let chunk_idx = start / CHUNK_SIZE + 1; |
| 2622 | + ( |
| 2623 | + &self.chunks[chunk_idx].array, |
| 2624 | + chunk_idx * CHUNK_SIZE, |
| 2625 | + 0usize, |
| 2626 | + chunk_idx * CHUNK_SIZE + 4 + s.len(), |
| 2627 | + ) |
| 2628 | + } else { |
| 2629 | + (&self.chunks[start / CHUNK_SIZE].array, start, start % CHUNK_SIZE, possible_end) |
| 2630 | + }; |
| 2631 | + let chunk = LazyLock::force(&chunk); |
| 2632 | + |
| 2633 | + // SAFETY: |
| 2634 | + // |
| 2635 | + // * _guard above protects against concurrent calls to `alloc` (single writer for write_at) |
| 2636 | + // * write_at only increases, and shared access is only provided to memory < write_at. |
| 2637 | + // * all chunks are zero-init'd at allocation: no uninitialized memory here. |
| 2638 | + let chunk_tail = unsafe { |
| 2639 | + std::slice::from_raw_parts_mut( |
| 2640 | + chunk.as_ptr().cast::<u8>().add(offset).cast_mut(), |
| 2641 | + CHUNK_SIZE - offset, |
| 2642 | + ) |
| 2643 | + }; |
| 2644 | + // encoded_len is utf-8-compatible |
| 2645 | + chunk_tail[..4].copy_from_slice(&encoded_len.to_ne_bytes()); |
| 2646 | + // `s` is &str, so this is trivially correct. |
| 2647 | + chunk_tail[4..][..s.len()].copy_from_slice(s.as_bytes()); |
| 2648 | + |
| 2649 | + // Semantically this releases the memory for readers. |
| 2650 | + self.write_at.store(end as u32, Ordering::Release); |
| 2651 | + |
| 2652 | + start_absolute as u32 |
| 2653 | + } |
2572 | 2654 |
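The length prefix written by `alloc` keeps every byte below 0x80, so a whole chunk can be treated as one `&str`. Below is a standalone sketch of that encoding and its inverse, using the same shifts as `alloc` and `get`; the `encode_len`/`decode_len` names are made up for illustration:

```rust
// The 28-bit length is split into four 7-bit groups, one per byte, so every
// prefix byte stays below 0x80 and the surrounding chunk remains valid UTF-8.
fn encode_len(len: u32) -> [u8; 4] {
    assert!(len < (1 << 28));
    let encoded = ((len & (0x7f << (7 * 3))) << 3)
        | ((len & (0x7f << (7 * 2))) << 2)
        | ((len & (0x7f << 7)) << 1)
        | (len & 0x7f);
    encoded.to_ne_bytes()
}

fn decode_len(bytes: [u8; 4]) -> u32 {
    let e = u32::from_ne_bytes(bytes);
    // The masks use byte positions (8 * n) because each 7-bit group now sits
    // in its own byte; the shifts undo the per-group offsets from encode_len.
    ((e & (0x7f << (8 * 3))) >> 3)
        | ((e & (0x7f << (8 * 2))) >> 2)
        | ((e & (0x7f << 8)) >> 1)
        | (e & 0x7f)
}

fn main() {
    for len in [0u32, 1, 127, 128, 4 * 1024 * 1024, (1 << 28) - 1] {
        let bytes = encode_len(len);
        assert!(bytes.iter().all(|&b| b < 0x80), "every prefix byte must be ASCII");
        assert_eq!(decode_len(bytes), len);
    }
}
```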
2573 | | - // SAFETY: we can extend the arena allocation to `'static` because we |
2574 | | - // only access these while the arena is still alive. |
2575 | | - let string: &'static str = unsafe { &*(string as *const str) }; |
| 2655 | + /// Get the allocated string at the passed index. |
| 2656 | + /// |
| 2657 | + /// Note that this **does not** check that the passed index is actually an index returned by |
| 2658 | + /// `alloc`. |
| 2659 | + #[allow(clippy::erasing_op)] |
| 2660 | + fn get(&self, idx: u32) -> &str { |
| 2661 | + let end = self.write_at.load(Ordering::Acquire); |
| 2662 | + |
| 2663 | + assert!(idx < end); |
| 2664 | + |
| 2665 | + let chunk = &self.chunks[idx as usize / CHUNK_SIZE].array; |
| 2666 | + let chunk = LazyLock::force(&chunk); |
| 2667 | + |
| 2668 | + let chunk_str_len = if idx as usize / CHUNK_SIZE == end as usize / CHUNK_SIZE { |
| 2669 | + end as usize % CHUNK_SIZE |
| 2670 | + } else { |
| 2671 | + // Note that zero-initialized bytes are valid UTF-8, so even if we don't fill a full |
| 2672 | + // chunk it remains valid for our purposes. |
| 2673 | + CHUNK_SIZE |
| 2674 | + }; |
| 2675 | + |
| 2676 | + // SAFETY: write_at only increases, and every byte below it is published and valid UTF-8 |
| 2677 | + // (ASCII length prefixes, &str contents, or zero padding). We checked above that idx < write_at. |
| 2678 | + let chunk_head = |
| 2679 | + unsafe { std::str::from_raw_parts(chunk.as_ptr().cast::<u8>(), chunk_str_len) }; |
| 2680 | + let idx = idx as usize % CHUNK_SIZE; |
| 2681 | + |
| 2682 | + let len = u32::from_ne_bytes(chunk_head[idx..idx + 4].as_bytes().try_into().unwrap()); |
| 2683 | + let len = ((len & (0x7f << (8 * 3))) >> 3) |
| 2684 | + | ((len & (0x7f << (8 * 2))) >> 2) |
| 2685 | + | ((len & (0x7f << (8 * 1))) >> 1) |
| 2686 | + | ((len & (0x7f << (8 * 0))) >> 0); |
| 2687 | + &chunk_head[idx + 4..idx + 4 + len as usize] |
| 2688 | + } |
| 2689 | +} |
| 2690 | + |
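The synchronization in `StringArena` is a single-writer / lock-free-reader scheme: a `Mutex` serializes writers, a Release store on `write_at` publishes the freshly written bytes, and readers only perform an Acquire load. The following safe-Rust sketch shows the same pattern with `AtomicU8` standing in for the `SyncUnsafeCell` chunk bytes (all names here are illustrative, not rustc code):

```rust
use std::sync::atomic::{AtomicU8, AtomicUsize, Ordering};
use std::sync::Mutex;

struct AppendLog {
    bytes: Vec<AtomicU8>,  // fixed capacity, allocated up front
    write_at: AtomicUsize, // everything below this offset is published
    writer: Mutex<()>,     // serializes writers; readers never take it
}

impl AppendLog {
    fn with_capacity(cap: usize) -> Self {
        AppendLog {
            bytes: (0..cap).map(|_| AtomicU8::new(0)).collect(),
            write_at: AtomicUsize::new(0),
            writer: Mutex::new(()),
        }
    }

    /// Append `data`, returning the offset it was written at.
    fn push(&self, data: &[u8]) -> usize {
        let _guard = self.writer.lock().unwrap();
        let start = self.write_at.load(Ordering::Acquire);
        assert!(start + data.len() <= self.bytes.len(), "out of capacity");
        for (i, &b) in data.iter().enumerate() {
            self.bytes[start + i].store(b, Ordering::Relaxed);
        }
        // Release: a reader that observes a cursor >= start + data.len() is
        // guaranteed to also see the byte stores above.
        self.write_at.store(start + data.len(), Ordering::Release);
        start
    }

    /// Read `len` bytes starting at `offset` without taking the lock.
    fn read(&self, offset: usize, len: usize) -> Vec<u8> {
        let published = self.write_at.load(Ordering::Acquire);
        assert!(offset + len <= published, "reading unpublished bytes");
        self.bytes[offset..offset + len].iter().map(|b| b.load(Ordering::Relaxed)).collect()
    }
}

fn main() {
    let log = AppendLog::with_capacity(64);
    let at = log.push(b"hello");
    assert_eq!(log.read(at, 5), b"hello".to_vec());
}
```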
| 2691 | +pub(crate) struct Interner(&'static [&'static str], Lock<InternerInner>); |
| 2692 | + |
| 2693 | +struct InternerInner { |
| 2694 | + strings: hashbrown::HashTable<Symbol>, |
| 2695 | +} |
| 2696 | + |
| 2697 | +impl Interner { |
| 2698 | + fn prefill(init: &'static [&'static str]) -> Self { |
| 2699 | + assert!(init.len() < u16::MAX as usize); |
| 2700 | + let mut strings = hashbrown::HashTable::new(); |
| 2701 | + |
| 2702 | + for (idx, s) in init.iter().copied().enumerate() { |
| 2703 | + let mut hasher = FxHasher::default(); |
| 2704 | + s.hash(&mut hasher); |
| 2705 | + let hash = hasher.finish(); |
| 2706 | + strings.insert_unique(hash, Symbol::new(idx as u32), |val| { |
| 2707 | + // The entry has to come from `init` because nothing else has been inserted yet. |
| 2708 | + BuildHasherDefault::<FxHasher>::default().hash_one(init[val.0.index()]) |
| 2709 | + }); |
| 2710 | + } |
2576 | 2711 |
2577 | | - // This second hash table lookup can be avoided by using `RawEntryMut`, |
2578 | | - // but this code path isn't hot enough for it to be worth it. See |
2579 | | - // #91445 for details. |
2580 | | - let (idx, is_new) = inner.strings.insert_full(string); |
2581 | | - debug_assert!(is_new); // due to the get_index_of check above |
| 2712 | + Interner(init, Lock::new(InternerInner { strings })) |
| 2713 | + } |
2582 | 2714 |
2583 | | - Symbol::new(idx as u32) |
| 2715 | + #[inline] |
| 2716 | + fn intern(&self, string: &str) -> Symbol { |
| 2717 | + let hash = BuildHasherDefault::<FxHasher>::default().hash_one(string); |
| 2718 | + let mut inner = self.1.lock(); |
| 2719 | + match inner.strings.find_entry(hash, |v| self.get(*v) == string) { |
| 2720 | + Ok(e) => return *e.get(), |
| 2721 | + Err(e) => { |
| 2722 | + let idx = GLOBAL_ARENA.alloc(string); |
| 2723 | + let res = Symbol::new(idx as u32); |
| 2724 | + |
| 2725 | + e.into_table().insert_unique(hash, res, |val| { |
| 2726 | + BuildHasherDefault::<FxHasher>::default().hash_one(self.get(*val)) |
| 2727 | + }); |
| 2728 | + |
| 2729 | + res |
| 2730 | + } |
| 2731 | + } |
2584 | 2732 | } |
2585 | 2733 |
2586 | 2734 | /// Get the symbol as a string. |
2587 | 2735 | /// |
2588 | 2736 | /// [`Symbol::as_str()`] should be used in preference to this function. |
2589 | | - fn get(&self, symbol: Symbol) -> &str { |
2590 | | - self.0.lock().strings.get_index(symbol.0.as_usize()).unwrap() |
| 2737 | + fn get(&self, symbol: Symbol) -> &'static str { |
| 2738 | + if symbol.0.index() < u16::MAX as usize { |
| 2739 | + self.0[symbol.0.index()] |
| 2740 | + } else { |
| 2741 | + GLOBAL_ARENA.get(symbol.0.as_u32()) |
| 2742 | + } |
2591 | 2743 | } |
2592 | 2744 | } |
2593 | 2745 |
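The table in `InternerInner` stores only `Symbol` values; hashing and equality always go through the resolved string, which is why `find_entry` and `insert_unique` take closures. A small, self-contained sketch of that pattern, assuming the `hashbrown` crate and with `RandomState` standing in for `FxHasher` (all types here are illustrative):

```rust
use std::collections::hash_map::RandomState;
use std::hash::BuildHasher;

struct TinyInterner {
    strings: Vec<String>,             // index -> string storage
    table: hashbrown::HashTable<u32>, // stores only indices, never the strings
    hasher: RandomState,
}

impl TinyInterner {
    fn new() -> Self {
        TinyInterner {
            strings: Vec::new(),
            table: hashbrown::HashTable::new(),
            hasher: RandomState::new(),
        }
    }

    fn intern(&mut self, s: &str) -> u32 {
        let hash = self.hasher.hash_one(s);
        let strings = &self.strings;
        // Equality resolves the candidate index back to its string.
        if let Some(&idx) = self.table.find(hash, |&i| strings[i as usize] == s) {
            return idx;
        }
        let idx = self.strings.len() as u32;
        self.strings.push(s.to_owned());
        let (strings, hasher) = (&self.strings, &self.hasher);
        // The closure tells the table how to re-hash an entry on growth: again
        // by resolving the stored index, since the table keeps no keys itself.
        self.table.insert_unique(hash, idx, |&i| hasher.hash_one(strings[i as usize].as_str()));
        idx
    }

    fn get(&self, idx: u32) -> &str {
        &self.strings[idx as usize]
    }
}

fn main() {
    let mut interner = TinyInterner::new();
    let a = interner.intern("foo");
    assert_eq!(a, interner.intern("foo"));
    assert_ne!(a, interner.intern("bar"));
    assert_eq!(interner.get(a), "foo");
}
```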