Commit d9423af

Lazy-chunk Symbol interner
This fixes unsoundness in `Symbol::as_str` by leaking the chunks (via static memory).
1 parent d2f335d · commit d9423af
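
The fix is easiest to see in miniature. A minimal sketch (illustrative only, not the commit's code): once interned bytes live in storage that is never freed, handing out an unbounded lifetime is honest, and the lifetime-erasing transmute the old `as_str` needed disappears.

```rust
// Sketch: leaked storage outlives every borrower, so returning
// &'static str is truthful rather than transmuted.
fn intern_forever(s: &str) -> &'static str {
    Box::leak(s.to_owned().into_boxed_str())
}

fn main() {
    let dog: &'static str = intern_forever("dog");
    assert_eq!(dog, "dog");
}
```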

Showing 7 changed files with 214 additions and 51 deletions.

Cargo.lock

Lines changed: 2 additions & 0 deletions
@@ -3671,6 +3671,7 @@ dependencies = [
  "either",
  "elsa",
  "ena",
+ "hashbrown 0.15.2",
  "indexmap",
  "jobserver",
  "libc",
@@ -4568,6 +4569,7 @@ version = "0.0.0"
 dependencies = [
  "blake3",
  "derive-where",
+ "hashbrown 0.15.2",
  "indexmap",
  "itoa",
  "md-5",

compiler/rustc_data_structures/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@ bitflags = "2.4.1"
 either = "1.0"
 elsa = "1.11.0"
 ena = "0.14.3"
+hashbrown = { version = "0.15.2", default-features = false }
 indexmap = "2.4.0"
 jobserver_crate = { version = "0.1.28", package = "jobserver" }
 measureme = "11"

compiler/rustc_data_structures/src/marker.rs

Lines changed: 2 additions & 0 deletions
@@ -68,6 +68,7 @@ impl_dyn_send!(
     [std::sync::LazyLock<T, F> where T: DynSend, F: DynSend]
     [std::collections::HashSet<K, S> where K: DynSend, S: DynSend]
     [std::collections::HashMap<K, V, S> where K: DynSend, V: DynSend, S: DynSend]
+    [hashbrown::HashTable<T> where T: DynSend]
     [std::collections::BTreeMap<K, V, A> where K: DynSend, V: DynSend, A: std::alloc::Allocator + Clone + DynSend]
     [Vec<T, A> where T: DynSend, A: std::alloc::Allocator + DynSend]
     [Box<T, A> where T: ?Sized + DynSend, A: std::alloc::Allocator + DynSend]
@@ -142,6 +143,7 @@ impl_dyn_sync!(
     [std::sync::LazyLock<T, F> where T: DynSend + DynSync, F: DynSend]
     [std::collections::HashSet<K, S> where K: DynSync, S: DynSync]
     [std::collections::HashMap<K, V, S> where K: DynSync, V: DynSync, S: DynSync]
+    [hashbrown::HashTable<T> where T: DynSync]
     [std::collections::BTreeMap<K, V, A> where K: DynSync, V: DynSync, A: std::alloc::Allocator + Clone + DynSync]
     [Vec<T, A> where T: DynSync, A: std::alloc::Allocator + DynSync]
     [Box<T, A> where T: ?Sized + DynSync, A: std::alloc::Allocator + DynSync]
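
The two new entries assert to rustc's parallel-compiler machinery that `hashbrown::HashTable<T>` may cross threads whenever `T` may. A standalone sketch of the underlying property, using plain `Send`/`Sync` bounds rather than rustc's `DynSend`/`DynSync` (assumes a `hashbrown` dependency):

```rust
// Compile-time check that hashbrown::HashTable<T> is Send/Sync exactly
// when T is -- the guarantee the marker impls above piggyback on.
fn assert_send<T: Send>() {}
fn assert_sync<T: Sync>() {}

fn main() {
    assert_send::<hashbrown::HashTable<u32>>();
    assert_sync::<hashbrown::HashTable<u32>>();
    // A !Send payload makes the table !Send; this would fail to compile:
    // assert_send::<hashbrown::HashTable<std::rc::Rc<u32>>>();
}
```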

compiler/rustc_span/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -7,6 +7,7 @@ edition = "2021"
 # tidy-alphabetical-start
 blake3 = "1.5.2"
 derive-where = "1.2.7"
+hashbrown = { version = "0.15.2", default-features = false }
 indexmap = { version = "2.0.0" }
 itoa = "1.0"
 md5 = { package = "md-5", version = "0.10.0" }

compiler/rustc_span/src/lib.rs

Lines changed: 3 additions & 0 deletions
@@ -27,10 +27,13 @@
 #![feature(let_chains)]
 #![feature(map_try_insert)]
 #![feature(negative_impls)]
+#![feature(new_zeroed_alloc)]
 #![feature(read_buf)]
 #![feature(round_char_boundary)]
 #![feature(rustc_attrs)]
 #![feature(rustdoc_internals)]
+#![feature(str_from_raw_parts)]
+#![feature(sync_unsafe_cell)]
 #![warn(unreachable_pub)]
 // tidy-alphabetical-end
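
For context, a nightly-only sketch (illustrative, not part of the commit) exercising the three newly gated APIs that `symbol.rs` uses: `Box::new_zeroed` for zero-initialized chunk allocation, `SyncUnsafeCell` for shared mutable bytes, and `str::from_raw_parts` for reads that skip a UTF-8 re-check:

```rust
#![feature(new_zeroed_alloc)]
#![feature(str_from_raw_parts)]
#![feature(sync_unsafe_cell)]

use std::cell::SyncUnsafeCell;

fn main() {
    // new_zeroed_alloc: a zero-initialized chunk without writing each byte;
    // a zeroed SyncUnsafeCell<u8> is a valid value.
    let chunk: Box<[SyncUnsafeCell<u8>; 64]> = unsafe { Box::new_zeroed().assume_init() };

    // str_from_raw_parts: view raw bytes as &str directly; sound here
    // because zero bytes are valid UTF-8.
    let s: &str = unsafe { std::str::from_raw_parts(chunk.as_ptr().cast::<u8>(), 4) };
    assert_eq!(s, "\0\0\0\0");
}
```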

compiler/rustc_span/src/symbol.rs

Lines changed: 198 additions & 46 deletions
@@ -2,11 +2,13 @@
 //! allows bidirectional lookup; i.e., given a value, one can easily find the
 //! type, and vice versa.

-use std::hash::{Hash, Hasher};
+use std::cell::SyncUnsafeCell;
+use std::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher};
+use std::sync::atomic::{AtomicU32, Ordering};
+use std::sync::{LazyLock, Mutex};
 use std::{fmt, str};

-use rustc_arena::DroplessArena;
-use rustc_data_structures::fx::FxIndexSet;
+use rustc_data_structures::fx::FxHasher;
 use rustc_data_structures::stable_hasher::{
     HashStable, StableCompare, StableHasher, ToStableHashKey,
 };
@@ -2475,18 +2477,9 @@ impl Symbol {
         with_session_globals(|session_globals| session_globals.symbol_interner.intern(string))
     }

-    /// Access the underlying string. This is a slowish operation because it
-    /// requires locking the symbol interner.
-    ///
-    /// Note that the lifetime of the return value is a lie. It's not the same
-    /// as `&self`, but actually tied to the lifetime of the underlying
-    /// interner. Interners are long-lived, and there are very few of them, and
-    /// this function is typically used for short-lived things, so in practice
-    /// it works out ok.
+    /// Access the underlying string.
     pub fn as_str(&self) -> &str {
-        with_session_globals(|session_globals| unsafe {
-            std::mem::transmute::<&str, &str>(session_globals.symbol_interner.get(*self))
-        })
+        with_session_globals(|session_globals| session_globals.symbol_interner.get(*self))
     }

     pub fn as_u32(self) -> u32 {
@@ -2541,53 +2534,212 @@ impl StableCompare for Symbol {
     }
 }

-pub(crate) struct Interner(Lock<InternerInner>);
+// This is never de-initialized and stores interned &str in static storage.
+// Each str is stored length-prefixed (u32), and we allow for random-access indexing with a u32
+// index by direct lookup in the arena. Indices <2^16 are stored in a separate structure (they are
+// pre-allocated at dense addresses so we can't use the same lockless O(1) hack for them).
+static GLOBAL_ARENA: LazyLock<StringArena> = LazyLock::new(|| StringArena::new());

-// The `&'static str`s in this type actually point into the arena.
-//
-// This type is private to prevent accidentally constructing more than one
-// `Interner` on the same thread, which makes it easy to mix up `Symbol`s
-// between `Interner`s.
-struct InternerInner {
-    arena: DroplessArena,
-    strings: FxIndexSet<&'static str>,
+const CHUNK_SIZE: usize = 4 * 1024 * 1024;
+const CHUNKS: usize = (u32::MAX as usize).div_ceil(CHUNK_SIZE);
+
+struct StringChunk {
+    array: LazyLock<Box<[SyncUnsafeCell<u8>; CHUNK_SIZE]>>,
 }

-impl Interner {
-    fn prefill(init: &[&'static str]) -> Self {
-        Interner(Lock::new(InternerInner {
-            arena: Default::default(),
-            strings: init.iter().copied().collect(),
-        }))
+impl Default for StringChunk {
+    fn default() -> Self {
+        Self {
+            array: LazyLock::new(|| unsafe {
+                // SAFETY: Zero-init'd UnsafeCell<u8> is initialized and has no other invariants to
+                // worry about.
+                Box::new_zeroed().assume_init()
+            }),
+        }
     }
+}

-    #[inline]
-    fn intern(&self, string: &str) -> Symbol {
-        let mut inner = self.0.lock();
-        if let Some(idx) = inner.strings.get_index_of(string) {
-            return Symbol::new(idx as u32);
+struct StringArena {
+    chunks: [StringChunk; CHUNKS],
+    // this guards writes to `write_at`, but not reads, which proceed lock-free. write_at is only moved
+    // forward so this ends up being safe.
+    writer: std::sync::Mutex<()>,
+    write_at: AtomicU32,
+}
+
+impl StringArena {
+    fn new() -> Self {
+        StringArena {
+            chunks: std::array::from_fn(|_| StringChunk::default()),
+            // Reserve 2^16 u32 indices -- these will be used for pre-filled interning where we
+            // have a dense SymbolIndex space. We could make this exact but it doesn't really
+            // matter for this initial test anyway.
+            write_at: AtomicU32::new(u32::from(u16::MAX)),
+            writer: Mutex::new(()),
         }
+    }

-        let string: &str = inner.arena.alloc_str(string);
+    /// Copy the passed &str into this buffer. Returns an index that can be passed to `get` to
+    /// retrieve the &str back.
+    ///
+    /// u32 is guaranteed to be at least u16::MAX (for the first call).
+    #[allow(clippy::erasing_op)]
+    fn alloc(&self, s: &str) -> u32 {
+        let _guard = self.writer.lock().unwrap();
+        // Allocate a chunk of the region, and fill it with the &str's length and bytes.
+        let start = self.write_at.load(Ordering::Acquire) as usize;
+
+        // If we have too large a string it won't fit into a single chunk, and we currently
+        // can't support storing it. 4MB+ Symbols feel unlikely, but if we hit issues in
+        // practice we can adjust the data structure to allow variable-sized chunks which would
+        // let us grow near-arbitrarily large for individual symbols.
+        assert!(s.len() <= CHUNK_SIZE);
+
+        // Assert we're in-bounds. Summing across chunks we have exactly u32::MAX bytes, so this
+        // implicitly checks we haven't exceeded our capacity overall.
+        //
+        // This is possible because if it crosses a chunk boundary we'll skip to that next chunk
+        // entirely.
+        let possible_end = start.checked_add(4).unwrap().checked_add(s.len()).unwrap();
+
+        // We fit into the current chunk.
+        let len = s.len();
+
+        // Make sure we can fit in our "utf-8 encoded" length.
+        //
+        // If all bytes we write are utf-8, we can treat the whole region of memory as just
+        // &str, which avoids needing to check that it's still utf-8 when indexing arbitrarily.
+        assert!(len < (1 << 28));
+        let encoded_len: u32 = ((len as u32 & (0x7f << (7 * 3))) << 3)
+            | ((len as u32 & (0x7f << (7 * 2))) << 2)
+            | ((len as u32 & (0x7f << (7 * 1))) << 1)
+            | (len as u32 & (0x7f << (7 * 0)));
+
+        // We're either in our initial chunk or the next one.
+        let (chunk, start_absolute, offset, end) =
+            if start / CHUNK_SIZE != possible_end / CHUNK_SIZE {
+                let chunk_idx = start / CHUNK_SIZE + 1;
+                (
+                    &self.chunks[chunk_idx].array,
+                    chunk_idx * CHUNK_SIZE,
+                    0usize,
+                    chunk_idx * CHUNK_SIZE + 4 + s.len(),
+                )
+            } else {
+                (&self.chunks[start / CHUNK_SIZE].array, start, start % CHUNK_SIZE, possible_end)
+            };
+        let chunk = LazyLock::force(&chunk);
+
+        // SAFETY:
+        //
+        // * _guard above protects against concurrent alloc's (so single writer for write_at)
+        // * write_at only increases, and shared access is only provided to memory < write_at.
+        // * all chunks are zero-init'd at allocation: no uninitialized memory here.
+        let chunk_tail = unsafe {
+            std::slice::from_raw_parts_mut(
+                chunk.as_ptr().cast::<u8>().add(offset).cast_mut(),
+                CHUNK_SIZE - offset,
+            )
+        };
+        // encoded_len is utf-8-compatible
+        chunk_tail[..4].copy_from_slice(&encoded_len.to_ne_bytes());
+        // `s` is &str, so this is trivially correct.
+        chunk_tail[4..][..s.len()].copy_from_slice(s.as_bytes());
+
+        // Semantically this releases the memory for readers.
+        self.write_at.store(end as u32, Ordering::Release);
+
+        start_absolute as u32
+    }

-        // SAFETY: we can extend the arena allocation to `'static` because we
-        // only access these while the arena is still alive.
-        let string: &'static str = unsafe { &*(string as *const str) };
+    /// Get the allocated string at the passed index.
+    ///
+    /// Note that this **does not** check that the passed index is actually an index returned by
+    /// `alloc`.
+    #[allow(clippy::erasing_op)]
+    fn get(&self, idx: u32) -> &str {
+        let end = self.write_at.load(Ordering::Acquire);
+
+        assert!(idx < end);
+
+        let chunk = &self.chunks[idx as usize / CHUNK_SIZE].array;
+        let chunk = LazyLock::force(&chunk);
+
+        let chunk_str_len = if idx as usize / CHUNK_SIZE == end as usize / CHUNK_SIZE {
+            end as usize % CHUNK_SIZE
+        } else {
+            // Note that zero-initialized bytes are valid UTF-8, so even if we don't fill a full
+            // chunk it remains valid for our purposes.
+            CHUNK_SIZE
+        };
+
+        // SAFETY: write_at only increases, and guarantees &str contents behind it. We checked above that
+        // we are < write_at.
+        let chunk_head =
+            unsafe { std::str::from_raw_parts(chunk.as_ptr().cast::<u8>(), chunk_str_len) };
+        let idx = idx as usize % CHUNK_SIZE;
+
+        let len = u32::from_ne_bytes(chunk_head[idx..idx + 4].as_bytes().try_into().unwrap());
+        let len = ((len & (0x7f << (8 * 3))) >> 3)
+            | ((len & (0x7f << (8 * 2))) >> 2)
+            | ((len & (0x7f << (8 * 1))) >> 1)
+            | ((len & (0x7f << (8 * 0))) >> 0);
+        &chunk_head[idx + 4..idx + 4 + len as usize]
+    }
+}
+
+pub(crate) struct Interner(&'static [&'static str], Lock<InternerInner>);
+
+struct InternerInner {
+    strings: hashbrown::HashTable<Symbol>,
+}
+
+impl Interner {
+    fn prefill(init: &'static [&'static str]) -> Self {
+        assert!(init.len() < u16::MAX as usize);
+        let mut strings = hashbrown::HashTable::new();
+
+        for (idx, s) in init.iter().copied().enumerate() {
+            let mut hasher = FxHasher::default();
+            s.hash(&mut hasher);
+            let hash = hasher.finish();
+            strings.insert_unique(hash, Symbol::new(idx as u32), |val| {
+                // has to be from `init` because we haven't yet inserted anything except those.
+                BuildHasherDefault::<FxHasher>::default().hash_one(init[val.0.index()])
+            });
+        }

-        // This second hash table lookup can be avoided by using `RawEntryMut`,
-        // but this code path isn't hot enough for it to be worth it. See
-        // #91445 for details.
-        let (idx, is_new) = inner.strings.insert_full(string);
-        debug_assert!(is_new); // due to the get_index_of check above
+        Interner(init, Lock::new(InternerInner { strings }))
+    }

-        Symbol::new(idx as u32)
+    #[inline]
+    fn intern(&self, string: &str) -> Symbol {
+        let hash = BuildHasherDefault::<FxHasher>::default().hash_one(string);
+        let mut inner = self.1.lock();
+        match inner.strings.find_entry(hash, |v| self.get(*v) == string) {
+            Ok(e) => return *e.get(),
+            Err(e) => {
+                let idx = GLOBAL_ARENA.alloc(string);
+                let res = Symbol::new(idx as u32);
+
+                e.into_table().insert_unique(hash, res, |val| {
+                    BuildHasherDefault::<FxHasher>::default().hash_one(self.get(*val))
+                });
+
+                res
+            }
+        }
     }

     /// Get the symbol as a string.
     ///
     /// [`Symbol::as_str()`] should be used in preference to this function.
-    fn get(&self, symbol: Symbol) -> &str {
-        self.0.lock().strings.get_index(symbol.0.as_usize()).unwrap()
+    fn get(&self, symbol: Symbol) -> &'static str {
+        if symbol.0.index() < u16::MAX as usize {
+            self.0[symbol.0.index()]
+        } else {
+            GLOBAL_ARENA.get(symbol.0.as_u32())
+        }
     }
 }
compiler/rustc_span/src/symbol/tests.rs

Lines changed: 7 additions & 5 deletions
@@ -4,15 +4,17 @@ use crate::create_default_session_globals_then;
 #[test]
 fn interner_tests() {
     let i = Interner::prefill(&[]);
+    let dog = i.intern("dog");
     // first one is zero:
-    assert_eq!(i.intern("dog"), Symbol::new(0));
+    assert_eq!(i.intern("dog"), dog);
     // re-use gets the same entry:
-    assert_eq!(i.intern("dog"), Symbol::new(0));
+    assert_eq!(i.intern("dog"), dog);
     // different string gets a different #:
-    assert_eq!(i.intern("cat"), Symbol::new(1));
-    assert_eq!(i.intern("cat"), Symbol::new(1));
+    let cat = i.intern("cat");
+    assert_eq!(i.intern("cat"), cat);
+    assert_eq!(i.intern("cat"), cat);
     // dog is still at zero
-    assert_eq!(i.intern("dog"), Symbol::new(0));
+    assert_eq!(i.intern("dog"), dog);
 }

 #[test]
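
With arena offsets rather than dense indices, the test can no longer hard-code `Symbol::new(0)`. As a standalone sketch of the table-of-indices pattern the new `Interner` uses -- hashbrown's `HashTable` storing only `u32` indices and re-deriving hashes from external storage -- with `Vec<String>` standing in for the static arena and `DefaultHasher` for `FxHasher`:

```rust
use std::hash::{BuildHasher, BuildHasherDefault, DefaultHasher};

use hashbrown::HashTable; // hashbrown 0.15

fn intern(storage: &mut Vec<String>, table: &mut HashTable<u32>, s: &str) -> u32 {
    let hasher = BuildHasherDefault::<DefaultHasher>::default();
    let hash = hasher.hash_one(s);
    match table.find_entry(hash, |&idx| storage[idx as usize] == s) {
        // Already interned: return the existing index.
        Ok(entry) => *entry.get(),
        // New string: append to storage, then record its index in the table.
        Err(absent) => {
            let idx = storage.len() as u32;
            storage.push(s.to_owned());
            absent.into_table().insert_unique(hash, idx, |&idx| {
                // Rehash on demand by looking the index back up.
                hasher.hash_one(storage[idx as usize].as_str())
            });
            idx
        }
    }
}

fn main() {
    let (mut storage, mut table) = (Vec::new(), HashTable::new());
    let dog = intern(&mut storage, &mut table, "dog");
    let cat = intern(&mut storage, &mut table, "cat");
    assert_eq!(intern(&mut storage, &mut table, "dog"), dog);
    assert_ne!(dog, cat);
}
```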
