Skip to content

Commit 90ae6de

Browse files
borsMuscraft
authored andcommitted
Auto merge of rust-lang#145737 - cjgillot:gvn-valueset, r=saethlin
GVN: stop hashing opaque values GVN generates values that are not meant to be unified with any other. For instance `Opaque` (aka we don't know anything), non-deterministic constants and borrows. The current algorithm generates a unique index, so the generated `Value` will be different from all the existing. This is wasteful, as we should not hash that `Value` at all. This PR proposes to do this. This involves partially reimplementing a `FxIndexSet`, but yields a small but consistent perf improvement (rust-lang#145737 (comment)).
2 parents 64e698d + df04be8 commit 90ae6de

File tree

3 files changed

+180
-44
lines changed

3 files changed

+180
-44
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4235,6 +4235,7 @@ name = "rustc_mir_transform"
42354235
version = "0.0.0"
42364236
dependencies = [
42374237
"either",
4238+
"hashbrown",
42384239
"itertools",
42394240
"rustc_abi",
42404241
"rustc_arena",

compiler/rustc_mir_transform/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ edition = "2024"
66
[dependencies]
77
# tidy-alphabetical-start
88
either = "1"
9+
hashbrown = "0.15"
910
itertools = "0.12"
1011
rustc_abi = { path = "../rustc_abi" }
1112
rustc_arena = { path = "../rustc_arena" }

compiler/rustc_mir_transform/src/gvn.rs

Lines changed: 178 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -85,16 +85,18 @@
8585
//! that contain `AllocId`s.
8686
8787
use std::borrow::Cow;
88+
use std::hash::{Hash, Hasher};
8889

8990
use either::Either;
91+
use hashbrown::hash_table::{Entry, HashTable};
9092
use itertools::Itertools as _;
9193
use rustc_abi::{self as abi, BackendRepr, FIRST_VARIANT, FieldIdx, Primitive, Size, VariantIdx};
9294
use rustc_const_eval::const_eval::DummyMachine;
9395
use rustc_const_eval::interpret::{
9496
ImmTy, Immediate, InterpCx, MemPlaceMeta, MemoryKind, OpTy, Projectable, Scalar,
9597
intern_const_alloc_for_constprop,
9698
};
97-
use rustc_data_structures::fx::{FxIndexSet, MutableValues};
99+
use rustc_data_structures::fx::FxHasher;
98100
use rustc_data_structures::graph::dominators::Dominators;
99101
use rustc_hir::def::DefKind;
100102
use rustc_index::bit_set::DenseBitSet;
@@ -152,9 +154,29 @@ impl<'tcx> crate::MirPass<'tcx> for GVN {
152154
}
153155

154156
newtype_index! {
157+
/// This represents a `Value` in the symbolic execution.
158+
#[debug_format = "_v{}"]
155159
struct VnIndex {}
156160
}
157161

162+
/// Marker type to forbid hashing and comparing opaque values.
163+
/// This struct should only be constructed by `ValueSet::insert_unique` to ensure we use that
164+
/// method to create non-unifiable values. It will ICE if used in `ValueSet::insert`.
165+
#[derive(Copy, Clone, Debug, Eq)]
166+
struct VnOpaque;
167+
impl PartialEq for VnOpaque {
168+
fn eq(&self, _: &VnOpaque) -> bool {
169+
// ICE if we try to compare unique values
170+
unreachable!()
171+
}
172+
}
173+
impl Hash for VnOpaque {
174+
fn hash<T: Hasher>(&self, _: &mut T) {
175+
// ICE if we try to hash unique values
176+
unreachable!()
177+
}
178+
}
179+
158180
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
159181
enum AddressKind {
160182
Ref(BorrowKind),
@@ -166,15 +188,17 @@ enum Value<'tcx> {
166188
// Root values.
167189
/// Used to represent values we know nothing about.
168190
/// The `usize` is a counter incremented by `new_opaque`.
169-
Opaque(usize),
191+
Opaque(VnOpaque),
170192
/// Evaluated or unevaluated constant value.
171193
Constant {
172194
value: Const<'tcx>,
173195
/// Some constants do not have a deterministic value. To avoid merging two instances of the
174196
/// same `Const`, we assign them an additional integer index.
175-
// `disambiguator` is 0 iff the constant is deterministic.
176-
disambiguator: usize,
197+
// `disambiguator` is `None` iff the constant is deterministic.
198+
disambiguator: Option<VnOpaque>,
177199
},
200+
201+
// Aggregates.
178202
/// An aggregate value, either tuple/closure/struct/enum.
179203
/// This does not contain unions, as we cannot reason with the value.
180204
Aggregate(VariantIdx, Vec<VnIndex>),
@@ -192,7 +216,7 @@ enum Value<'tcx> {
192216
place: Place<'tcx>,
193217
kind: AddressKind,
194218
/// Give each borrow and pointer a different provenance, so we don't merge them.
195-
provenance: usize,
219+
provenance: VnOpaque,
196220
},
197221

198222
// Extractions.
@@ -211,6 +235,107 @@ enum Value<'tcx> {
211235
},
212236
}
213237

238+
/// Stores and deduplicates pairs of `(Value, Ty)` into in `VnIndex` numbered values.
239+
///
240+
/// This data structure is mostly a partial reimplementation of `FxIndexMap<VnIndex, (Value, Ty)>`.
241+
/// We do not use a regular `FxIndexMap` to skip hashing values that are unique by construction,
242+
/// like opaque values, address with provenance and non-deterministic constants.
243+
struct ValueSet<'tcx> {
244+
indices: HashTable<VnIndex>,
245+
hashes: IndexVec<VnIndex, u64>,
246+
values: IndexVec<VnIndex, Value<'tcx>>,
247+
types: IndexVec<VnIndex, Ty<'tcx>>,
248+
}
249+
250+
impl<'tcx> ValueSet<'tcx> {
251+
fn new(num_values: usize) -> ValueSet<'tcx> {
252+
ValueSet {
253+
indices: HashTable::with_capacity(num_values),
254+
hashes: IndexVec::with_capacity(num_values),
255+
values: IndexVec::with_capacity(num_values),
256+
types: IndexVec::with_capacity(num_values),
257+
}
258+
}
259+
260+
/// Insert a `(Value, Ty)` pair without hashing or deduplication.
261+
/// This always creates a new `VnIndex`.
262+
#[inline]
263+
fn insert_unique(
264+
&mut self,
265+
ty: Ty<'tcx>,
266+
value: impl FnOnce(VnOpaque) -> Value<'tcx>,
267+
) -> VnIndex {
268+
let value = value(VnOpaque);
269+
270+
debug_assert!(match value {
271+
Value::Opaque(_) | Value::Address { .. } => true,
272+
Value::Constant { disambiguator, .. } => disambiguator.is_some(),
273+
_ => false,
274+
});
275+
276+
let index = self.hashes.push(0);
277+
let _index = self.types.push(ty);
278+
debug_assert_eq!(index, _index);
279+
let _index = self.values.push(value);
280+
debug_assert_eq!(index, _index);
281+
index
282+
}
283+
284+
/// Insert a `(Value, Ty)` pair to be deduplicated.
285+
/// Returns `true` as second tuple field if this value did not exist previously.
286+
#[allow(rustc::pass_by_value)] // closures take `&VnIndex`
287+
fn insert(&mut self, ty: Ty<'tcx>, value: Value<'tcx>) -> (VnIndex, bool) {
288+
debug_assert!(match value {
289+
Value::Opaque(_) | Value::Address { .. } => false,
290+
Value::Constant { disambiguator, .. } => disambiguator.is_none(),
291+
_ => true,
292+
});
293+
294+
let hash: u64 = {
295+
let mut h = FxHasher::default();
296+
value.hash(&mut h);
297+
ty.hash(&mut h);
298+
h.finish()
299+
};
300+
301+
let eq = |index: &VnIndex| self.values[*index] == value && self.types[*index] == ty;
302+
let hasher = |index: &VnIndex| self.hashes[*index];
303+
match self.indices.entry(hash, eq, hasher) {
304+
Entry::Occupied(entry) => {
305+
let index = *entry.get();
306+
(index, false)
307+
}
308+
Entry::Vacant(entry) => {
309+
let index = self.hashes.push(hash);
310+
entry.insert(index);
311+
let _index = self.values.push(value);
312+
debug_assert_eq!(index, _index);
313+
let _index = self.types.push(ty);
314+
debug_assert_eq!(index, _index);
315+
(index, true)
316+
}
317+
}
318+
}
319+
320+
/// Return the `Value` associated with the given `VnIndex`.
321+
#[inline]
322+
fn value(&self, index: VnIndex) -> &Value<'tcx> {
323+
&self.values[index]
324+
}
325+
326+
/// Return the type associated with the given `VnIndex`.
327+
#[inline]
328+
fn ty(&self, index: VnIndex) -> Ty<'tcx> {
329+
self.types[index]
330+
}
331+
332+
/// Replace the value associated with `index` with an opaque value.
333+
#[inline]
334+
fn forget(&mut self, index: VnIndex) {
335+
self.values[index] = Value::Opaque(VnOpaque);
336+
}
337+
}
338+
214339
struct VnState<'body, 'tcx> {
215340
tcx: TyCtxt<'tcx>,
216341
ecx: InterpCx<'tcx, DummyMachine>,
@@ -221,11 +346,9 @@ struct VnState<'body, 'tcx> {
221346
/// Locals that are assigned that value.
222347
// This vector does not hold all the values of `VnIndex` that we create.
223348
rev_locals: IndexVec<VnIndex, SmallVec<[Local; 1]>>,
224-
values: FxIndexSet<(Value<'tcx>, Ty<'tcx>)>,
349+
values: ValueSet<'tcx>,
225350
/// Values evaluated as constants if possible.
226351
evaluated: IndexVec<VnIndex, Option<OpTy<'tcx>>>,
227-
/// Counter to generate different values.
228-
next_opaque: usize,
229352
/// Cache the deref values.
230353
derefs: Vec<VnIndex>,
231354
ssa: &'body SsaLocals,
@@ -256,9 +379,8 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
256379
is_coroutine: body.coroutine.is_some(),
257380
locals: IndexVec::from_elem(None, local_decls),
258381
rev_locals: IndexVec::with_capacity(num_values),
259-
values: FxIndexSet::with_capacity_and_hasher(num_values, Default::default()),
382+
values: ValueSet::new(num_values),
260383
evaluated: IndexVec::with_capacity(num_values),
261-
next_opaque: 1,
262384
derefs: Vec::new(),
263385
ssa,
264386
dominators,
@@ -272,8 +394,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
272394

273395
#[instrument(level = "trace", skip(self), ret)]
274396
fn insert(&mut self, ty: Ty<'tcx>, value: Value<'tcx>) -> VnIndex {
275-
let (index, new) = self.values.insert_full((value, ty));
276-
let index = VnIndex::from_usize(index);
397+
let (index, new) = self.values.insert(ty, value);
277398
if new {
278399
// Grow `evaluated` and `rev_locals` here to amortize the allocations.
279400
let evaluated = self.eval_to_const(index);
@@ -285,18 +406,16 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
285406
index
286407
}
287408

288-
fn next_opaque(&mut self) -> usize {
289-
let next_opaque = self.next_opaque;
290-
self.next_opaque += 1;
291-
next_opaque
292-
}
293-
294409
/// Create a new `Value` for which we have no information at all, except that it is distinct
295410
/// from all the others.
296411
#[instrument(level = "trace", skip(self), ret)]
297412
fn new_opaque(&mut self, ty: Ty<'tcx>) -> VnIndex {
298-
let value = Value::Opaque(self.next_opaque());
299-
self.insert(ty, value)
413+
let index = self.values.insert_unique(ty, Value::Opaque);
414+
let _index = self.evaluated.push(None);
415+
debug_assert_eq!(index, _index);
416+
let _index = self.rev_locals.push(SmallVec::new());
417+
debug_assert_eq!(index, _index);
418+
index
300419
}
301420

302421
/// Create a new `Value::Address` distinct from all the others.
@@ -309,18 +428,49 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
309428
}
310429
AddressKind::Address(mutbl) => Ty::new_ptr(self.tcx, pty, mutbl.to_mutbl_lossy()),
311430
};
312-
let value = Value::Address { place, kind, provenance: self.next_opaque() };
313-
self.insert(ty, value)
431+
let index =
432+
self.values.insert_unique(ty, |provenance| Value::Address { place, kind, provenance });
433+
let evaluated = self.eval_to_const(index);
434+
let _index = self.evaluated.push(evaluated);
435+
debug_assert_eq!(index, _index);
436+
let _index = self.rev_locals.push(SmallVec::new());
437+
debug_assert_eq!(index, _index);
438+
index
439+
}
440+
441+
#[instrument(level = "trace", skip(self), ret)]
442+
fn insert_constant(&mut self, value: Const<'tcx>) -> VnIndex {
443+
let (index, new) = if value.is_deterministic() {
444+
// The constant is deterministic, no need to disambiguate.
445+
let constant = Value::Constant { value, disambiguator: None };
446+
self.values.insert(value.ty(), constant)
447+
} else {
448+
// Multiple mentions of this constant will yield different values,
449+
// so assign a different `disambiguator` to ensure they do not get the same `VnIndex`.
450+
let index = self.values.insert_unique(value.ty(), |disambiguator| Value::Constant {
451+
value,
452+
disambiguator: Some(disambiguator),
453+
});
454+
(index, true)
455+
};
456+
if new {
457+
let evaluated = self.eval_to_const(index);
458+
let _index = self.evaluated.push(evaluated);
459+
debug_assert_eq!(index, _index);
460+
let _index = self.rev_locals.push(SmallVec::new());
461+
debug_assert_eq!(index, _index);
462+
}
463+
index
314464
}
315465

316466
#[inline]
317467
fn get(&self, index: VnIndex) -> &Value<'tcx> {
318-
&self.values.get_index(index.as_usize()).unwrap().0
468+
self.values.value(index)
319469
}
320470

321471
#[inline]
322472
fn ty(&self, index: VnIndex) -> Ty<'tcx> {
323-
self.values.get_index(index.as_usize()).unwrap().1
473+
self.values.ty(index)
324474
}
325475

326476
/// Record that `local` is assigned `value`. `local` must be SSA.
@@ -331,33 +481,18 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
331481
self.rev_locals[value].push(local);
332482
}
333483

334-
fn insert_constant(&mut self, value: Const<'tcx>) -> VnIndex {
335-
let disambiguator = if value.is_deterministic() {
336-
// The constant is deterministic, no need to disambiguate.
337-
0
338-
} else {
339-
// Multiple mentions of this constant will yield different values,
340-
// so assign a different `disambiguator` to ensure they do not get the same `VnIndex`.
341-
let disambiguator = self.next_opaque();
342-
// `disambiguator: 0` means deterministic.
343-
debug_assert_ne!(disambiguator, 0);
344-
disambiguator
345-
};
346-
self.insert(value.ty(), Value::Constant { value, disambiguator })
347-
}
348-
349484
fn insert_bool(&mut self, flag: bool) -> VnIndex {
350485
// Booleans are deterministic.
351486
let value = Const::from_bool(self.tcx, flag);
352487
debug_assert!(value.is_deterministic());
353-
self.insert(self.tcx.types.bool, Value::Constant { value, disambiguator: 0 })
488+
self.insert(self.tcx.types.bool, Value::Constant { value, disambiguator: None })
354489
}
355490

356491
fn insert_scalar(&mut self, ty: Ty<'tcx>, scalar: Scalar) -> VnIndex {
357492
// Scalars are deterministic.
358493
let value = Const::from_scalar(self.tcx, scalar, ty);
359494
debug_assert!(value.is_deterministic());
360-
self.insert(ty, Value::Constant { value, disambiguator: 0 })
495+
self.insert(ty, Value::Constant { value, disambiguator: None })
361496
}
362497

363498
fn insert_tuple(&mut self, ty: Ty<'tcx>, values: Vec<VnIndex>) -> VnIndex {
@@ -372,8 +507,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
372507

373508
fn invalidate_derefs(&mut self) {
374509
for deref in std::mem::take(&mut self.derefs) {
375-
let opaque = self.next_opaque();
376-
self.values.get_index_mut2(deref.index()).unwrap().0 = Value::Opaque(opaque);
510+
self.values.forget(deref);
377511
}
378512
}
379513

@@ -1572,7 +1706,7 @@ impl<'tcx> VnState<'_, 'tcx> {
15721706
// This was already constant in MIR, do not change it. If the constant is not
15731707
// deterministic, adding an additional mention of it in MIR will not give the same value as
15741708
// the former mention.
1575-
if let Value::Constant { value, disambiguator: 0 } = *self.get(index) {
1709+
if let Value::Constant { value, disambiguator: None } = *self.get(index) {
15761710
debug_assert!(value.is_deterministic());
15771711
return Some(ConstOperand { span: DUMMY_SP, user_ty: None, const_: value });
15781712
}

0 commit comments

Comments
 (0)