Skip to content

Commit 452086f

Browse files
alexcrichton and abrown authored
Store stack maps in an ELF section (#10404)
* Store stack maps in an ELF section

  This commit moves the storage of stack maps from being embedded within serde-encoded information to instead being stored in a separate ELF section in the final executable. The motivation for this is to make this more easily debuggable with a `wasmtime objdump` command in the future but this additionally should have the nice side effect of making non-stack-maps modules have smaller encoded information (no need to encode an empty list) and additionally make stack-maps-using-modules faster to decode (no serde decoding, it's already "decoded").

  This implements a scheme similar to the address map section where there's a "builder" for the section and then a separate half to decode the section. The same basic encoding, a bit map, is used. This is likely going to make accessing stack maps slightly slower, but if that's an issue we can tweak the representation and align things and/or use `usize` or such.

* Update crates/environ/src/compile/stack_maps.rs

  Co-authored-by: Andrew Brown <[email protected]>

* Review comments

* More review comments

* Fix MIRI test by enabling `unaligned` object feature

---------

Co-authored-by: Andrew Brown <[email protected]>
1 parent 3e406d2 commit 452086f

File tree

16 files changed

+492
-171
lines changed

16 files changed

+492
-171
lines changed

cranelift/bitset/src/compound.rs

Lines changed: 66 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//! Compound bit sets.
22
3-
use crate::scalar::{self, ScalarBitSet};
3+
use crate::scalar::{self, ScalarBitSet, ScalarBitSetStorage};
44
use alloc::boxed::Box;
55
use core::{cmp, iter, mem};
66

@@ -45,8 +45,8 @@ use core::{cmp, iter, mem};
4545
feature = "enable-serde",
4646
derive(serde_derive::Serialize, serde_derive::Deserialize)
4747
)]
48-
pub struct CompoundBitSet {
49-
elems: Box<[ScalarBitSet<usize>]>,
48+
pub struct CompoundBitSet<T = usize> {
49+
elems: Box<[ScalarBitSet<T>]>,
5050
max: Option<u32>,
5151
}
5252

@@ -57,8 +57,6 @@ impl core::fmt::Debug for CompoundBitSet {
5757
}
5858
}
5959

60-
const BITS_PER_WORD: usize = mem::size_of::<usize>() * 8;
61-
6260
impl CompoundBitSet {
6361
/// Construct a new, empty bit set.
6462
///
@@ -75,6 +73,10 @@ impl CompoundBitSet {
7573
pub fn new() -> Self {
7674
CompoundBitSet::default()
7775
}
76+
}
77+
78+
impl<T: ScalarBitSetStorage> CompoundBitSet<T> {
79+
const BITS_PER_SCALAR: usize = mem::size_of::<T>() * 8;
7880

7981
/// Construct a new, empty bit set with space reserved to store any element
8082
/// `x` such that `x < capacity`.
@@ -86,14 +88,14 @@ impl CompoundBitSet {
8688
/// ```
8789
/// use cranelift_bitset::CompoundBitSet;
8890
///
89-
/// let bitset = CompoundBitSet::with_capacity(4096);
91+
/// let bitset = CompoundBitSet::<u32>::with_capacity(4096);
9092
///
9193
/// assert!(bitset.is_empty());
9294
/// assert!(bitset.capacity() >= 4096);
9395
/// ```
9496
#[inline]
9597
pub fn with_capacity(capacity: usize) -> Self {
96-
let mut bitset = Self::new();
98+
let mut bitset = Self::default();
9799
bitset.ensure_capacity(capacity);
98100
bitset
99101
}
@@ -144,7 +146,7 @@ impl CompoundBitSet {
144146
/// assert!(bitset.capacity() >= 999);
145147
///```
146148
pub fn capacity(&self) -> usize {
147-
self.elems.len() * BITS_PER_WORD
149+
self.elems.len() * Self::BITS_PER_SCALAR
148150
}
149151

150152
/// Is this bitset empty?
@@ -172,8 +174,8 @@ impl CompoundBitSet {
172174
/// `ScalarBitSet<usize>` at `self.elems[word]`.
173175
#[inline]
174176
fn word_and_bit(i: usize) -> (usize, u8) {
175-
let word = i / BITS_PER_WORD;
176-
let bit = i % BITS_PER_WORD;
177+
let word = i / Self::BITS_PER_SCALAR;
178+
let bit = i % Self::BITS_PER_SCALAR;
177179
let bit = u8::try_from(bit).unwrap();
178180
(word, bit)
179181
}
@@ -183,8 +185,8 @@ impl CompoundBitSet {
183185
#[inline]
184186
fn elem(word: usize, bit: u8) -> usize {
185187
let bit = usize::from(bit);
186-
debug_assert!(bit < BITS_PER_WORD);
187-
word * BITS_PER_WORD + bit
188+
debug_assert!(bit < Self::BITS_PER_SCALAR);
189+
word * Self::BITS_PER_SCALAR + bit
188190
}
189191

190192
/// Is `i` contained in this bitset?
@@ -461,19 +463,63 @@ impl CompoundBitSet {
461463
/// );
462464
/// ```
463465
#[inline]
464-
pub fn iter(&self) -> Iter<'_> {
466+
pub fn iter(&self) -> Iter<'_, T> {
465467
Iter {
466468
bitset: self,
467469
word: 0,
468470
sub: None,
469471
}
470472
}
473+
474+
/// Returns an iterator over the words of this bit-set or the in-memory
475+
/// representation of the bit set.
476+
///
477+
/// # Example
478+
///
479+
/// ```
480+
/// use cranelift_bitset::{CompoundBitSet, ScalarBitSet};
481+
///
482+
/// let mut bitset = CompoundBitSet::<u32>::default();
483+
///
484+
/// assert_eq!(
485+
/// bitset.iter_scalars().collect::<Vec<_>>(),
486+
/// [],
487+
/// );
488+
///
489+
/// bitset.insert(0);
490+
///
491+
/// assert_eq!(
492+
/// bitset.iter_scalars().collect::<Vec<_>>(),
493+
/// [ScalarBitSet(0x1)],
494+
/// );
495+
///
496+
/// bitset.insert(1);
497+
///
498+
/// assert_eq!(
499+
/// bitset.iter_scalars().collect::<Vec<_>>(),
500+
/// [ScalarBitSet(0x3)],
501+
/// );
502+
///
503+
/// bitset.insert(32);
504+
///
505+
/// assert_eq!(
506+
/// bitset.iter_scalars().collect::<Vec<_>>(),
507+
/// [ScalarBitSet(0x3), ScalarBitSet(0x1)],
508+
/// );
509+
/// ```
510+
pub fn iter_scalars(&self) -> impl Iterator<Item = ScalarBitSet<T>> + '_ {
511+
let nwords = match self.max {
512+
Some(n) => 1 + (n as usize / Self::BITS_PER_SCALAR),
513+
None => 0,
514+
};
515+
self.elems.iter().copied().take(nwords)
516+
}
471517
}
472518

473-
impl<'a> IntoIterator for &'a CompoundBitSet {
519+
impl<'a, T: ScalarBitSetStorage> IntoIterator for &'a CompoundBitSet<T> {
474520
type Item = usize;
475521

476-
type IntoIter = Iter<'a>;
522+
type IntoIter = Iter<'a, T>;
477523

478524
#[inline]
479525
fn into_iter(self) -> Self::IntoIter {
@@ -482,21 +528,21 @@ impl<'a> IntoIterator for &'a CompoundBitSet {
482528
}
483529

484530
/// An iterator over the elements in a [`CompoundBitSet`].
485-
pub struct Iter<'a> {
486-
bitset: &'a CompoundBitSet,
531+
pub struct Iter<'a, T = usize> {
532+
bitset: &'a CompoundBitSet<T>,
487533
word: usize,
488-
sub: Option<scalar::Iter<usize>>,
534+
sub: Option<scalar::Iter<T>>,
489535
}
490536

491-
impl Iterator for Iter<'_> {
537+
impl<T: ScalarBitSetStorage> Iterator for Iter<'_, T> {
492538
type Item = usize;
493539

494540
#[inline]
495541
fn next(&mut self) -> Option<usize> {
496542
loop {
497543
if let Some(sub) = &mut self.sub {
498544
if let Some(bit) = sub.next() {
499-
return Some(CompoundBitSet::elem(self.word, bit));
545+
return Some(CompoundBitSet::<T>::elem(self.word, bit));
500546
} else {
501547
self.word += 1;
502548
}

crates/cranelift/src/compiler.rs

Lines changed: 32 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ use crate::{array_call_signature, CompiledFunction, ModuleTextBuilder};
66
use crate::{builder::LinkOptions, wasm_call_signature, BuiltinFunctionSignatures};
77
use anyhow::{Context as _, Result};
88
use cranelift_codegen::binemit::CodeOffset;
9-
use cranelift_codegen::bitset::CompoundBitSet;
109
use cranelift_codegen::ir::condcodes::IntCC;
1110
use cranelift_codegen::ir::{self, InstBuilder, MemFlags, UserExternalName, UserFuncName, Value};
1211
use cranelift_codegen::isa::{
@@ -23,14 +22,15 @@ use std::any::Any;
2322
use std::cmp;
2423
use std::collections::HashMap;
2524
use std::mem;
25+
use std::ops::Range;
2626
use std::path;
2727
use std::sync::{Arc, Mutex};
2828
use wasmparser::{FuncValidatorAllocations, FunctionBody};
2929
use wasmtime_environ::{
3030
AddressMapSection, BuiltinFunctionIndex, CacheStore, CompileError, DefinedFuncIndex, FlagValue,
3131
FunctionBodyData, FunctionLoc, HostCall, ModuleTranslation, ModuleTypesBuilder, PtrSize,
32-
RelocationTarget, StackMapInformation, StaticModuleIndex, TrapEncodingBuilder, TrapSentinel,
33-
TripleExt, Tunables, VMOffsets, WasmFuncType, WasmFunctionInfo, WasmValType,
32+
RelocationTarget, StackMapSection, StaticModuleIndex, TrapEncodingBuilder, TrapSentinel,
33+
TripleExt, Tunables, VMOffsets, WasmFuncType, WasmValType,
3434
};
3535

3636
#[cfg(feature = "component-model")]
@@ -187,7 +187,7 @@ impl wasmtime_environ::Compiler for Compiler {
187187
func_index: DefinedFuncIndex,
188188
input: FunctionBodyData<'_>,
189189
types: &ModuleTypesBuilder,
190-
) -> Result<(WasmFunctionInfo, Box<dyn Any + Send>), CompileError> {
190+
) -> Result<Box<dyn Any + Send>, CompileError> {
191191
let isa = &*self.isa;
192192
let module = &translation.module;
193193
let func_index = module.func_index(func_index);
@@ -275,7 +275,7 @@ impl wasmtime_environ::Compiler for Compiler {
275275
&mut func_env,
276276
)?;
277277

278-
let (info, func) = compiler.finish_with_info(
278+
let func = compiler.finish_with_info(
279279
Some((&body, &self.tunables)),
280280
&format!("wasm_func_{}", func_index.as_u32()),
281281
)?;
@@ -284,7 +284,7 @@ impl wasmtime_environ::Compiler for Compiler {
284284
log::debug!("{:?} translated in {:?}", func_index, timing.total());
285285
log::trace!("{:?} timing info\n{}", func_index, timing);
286286

287-
Ok((info, Box::new(func)))
287+
Ok(Box::new(func))
288288
}
289289

290290
fn compile_array_to_wasm_trampoline(
@@ -450,6 +450,7 @@ impl wasmtime_environ::Compiler for Compiler {
450450
}
451451
let mut addrs = AddressMapSection::default();
452452
let mut traps = TrapEncodingBuilder::default();
453+
let mut stack_maps = StackMapSection::default();
453454

454455
let mut ret = Vec::with_capacity(funcs.len());
455456
for (i, (sym, func)) in funcs.iter().enumerate() {
@@ -459,6 +460,11 @@ impl wasmtime_environ::Compiler for Compiler {
459460
let addr = func.address_map();
460461
addrs.push(range.clone(), &addr.instructions);
461462
}
463+
clif_to_env_stack_maps(
464+
&mut stack_maps,
465+
range.clone(),
466+
func.buffer.user_stack_maps(),
467+
);
462468
traps.push(range.clone(), &func.traps().collect::<Vec<_>>());
463469
builder.append_padding(self.linkopts.padding_between_functions);
464470
let info = FunctionLoc {
@@ -473,6 +479,7 @@ impl wasmtime_environ::Compiler for Compiler {
473479
if self.tunables.generate_address_map {
474480
addrs.append_to(obj);
475481
}
482+
stack_maps.append_to(obj);
476483
traps.append_to(obj);
477484

478485
Ok(ret)
@@ -963,16 +970,14 @@ impl FunctionCompiler<'_> {
963970
}
964971

965972
fn finish(self, clif_filename: &str) -> Result<CompiledFunction, CompileError> {
966-
let (info, func) = self.finish_with_info(None, clif_filename)?;
967-
assert!(info.stack_maps.is_empty());
968-
Ok(func)
973+
self.finish_with_info(None, clif_filename)
969974
}
970975

971976
fn finish_with_info(
972977
mut self,
973978
body_and_tunables: Option<(&FunctionBody<'_>, &Tunables)>,
974979
clif_filename: &str,
975-
) -> Result<(WasmFunctionInfo, CompiledFunction), CompileError> {
980+
) -> Result<CompiledFunction, CompileError> {
976981
let context = &mut self.cx.codegen_context;
977982
let isa = &*self.compiler.isa;
978983

@@ -994,7 +999,7 @@ impl FunctionCompiler<'_> {
994999
write!(output, "{}", context.func.display()).unwrap();
9951000
}
9961001

997-
let mut compiled_code = compilation_result?;
1002+
let compiled_code = compilation_result?;
9981003

9991004
// Give wasm functions, user defined code, a "preferred" alignment
10001005
// instead of the minimum alignment as this can help perf in niche
@@ -1054,45 +1059,35 @@ impl FunctionCompiler<'_> {
10541059
}
10551060
}
10561061

1057-
let stack_maps =
1058-
clif_to_env_stack_maps(compiled_code.buffer.take_user_stack_maps().into_iter());
10591062
compiled_function
10601063
.set_sized_stack_slots(std::mem::take(&mut context.func.sized_stack_slots));
10611064
self.compiler.contexts.lock().unwrap().push(self.cx);
10621065

1063-
Ok((
1064-
WasmFunctionInfo {
1065-
start_srcloc: compiled_function.metadata().address_map.start_srcloc,
1066-
stack_maps: stack_maps.into(),
1067-
},
1068-
compiled_function,
1069-
))
1066+
Ok(compiled_function)
10701067
}
10711068
}
10721069

10731070
/// Convert from Cranelift's representation of a stack map to Wasmtime's
10741071
/// compiler-agnostic representation.
1072+
///
1073+
/// Here `section` is the wasmtime data section being created and `range` is the
1074+
/// range of the function being added. The `clif_stack_maps` entry is the raw
1075+
/// listing of stack maps from Cranelift.
10751076
fn clif_to_env_stack_maps(
1076-
clif_stack_maps: impl ExactSizeIterator<Item = (CodeOffset, u32, ir::UserStackMap)>,
1077-
) -> Vec<StackMapInformation> {
1078-
let mut stack_maps = Vec::with_capacity(clif_stack_maps.len());
1079-
for (code_offset, mapped_bytes, stack_map) in clif_stack_maps {
1080-
let mut bitset = CompoundBitSet::new();
1081-
for (ty, offset) in stack_map.entries() {
1077+
section: &mut StackMapSection,
1078+
range: Range<u64>,
1079+
clif_stack_maps: &[(CodeOffset, u32, ir::UserStackMap)],
1080+
) {
1081+
for (offset, frame_size, stack_map) in clif_stack_maps {
1082+
let mut frame_offsets = Vec::new();
1083+
for (ty, frame_offset) in stack_map.entries() {
10821084
assert_eq!(ty, ir::types::I32);
1083-
bitset.insert(usize::try_from(offset).unwrap());
1084-
}
1085-
if bitset.is_empty() {
1086-
continue;
1085+
frame_offsets.push(frame_offset);
10871086
}
1088-
let stack_map = wasmtime_environ::StackMap::new(mapped_bytes, bitset);
1089-
stack_maps.push(StackMapInformation {
1090-
code_offset,
1091-
stack_map,
1092-
});
1087+
let code_offset = range.start + u64::from(*offset);
1088+
assert!(code_offset < range.end);
1089+
section.push(code_offset, *frame_size, frame_offsets.into_iter());
10931090
}
1094-
stack_maps.sort_unstable_by_key(|info| info.code_offset);
1095-
stack_maps
10961091
}
10971092

10981093
fn declare_and_call(

crates/environ/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ smallvec = { workspace = true, features = ['serde'] }
4242
clap = { workspace = true, features = ['default'] }
4343
env_logger = { workspace = true }
4444
wat = { workspace = true }
45+
# Fix a test parsing ELF files internally where the bytes themselves reside in a
46+
# `Vec<u8>` with no alignment requirements on it. By enabling the `unaligned`
47+
# feature we don't require anything to be aligned so it doesn't matter the
48+
# alignment of the bytes that we're reading.
49+
object = { workspace = true, features = ['unaligned'] }
4550

4651
[[example]]
4752
name = "factc"

crates/environ/src/address_map.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ fn parse_address_map(
5555
section: &[u8],
5656
) -> Option<(&[U32Bytes<LittleEndian>], &[U32Bytes<LittleEndian>])> {
5757
let mut section = Bytes(section);
58-
// NB: this matches the encoding written by `append_to` above.
58+
// NB: this matches the encoding written by `append_to` in the
59+
// `compile::address_map` module.
5960
let count = section.read::<U32Bytes<LittleEndian>>().ok()?;
6061
let count = usize::try_from(count.get(LittleEndian)).ok()?;
6162
let (offsets, section) =

0 commit comments

Comments
 (0)