From 3bea2022dacd5dc5077a36d0a588ad5458803d2e Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 30 Sep 2025 19:56:30 -0700 Subject: [PATCH 1/9] Wasmtime: implement debug instrumentation and basic host API to examine runtime state. This PR implements ideas from the [recent RFC] to serve as the basis for Wasm (guest) debugging: it adds a stackslot to each function translated from Wasm, stores to replicate Wasm VM state in the stackslot as the program runs, and metadata to describe the format of that state and allow reading it out at runtime. As an initial user of this state, this PR adds a basic "stack view" API that, from host code that has been called from Wasm, can examine Wasm frames currently on the stack and read out all of their locals and stack slots. Note in particular that this PR does not include breakpoints, watchpoints, stepped execution, or any sort of user interface for any of this; it is only a foundation. This PR still has a few unsatisfying bits that I intend to address: - The "stack view" performs some O(n) work when the view is initially taken, computing some internal data per frame. This is forced by the current design of `Backtrace`, which takes a closure and walks that closure over stack frames eagerly (rather than working as an iterator). It's got some impressive iterator-chain stuff going on internally, so refactoring it to the latter approach might not be *too* bad, but I haven't tackled it yet. An O(1) stack view, that is, one that does work only for frames as the host API is used to walk up the stack, is desirable because some use-cases may want to quickly examine e.g. only the deepest frame (say, running with a breakpoint condition that needs to read a particular local's value after each step). - It includes a new `Config::compiler_force_inlining()` option that is used only for testing that we get the correct frames after inlining. 
I couldn't get the existing flags to work on a Wasmtime config level and suspect there may be an existing bug there; I will try to split out a fix for it. This PR renames the existing `debug` option to `native_debug`, to distinguish it from the new approach. [recent RFC]: https://github.com/bytecodealliance/rfcs/pull/44 --- .github/workflows/main.yml | 2 +- Cargo.toml | 2 + crates/c-api/src/config.rs | 2 +- crates/cli-flags/Cargo.toml | 1 + crates/cli-flags/src/lib.rs | 13 +- crates/cranelift/src/compiled_function.rs | 14 +- crates/cranelift/src/compiler.rs | 91 +++- crates/cranelift/src/func_environ.rs | 269 +++++++++- .../src/translate/code_translator.rs | 40 +- .../src/translate/func_translator.rs | 28 +- crates/cranelift/src/translate/stack.rs | 62 ++- crates/environ/src/compile/frame_table.rs | 316 ++++++++++++ crates/environ/src/compile/mod.rs | 2 + crates/environ/src/frame_table.rs | 481 ++++++++++++++++++ crates/environ/src/lib.rs | 2 + crates/environ/src/obj.rs | 11 + crates/environ/src/tunables.rs | 5 + crates/wasmtime/Cargo.toml | 3 + crates/wasmtime/src/config.rs | 41 +- crates/wasmtime/src/engine/serialization.rs | 8 +- crates/wasmtime/src/runtime.rs | 6 +- crates/wasmtime/src/runtime/code_memory.rs | 12 +- crates/wasmtime/src/runtime/debug.rs | 460 +++++++++++------ crates/wasmtime/src/runtime/func.rs | 9 + crates/wasmtime/src/runtime/module.rs | 10 + .../wasmtime/src/runtime/module/registry.rs | 2 +- crates/wasmtime/src/runtime/native_debug.rs | 172 +++++++ crates/wasmtime/src/runtime/store.rs | 28 + crates/wasmtime/src/runtime/vm/instance.rs | 2 +- .../src/runtime/vm/traphandlers/backtrace.rs | 15 +- crates/winch/src/builder.rs | 4 + examples/fib-debug/main.rs | 2 +- src/commands/objdump.rs | 102 +++- tests/all/debug.rs | 107 ++++ tests/all/main.rs | 1 + tests/all/{debug => native_debug}/dump.rs | 0 tests/all/{debug => native_debug}/gdb.rs | 2 +- tests/all/{debug => native_debug}/lldb.rs | 34 +- tests/all/{debug => native_debug}/mod.rs | 0 
tests/all/{debug => native_debug}/obj.rs | 2 +- .../satisfy_memory_import.wat | 0 tests/all/{debug => native_debug}/simulate.rs | 0 .../all/{debug => native_debug}/translate.rs | 0 tests/all/pulley.rs | 2 +- tests/all/winch_engine_features.rs | 2 +- tests/disas/debug-exceptions.wat | 86 ++++ 46 files changed, 2226 insertions(+), 227 deletions(-) create mode 100644 crates/environ/src/compile/frame_table.rs create mode 100644 crates/environ/src/frame_table.rs create mode 100644 crates/wasmtime/src/runtime/native_debug.rs create mode 100644 tests/all/debug.rs rename tests/all/{debug => native_debug}/dump.rs (100%) rename tests/all/{debug => native_debug}/gdb.rs (98%) rename tests/all/{debug => native_debug}/lldb.rs (93%) rename tests/all/{debug => native_debug}/mod.rs (100%) rename tests/all/{debug => native_debug}/obj.rs (95%) rename tests/all/{debug => native_debug}/satisfy_memory_import.wat (100%) rename tests/all/{debug => native_debug}/simulate.rs (100%) rename tests/all/{debug => native_debug}/translate.rs (100%) create mode 100644 tests/disas/debug-exceptions.wat diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index af3c9e924052..b52b91e7e3a5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -960,7 +960,7 @@ jobs: sudo mkdir -p /usr/lib/local/lib/python3.10/dist-packages/lldb sudo ln -s /usr/lib/llvm-15/lib/python3.10/dist-packages/lldb/* /usr/lib/python3/dist-packages/lldb/ # Only testing release since it is more likely to expose issues with our low-level symbol handling. 
- cargo test --release --test all -- --ignored --test-threads 1 debug:: + cargo test --release --test all -- --ignored --test-threads 1 native_debug:: env: LLDB: lldb-18 WASI_SDK_PATH: /tmp/wasi-sdk diff --git a/Cargo.toml b/Cargo.toml index 246f78c2e8c2..810e4dc230df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -469,6 +469,7 @@ default = [ "stack-switching", "winch", "pulley", + "debug", # Enable some nice features of clap by default, but they come at a binary size # cost, so allow disabling this through disabling of our own `default` @@ -531,6 +532,7 @@ gc-drc = ["gc", "wasmtime/gc-drc", "wasmtime-cli-flags/gc-drc"] gc-null = ["gc", "wasmtime/gc-null", "wasmtime-cli-flags/gc-null"] pulley = ["wasmtime-cli-flags/pulley"] stack-switching = ["wasmtime/stack-switching", "wasmtime-cli-flags/stack-switching"] +debug = ["wasmtime-cli-flags/debug", "wasmtime/debug"] # CLI subcommands for the `wasmtime` executable. See `wasmtime $cmd --help` # for more information on each subcommand. diff --git a/crates/c-api/src/config.rs b/crates/c-api/src/config.rs index 3113ab1627b9..ff0b09de5706 100644 --- a/crates/c-api/src/config.rs +++ b/crates/c-api/src/config.rs @@ -55,7 +55,7 @@ pub extern "C" fn wasm_config_new() -> Box { #[unsafe(no_mangle)] pub extern "C" fn wasmtime_config_debug_info_set(c: &mut wasm_config_t, enable: bool) { - c.config.debug_info(enable); + c.config.native_debug_info(enable); } #[unsafe(no_mangle)] diff --git a/crates/cli-flags/Cargo.toml b/crates/cli-flags/Cargo.toml index 1469c9535b15..b5d7420a9e42 100644 --- a/crates/cli-flags/Cargo.toml +++ b/crates/cli-flags/Cargo.toml @@ -40,3 +40,4 @@ threads = ["wasmtime/threads"] memory-protection-keys = ["wasmtime/memory-protection-keys"] pulley = ["wasmtime/pulley"] stack-switching = ["wasmtime/stack-switching"] +debug = ["wasmtime/debug"] \ No newline at end of file diff --git a/crates/cli-flags/src/lib.rs b/crates/cli-flags/src/lib.rs index f0cea981c550..a191c90f27ca 100644 --- a/crates/cli-flags/src/lib.rs +++ 
b/crates/cli-flags/src/lib.rs @@ -263,7 +263,9 @@ wasmtime_option_group! { #[serde(rename_all = "kebab-case", deny_unknown_fields)] pub struct DebugOptions { /// Enable generation of DWARF debug information in compiled code. - pub debug_info: Option, + pub native_debug_info: Option, + /// Enable debug instrumentation for perfect value reconstruction. + pub debug_instrumentation: Option, /// Configure whether compiled code can map native addresses to wasm. pub address_map: Option, /// Configure whether logging is enabled. @@ -701,8 +703,13 @@ impl CommonOptions { enable => config.cranelift_debug_verifier(enable), true => err, } - if let Some(enable) = self.debug.debug_info { - config.debug_info(enable); + if let Some(enable) = self.debug.native_debug_info { + config.native_debug_info(enable); + } + match_feature! { + ["debug" : self.debug.debug_instrumentation] + enable => config.debug_instrumentation(enable), + _ => err, } if self.debug.coredump.is_some() { #[cfg(feature = "coredump")] diff --git a/crates/cranelift/src/compiled_function.rs b/crates/cranelift/src/compiled_function.rs index ca1faab58eb0..9fa91295d6eb 100644 --- a/crates/cranelift/src/compiled_function.rs +++ b/crates/cranelift/src/compiled_function.rs @@ -1,7 +1,7 @@ use crate::{Relocation, mach_reloc_to_reloc, mach_trap_to_trap}; use cranelift_codegen::{ - Final, MachBufferFinalized, MachSrcLoc, ValueLabelsRanges, ir, isa::unwind::CfaUnwindInfo, - isa::unwind::UnwindInfo, + Final, MachBufferFinalized, MachBufferFrameLayout, MachSrcLoc, ValueLabelsRanges, ir, + isa::unwind::CfaUnwindInfo, isa::unwind::UnwindInfo, }; use wasmtime_environ::{FilePos, InstructionAddressMap, PrimaryMap, TrapInformation}; @@ -44,8 +44,6 @@ pub struct CompiledFunctionMetadata { pub cfa_unwind_info: Option, /// Mapping of value labels and their locations. pub value_labels_ranges: ValueLabelsRanges, - /// Allocated stack slots. - pub sized_stack_slots: ir::StackSlots, /// Start source location. 
pub start_srcloc: FilePos, /// End source location. @@ -155,9 +153,11 @@ impl CompiledFunction { self.metadata.cfa_unwind_info = Some(unwind); } - /// Set the sized stack slots. - pub fn set_sized_stack_slots(&mut self, slots: ir::StackSlots) { - self.metadata.sized_stack_slots = slots; + /// Returns the frame-layout metadata for this function. + pub fn frame_layout(&self) -> &MachBufferFrameLayout { + self.buffer + .frame_layout() + .expect("Single-function MachBuffer must have frame layout information") } } diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 15dc427f83c7..e5811fe33621 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -14,7 +14,10 @@ use cranelift_codegen::isa::{ unwind::{UnwindInfo, UnwindInfoKind}, }; use cranelift_codegen::print_errors::pretty_error; -use cranelift_codegen::{CompiledCode, Context, FinalizedMachCallSite}; +use cranelift_codegen::{ + CompiledCode, Context, FinalizedMachCallSite, MachBufferDebugTagList, MachBufferFrameLayout, + MachDebugTagPos, +}; use cranelift_entity::PrimaryMap; use cranelift_frontend::FunctionBuilder; use object::write::{Object, StandardSegment, SymbolId}; @@ -28,13 +31,13 @@ use std::ops::Range; use std::path; use std::sync::{Arc, Mutex}; use wasmparser::{FuncValidatorAllocations, FunctionBody}; -use wasmtime_environ::obj::ELF_WASMTIME_EXCEPTIONS; +use wasmtime_environ::obj::{ELF_WASMTIME_EXCEPTIONS, ELF_WASMTIME_FRAMES}; use wasmtime_environ::{ Abi, AddressMapSection, BuiltinFunctionIndex, CacheStore, CompileError, CompiledFunctionBody, - DefinedFuncIndex, FlagValue, FuncKey, FunctionBodyData, FunctionLoc, HostCall, - InliningCompiler, ModuleTranslation, ModuleTypesBuilder, PtrSize, StackMapSection, - StaticModuleIndex, TrapEncodingBuilder, TrapSentinel, TripleExt, Tunables, VMOffsets, - WasmFuncType, WasmValType, + DefinedFuncIndex, FlagValue, FrameInstPos, FrameStackShape, FrameTableBuilder, FuncKey, + FunctionBodyData, FunctionLoc, 
HostCall, InliningCompiler, ModuleTranslation, + ModuleTypesBuilder, PtrSize, StackMapSection, StaticModuleIndex, TrapEncodingBuilder, + TrapSentinel, TripleExt, Tunables, VMOffsets, WasmFuncType, WasmValType, }; use wasmtime_unwinder::ExceptionTableBuilder; @@ -252,7 +255,7 @@ impl wasmtime_environ::Compiler for Compiler { context.func.collect_debug_info(); } - let mut func_env = FuncEnvironment::new(self, translation, types, wasm_func_ty); + let mut func_env = FuncEnvironment::new(self, translation, types, wasm_func_ty, key); // The `stack_limit` global value below is the implementation of stack // overflow checks in Wasmtime. @@ -575,6 +578,7 @@ impl wasmtime_environ::Compiler for Compiler { let mut traps = TrapEncodingBuilder::default(); let mut stack_maps = StackMapSection::default(); let mut exception_tables = ExceptionTableBuilder::default(); + let mut frame_tables = FrameTableBuilder::default(); let mut ret = Vec::with_capacity(funcs.len()); for (i, (sym, func)) in funcs.iter().enumerate() { @@ -602,6 +606,16 @@ impl wasmtime_environ::Compiler for Compiler { range.clone(), func.buffer.call_sites(), )?; + if self.tunables.debug_instrumentation + && let Some(frame_layout) = func.buffer.frame_layout() + { + clif_to_env_frame_tables( + &mut frame_tables, + range.clone(), + func.buffer.debug_tags(), + frame_layout, + )?; + } builder.append_padding(self.linkopts.padding_between_functions); let info = FunctionLoc { @@ -628,6 +642,17 @@ impl wasmtime_environ::Compiler for Compiler { obj.append_section_data(exception_section, bytes, 1); }); + if self.tunables.debug_instrumentation { + let frame_table_section = obj.add_section( + obj.segment_name(StandardSegment::Data).to_vec(), + ELF_WASMTIME_FRAMES.as_bytes().to_vec(), + SectionKind::ReadOnlyData, + ); + frame_tables.serialize(|bytes| { + obj.append_section_data(frame_table_section, bytes, 1); + }); + } + Ok(ret) } @@ -1401,8 +1426,6 @@ impl FunctionCompiler<'_> { } } - compiled_function - 
.set_sized_stack_slots(std::mem::take(&mut context.func.sized_stack_slots)); self.compiler.contexts.lock().unwrap().push(self.cx); Ok(compiled_function) @@ -1447,6 +1470,56 @@ fn clif_to_env_exception_tables<'a>( builder.add_func(CodeOffset::try_from(range.start).unwrap(), call_sites) } +/// Convert from Cranelift's representation of frame state slots and +/// debug tags to Wasmtime's serialized metadata. +fn clif_to_env_frame_tables<'a>( + builder: &mut FrameTableBuilder, + range: Range, + tag_sites: impl Iterator>, + frame_layout: &MachBufferFrameLayout, +) -> anyhow::Result<()> { + let mut frame_descriptors = HashMap::new(); + for tag_site in tag_sites { + // Split into frames; each has three debug tags. + let mut frames = vec![]; + for frame_tags in tag_site.tags.chunks_exact(3) { + let &[ + ir::DebugTag::StackSlot(slot), + ir::DebugTag::User(wasm_pc), + ir::DebugTag::User(stack_shape), + ] = frame_tags + else { + panic!("Invalid tags"); + }; + + let frame_descriptor = *frame_descriptors.entry(slot).or_insert_with(|| { + let slot_to_fp_offset = + frame_layout.frame_to_fp_offset - frame_layout.stackslots[slot].offset; + let descriptor = frame_layout.stackslots[slot].descriptor.clone(); + builder.add_frame_descriptor(slot_to_fp_offset, descriptor) + }); + + frames.push(( + wasm_pc, + frame_descriptor, + FrameStackShape::from_raw(stack_shape), + )); + } + + let native_pc_in_code_section = u32::try_from(range.start) + .unwrap() + .checked_add(tag_site.offset) + .unwrap(); + let pos = match tag_site.pos { + MachDebugTagPos::Post => FrameInstPos::Post, + MachDebugTagPos::Pre => FrameInstPos::Pre, + }; + builder.add_program_point(native_pc_in_code_section, pos, &frames); + } + + Ok(()) +} + fn save_last_wasm_entry_context( builder: &mut FunctionBuilder, pointer_type: ir::Type, diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index 1ba0a3c9b407..91ff563c60ae 100644 --- a/crates/cranelift/src/func_environ.rs +++ 
b/crates/cranelift/src/func_environ.rs @@ -21,13 +21,14 @@ use cranelift_frontend::Variable; use cranelift_frontend::{FuncInstBuilder, FunctionBuilder}; use smallvec::{SmallVec, smallvec}; use std::mem; -use wasmparser::{Operator, WasmFeatures}; +use wasmparser::{FuncValidator, Operator, WasmFeatures, WasmModuleResources}; use wasmtime_environ::{ BuiltinFunctionIndex, DataIndex, DefinedFuncIndex, ElemIndex, EngineOrModuleTypeIndex, - FuncIndex, FuncKey, GlobalIndex, IndexType, Memory, MemoryIndex, Module, - ModuleInternedTypeIndex, ModuleTranslation, ModuleTypesBuilder, PtrSize, Table, TableIndex, - TagIndex, TripleExt, Tunables, TypeConvert, TypeIndex, VMOffsets, WasmCompositeInnerType, - WasmFuncType, WasmHeapTopType, WasmHeapType, WasmRefType, WasmResult, WasmValType, + FrameStateSlotBuilder, FrameValType, FuncIndex, FuncKey, GlobalIndex, IndexType, Memory, + MemoryIndex, Module, ModuleInternedTypeIndex, ModuleTranslation, ModuleTypesBuilder, PtrSize, + Table, TableIndex, TagIndex, TripleExt, Tunables, TypeConvert, TypeIndex, VMOffsets, + WasmCompositeInnerType, WasmFuncType, WasmHeapTopType, WasmHeapType, WasmRefType, WasmResult, + WasmValType, }; use wasmtime_environ::{FUNCREF_INIT_BIT, FUNCREF_MASK}; use wasmtime_math::f64_cvt_to_int_bounds; @@ -96,6 +97,7 @@ wasmtime_environ::foreach_builtin_function!(declare_function_signatures); pub struct FuncEnvironment<'module_environment> { compiler: &'module_environment Compiler, isa: &'module_environment (dyn TargetIsa + 'module_environment), + key: FuncKey, pub(crate) module: &'module_environment Module, types: &'module_environment ModuleTypesBuilder, wasm_func_ty: &'module_environment WasmFuncType, @@ -182,6 +184,10 @@ pub struct FuncEnvironment<'module_environment> { /// slot on this function's stack to be used for the /// current continuation's `values` field. 
stack_switching_values_buffer: Option, + + /// The stack-slot used for exposing Wasm state via debug + /// instrumentation, if any, and the builder containing its metadata. + pub(crate) state_slot: Option<(ir::StackSlot, FrameStateSlotBuilder)>, } impl<'module_environment> FuncEnvironment<'module_environment> { @@ -190,6 +196,7 @@ impl<'module_environment> FuncEnvironment<'module_environment> { translation: &'module_environment ModuleTranslation<'module_environment>, types: &'module_environment ModuleTypesBuilder, wasm_func_ty: &'module_environment WasmFuncType, + key: FuncKey, ) -> Self { let tunables = compiler.tunables(); let builtin_functions = BuiltinFunctions::new(compiler); @@ -199,6 +206,7 @@ impl<'module_environment> FuncEnvironment<'module_environment> { let _ = BuiltinFunctions::raise; Self { + key, isa: compiler.isa(), module: &translation.module, compiler, @@ -238,6 +246,8 @@ impl<'module_environment> FuncEnvironment<'module_environment> { stack_switching_handler_list_buffer: None, stack_switching_values_buffer: None, + + state_slot: None, } } @@ -1179,6 +1189,187 @@ impl<'module_environment> FuncEnvironment<'module_environment> { let ty = self.module.types[type_index].unwrap_module_type_index(); self.types[ty].unwrap_func().params().len() } + + /// Initialize the state slot with an empty layout. + pub(crate) fn create_state_slot(&mut self, builder: &mut FunctionBuilder) { + if self.tunables.debug_instrumentation { + let frame_builder = FrameStateSlotBuilder::new(self.key, self.pointer_type().bytes()); + + // Initially zero-size and with no descriptor; we will fill in + // this info once we're done with the function body. + let slot = builder.func.create_sized_stack_slot(ir::StackSlotData::new( + ir::StackSlotKind::ExplicitSlot, + 0, + 0, + vec![], + )); + + self.state_slot = Some((slot, frame_builder)); + } + } + + /// Update the state slot layout with a new layout given a local. 
+ pub(crate) fn add_state_slot_local( + &mut self, + builder: &mut FunctionBuilder, + ty: WasmValType, + init: Option, + ) { + if let Some((slot, b)) = &mut self.state_slot { + let offset = b.add_local(FrameValType::from(ty)); + if let Some(init) = init { + builder.ins().stack_store(init, *slot, offset.offset()); + } + } + } + + fn update_state_slot_stack( + &mut self, + validator: &FuncValidator, + builder: &mut FunctionBuilder, + stack: &mut FuncTranslationStacks, + ) -> WasmResult<()> { + // Take ownership of the state-slot builder temporarily rather + // than mutably borrowing so we can invoke a method below. + if let Some((slot, mut b)) = self.state_slot.take() { + // If the stack-shape stack is shorter than the value + // stack, that means that values were popped and then new + // values were pushed; hence, these operand-stack values + // are "dirty" and need to be flushed to the stackslot. + for i in stack.stack_shape.len()..stack.stack.len() { + let parent_shape = i + .checked_sub(1) + .map(|parent_idx| stack.stack_shape[parent_idx]); + if let Some(this_ty) = validator + .get_operand_type(stack.stack.len() - i - 1) + .expect("Index should not be out of range") + { + let wasm_ty = self.convert_valtype(this_ty)?; + let (this_shape, offset) = + b.push_stack(parent_shape, FrameValType::from(wasm_ty)); + stack.stack_shape.push(this_shape); + + let value = stack.stack[i]; + builder.ins().stack_store(value, slot, offset.offset()); + } else { + // Unreachable code with unknown type -- no + // flushes for this or later-pushed values. 
+ break; + } + } + + self.state_slot = Some((slot, b)); + } + + Ok(()) + } + + pub(crate) fn debug_tags( + &self, + stack: &FuncTranslationStacks, + srcloc: ir::SourceLoc, + ) -> Vec { + if let Some((slot, _b)) = &self.state_slot { + stack.assert_debug_stack_is_synced(); + let stack_shape = stack + .stack_shape + .last() + .map(|s| s.raw()) + .unwrap_or(u32::MAX); + let pc = srcloc.bits(); + vec![ + ir::DebugTag::StackSlot(*slot), + ir::DebugTag::User(pc), + ir::DebugTag::User(stack_shape), + ] + } else { + vec![] + } + } + + fn set_debug_tags( + &self, + builder: &mut FunctionBuilder, + stack: &FuncTranslationStacks, + srcloc: ir::SourceLoc, + ) { + if self.state_slot.is_some() { + let tags = self.debug_tags(stack, srcloc); + builder.set_debug_tags(tags); + } + } + + fn finish_debug_metadata(&self, builder: &mut FunctionBuilder) { + if let Some((slot, b)) = &self.state_slot { + builder.func.sized_stack_slots[*slot].descriptor = b.serialize(); + builder.func.sized_stack_slots[*slot].size = b.size(); + } + } + + /// Store a new value for a local in the state slot, if present. + pub(crate) fn state_slot_local_set( + &self, + builder: &mut FunctionBuilder, + local: u32, + value: ir::Value, + ) { + if let Some((slot, b)) = &self.state_slot { + let offset = b.local_offset(local); + builder.ins().stack_store(value, *slot, offset.offset()); + } + } + + fn update_state_slot_vmctx(&mut self, builder: &mut FunctionBuilder) { + if let &Some((slot, _)) = &self.state_slot { + let vmctx = self.vmctx_val(&mut builder.cursor()); + builder.ins().stack_store(vmctx, slot, 0); + } + } + + /// Perform debug instrumentation prior to translating an + /// operator. 
+ pub(crate) fn debug_instrumentation_before_op( + &mut self, + builder: &mut FunctionBuilder, + stack: &mut FuncTranslationStacks, + srcloc: ir::SourceLoc, + ) -> WasmResult<()> { + if stack.reachable() { + self.set_debug_tags(builder, stack, srcloc); + } + Ok(()) + } + + /// Perform debug instrumentation after translating an operator. + pub(crate) fn debug_instrumentation_after_op( + &mut self, + validator: &FuncValidator, + builder: &mut FunctionBuilder, + stack: &mut FuncTranslationStacks, + ) -> WasmResult<()> { + if stack.reachable() { + self.update_state_slot_stack(validator, builder, stack)?; + } + Ok(()) + } + + /// Perform debug instrumentation before translating the whole function. + pub(crate) fn debug_instrumentation_at_start( + &mut self, + builder: &mut FunctionBuilder, + ) -> WasmResult<()> { + self.update_state_slot_vmctx(builder); + Ok(()) + } + + /// Perform debug instrumentation after translating the whole function. + pub(crate) fn debug_instrumentation_at_end( + &mut self, + builder: &mut FunctionBuilder, + ) -> WasmResult<()> { + self.finish_debug_metadata(builder); + Ok(()) + } } #[derive(Default)] @@ -1691,6 +1882,8 @@ impl FuncEnvironment<'_> { struct Call<'a, 'func, 'module_env> { builder: &'a mut FunctionBuilder<'func>, env: &'a mut FuncEnvironment<'module_env>, + stack: &'a FuncTranslationStacks, + srcloc: ir::SourceLoc, handlers: Vec<(Option, Block)>, tail: bool, } @@ -1712,12 +1905,16 @@ impl<'a, 'func, 'module_env> Call<'a, 'func, 'module_env> { pub fn new( builder: &'a mut FunctionBuilder<'func>, env: &'a mut FuncEnvironment<'module_env>, + stack: &'a FuncTranslationStacks, + srcloc: ir::SourceLoc, handlers: impl IntoIterator, Block)>, ) -> Self { let handlers = handlers.into_iter().collect(); Call { builder, env, + stack, + srcloc, handlers, tail: false, } @@ -1727,10 +1924,14 @@ impl<'a, 'func, 'module_env> Call<'a, 'func, 'module_env> { pub fn new_tail( builder: &'a mut FunctionBuilder<'func>, env: &'a mut 
FuncEnvironment<'module_env>, + stack: &'a FuncTranslationStacks, + srcloc: ir::SourceLoc, ) -> Self { Call { builder, env, + stack, + srcloc, handlers: vec![], tail: true, } @@ -2213,15 +2414,17 @@ impl<'a, 'func, 'module_env> Call<'a, 'func, 'module_env> { } else if let Some((exception_table, continuation_block, results)) = self.exception_table(sig_ref) { - self.builder.ins().try_call(callee, args, exception_table); + let inst = self.builder.ins().try_call(callee, args, exception_table); self.handle_call_result_stackmap(&results, sig_ref); self.builder.switch_to_block(continuation_block); self.builder.seal_block(continuation_block); + self.attach_tags(inst); results } else { let inst = self.builder.ins().call(callee, args); let results = self.results_from_call_inst(inst); self.handle_call_result_stackmap(&results, sig_ref); + self.attach_tags(inst); results } } @@ -2240,20 +2443,30 @@ impl<'a, 'func, 'module_env> Call<'a, 'func, 'module_env> { } else if let Some((exception_table, continuation_block, results)) = self.exception_table(sig_ref) { - self.builder + let inst = self + .builder .ins() .try_call_indirect(func_addr, args, exception_table); self.handle_call_result_stackmap(&results, sig_ref); self.builder.switch_to_block(continuation_block); self.builder.seal_block(continuation_block); + self.attach_tags(inst); results } else { let inst = self.builder.ins().call_indirect(sig_ref, func_addr, args); let results = self.results_from_call_inst(inst); self.handle_call_result_stackmap(&results, sig_ref); + self.attach_tags(inst); results } } + + fn attach_tags(&mut self, inst: ir::Inst) { + let tags = self.env.debug_tags(self.stack, self.srcloc); + if !tags.is_empty() { + self.builder.func.debug_tags.set(inst, tags); + } + } } impl TypeConvert for FuncEnvironment<'_> { @@ -2307,12 +2520,20 @@ impl FuncEnvironment<'_> { &self.heaps } - pub fn is_wasm_parameter(&self, _signature: &ir::Signature, index: usize) -> bool { + pub fn is_wasm_parameter(&self, index: usize) 
-> bool { // The first two parameters are the vmctx and caller vmctx. The rest are // the wasm parameters. index >= 2 } + pub fn clif_param_as_wasm_param(&self, index: usize) -> Option { + if index >= 2 { + Some(self.wasm_func_ty.params()[index - 2]) + } else { + None + } + } + pub fn param_needs_stack_map(&self, _signature: &ir::Signature, index: usize) -> bool { // Skip the caller and callee vmctx. if index < 2 { @@ -2990,6 +3211,8 @@ impl FuncEnvironment<'_> { pub fn translate_call_indirect<'a>( &mut self, builder: &'a mut FunctionBuilder, + stack: &'a FuncTranslationStacks, + srcloc: ir::SourceLoc, features: &WasmFeatures, table_index: TableIndex, ty_index: TypeIndex, @@ -2998,7 +3221,7 @@ impl FuncEnvironment<'_> { call_args: &[ir::Value], handlers: impl IntoIterator, Block)>, ) -> WasmResult> { - Call::new(builder, self, handlers).indirect_call( + Call::new(builder, self, stack, srcloc, handlers).indirect_call( features, table_index, ty_index, @@ -3011,39 +3234,55 @@ impl FuncEnvironment<'_> { pub fn translate_call<'a>( &mut self, builder: &'a mut FunctionBuilder, + stack: &'a FuncTranslationStacks, + srcloc: ir::SourceLoc, callee_index: FuncIndex, sig_ref: ir::SigRef, call_args: &[ir::Value], handlers: impl IntoIterator, Block)>, ) -> WasmResult { - Call::new(builder, self, handlers).direct_call(callee_index, sig_ref, call_args) + Call::new(builder, self, stack, srcloc, handlers).direct_call( + callee_index, + sig_ref, + call_args, + ) } pub fn translate_call_ref<'a>( &mut self, builder: &'a mut FunctionBuilder, + stack: &'a FuncTranslationStacks, + srcloc: ir::SourceLoc, sig_ref: ir::SigRef, callee: ir::Value, call_args: &[ir::Value], handlers: impl IntoIterator, Block)>, ) -> WasmResult { - Call::new(builder, self, handlers).call_ref(sig_ref, callee, call_args) + Call::new(builder, self, stack, srcloc, handlers).call_ref(sig_ref, callee, call_args) } pub fn translate_return_call( &mut self, builder: &mut FunctionBuilder, + stack: &FuncTranslationStacks, + 
srcloc: ir::SourceLoc, callee_index: FuncIndex, sig_ref: ir::SigRef, call_args: &[ir::Value], ) -> WasmResult<()> { - Call::new_tail(builder, self).direct_call(callee_index, sig_ref, call_args)?; + Call::new_tail(builder, self, stack, srcloc).direct_call( + callee_index, + sig_ref, + call_args, + )?; Ok(()) } pub fn translate_return_call_indirect( &mut self, builder: &mut FunctionBuilder, + stack: &FuncTranslationStacks, + srcloc: ir::SourceLoc, features: &WasmFeatures, table_index: TableIndex, ty_index: TypeIndex, @@ -3051,7 +3290,7 @@ impl FuncEnvironment<'_> { callee: ir::Value, call_args: &[ir::Value], ) -> WasmResult<()> { - Call::new_tail(builder, self).indirect_call( + Call::new_tail(builder, self, stack, srcloc).indirect_call( features, table_index, ty_index, @@ -3065,11 +3304,13 @@ impl FuncEnvironment<'_> { pub fn translate_return_call_ref( &mut self, builder: &mut FunctionBuilder, + stack: &FuncTranslationStacks, + srcloc: ir::SourceLoc, sig_ref: ir::SigRef, callee: ir::Value, call_args: &[ir::Value], ) -> WasmResult<()> { - Call::new_tail(builder, self).call_ref(sig_ref, callee, call_args)?; + Call::new_tail(builder, self, stack, srcloc).call_ref(sig_ref, callee, call_args)?; Ok(()) } diff --git a/crates/cranelift/src/translate/code_translator.rs b/crates/cranelift/src/translate/code_translator.rs index 1e15bec0c3af..151910e7e976 100644 --- a/crates/cranelift/src/translate/code_translator.rs +++ b/crates/cranelift/src/translate/code_translator.rs @@ -124,6 +124,7 @@ pub fn translate_operator( builder: &mut FunctionBuilder, stack: &mut FuncTranslationStacks, environ: &mut FuncEnvironment<'_>, + srcloc: ir::SourceLoc, ) -> WasmResult<()> { log::trace!("Translating Wasm opcode: {op:?}"); @@ -163,6 +164,7 @@ pub fn translate_operator( builder.def_var(Variable::from_u32(*local_index), val); let label = ValueLabel::from_u32(*local_index); builder.set_val_label(val, label); + environ.state_slot_local_set(builder, *local_index, val); } Operator::LocalTee { 
local_index } => { let mut val = stack.peek1(); @@ -176,6 +178,7 @@ pub fn translate_operator( builder.def_var(Variable::from_u32(*local_index), val); let label = ValueLabel::from_u32(*local_index); builder.set_val_label(val, label); + environ.state_slot_local_set(builder, *local_index, val); } /********************************** Globals **************************************** * `get_global` and `set_global` are handled by the environment. @@ -427,7 +430,7 @@ pub fn translate_operator( frame.restore_catch_handlers(&mut stack.handlers, builder); - frame.truncate_value_stack_to_original_size(&mut stack.stack); + frame.truncate_value_stack_to_original_size(&mut stack.stack, &mut stack.stack_shape); stack .stack .extend_from_slice(builder.block_params(next_block)); @@ -650,6 +653,8 @@ pub fn translate_operator( let inst_results = environ.translate_call( builder, + stack, + srcloc, function_index, sig_ref, args, @@ -682,6 +687,8 @@ pub fn translate_operator( let inst_results = environ.translate_call_indirect( builder, + stack, + srcloc, validator.features(), TableIndex::from_u32(*table_index), type_index, @@ -724,8 +731,9 @@ pub fn translate_operator( // Bitcast any vector arguments to their default type, I8X16, before calling. let args = stack.peekn_mut(num_args); bitcast_wasm_params(environ, sig_ref, args, builder); + let args = stack.peekn(num_args); // Reborrow immutably. 
- environ.translate_return_call(builder, function_index, sig_ref, args)?; + environ.translate_return_call(builder, stack, srcloc, function_index, sig_ref, args)?; stack.popn(num_args); stack.reachable = false; @@ -748,6 +756,8 @@ pub fn translate_operator( environ.translate_return_call_indirect( builder, + stack, + srcloc, validator.features(), TableIndex::from_u32(*table_index), type_index, @@ -772,7 +782,14 @@ pub fn translate_operator( let args = stack.peekn_mut(num_args); bitcast_wasm_params(environ, sigref, args, builder); - environ.translate_return_call_ref(builder, sigref, callee, stack.peekn(num_args))?; + environ.translate_return_call_ref( + builder, + stack, + srcloc, + sigref, + callee, + stack.peekn(num_args), + )?; stack.popn(num_args); stack.reachable = false; @@ -2516,6 +2533,8 @@ pub fn translate_operator( let inst_results = environ.translate_call_ref( builder, + stack, + srcloc, sigref, callee, stack.peekn(num_args), @@ -3233,7 +3252,10 @@ fn translate_unreachable_operator( blocktype_params_results(validator, blocktype)?; let else_block = block_with_params(builder, params, environ)?; let frame = stack.control_stack.last().unwrap(); - frame.truncate_value_stack_to_else_params(&mut stack.stack); + frame.truncate_value_stack_to_else_params( + &mut stack.stack, + &mut stack.stack_shape, + ); // We change the target of the branch instruction. 
builder.change_jump_destination( @@ -3246,7 +3268,10 @@ fn translate_unreachable_operator( } ElseData::WithElse { else_block } => { let frame = stack.control_stack.last().unwrap(); - frame.truncate_value_stack_to_else_params(&mut stack.stack); + frame.truncate_value_stack_to_else_params( + &mut stack.stack, + &mut stack.stack_shape, + ); else_block } }; @@ -3264,13 +3289,14 @@ fn translate_unreachable_operator( } Operator::End => { let value_stack = &mut stack.stack; + let stack_shape = &mut stack.stack_shape; let control_stack = &mut stack.control_stack; let frame = control_stack.pop().unwrap(); frame.restore_catch_handlers(&mut stack.handlers, builder); // Pop unused parameters from stack. - frame.truncate_value_stack_to_original_size(value_stack); + frame.truncate_value_stack_to_original_size(value_stack, stack_shape); let reachable_anyway = match frame { // If it is a loop we also have to seal the body loop block @@ -4287,7 +4313,7 @@ fn bitcast_wasm_params( ) { let callee_signature = &builder.func.dfg.signatures[callee_signature]; let changes = bitcast_arguments(builder, arguments, &callee_signature.params, |i| { - environ.is_wasm_parameter(&callee_signature, i) + environ.is_wasm_parameter(i) }); for (t, arg) in changes { let mut flags = MemFlags::new(); diff --git a/crates/cranelift/src/translate/func_translator.rs b/crates/cranelift/src/translate/func_translator.rs index fb205e067ffd..72c95bddcdff 100644 --- a/crates/cranelift/src/translate/func_translator.rs +++ b/crates/cranelift/src/translate/func_translator.rs @@ -76,6 +76,8 @@ impl FuncTranslator { builder.switch_to_block(entry_block); builder.seal_block(entry_block); // Declare all predecessors known. + environ.create_state_slot(&mut builder); + // Make sure the entry block is inserted in the layout before we make any callbacks to // `environ`. The callback functions may need to insert things in the entry block. 
builder.ensure_inserted_block(); @@ -103,7 +105,7 @@ impl FuncTranslator { fn declare_wasm_parameters( builder: &mut FunctionBuilder, entry_block: Block, - environ: &FuncEnvironment<'_>, + environ: &mut FuncEnvironment<'_>, ) -> usize { let sig_len = builder.func.signature.params.len(); let mut next_local = 0; @@ -111,7 +113,7 @@ fn declare_wasm_parameters( let param_type = builder.func.signature.params[i]; // There may be additional special-purpose parameters in addition to the normal WebAssembly // signature parameters. For example, a `vmctx` pointer. - if environ.is_wasm_parameter(&builder.func.signature, i) { + if let Some(wasm_type) = environ.clif_param_as_wasm_param(i) { // This is a normal WebAssembly signature parameter, so create a local for it. let local = builder.declare_var(param_type.value_type); debug_assert_eq!(local.index(), next_local); @@ -123,6 +125,8 @@ fn declare_wasm_parameters( let param_value = builder.block_params(entry_block)[i]; builder.def_var(local, param_value); + + environ.add_state_slot_local(builder, wasm_type, Some(param_value)); } if param_type.purpose == ir::ArgumentPurpose::VMContext { let param_value = builder.block_params(entry_block)[i]; @@ -221,6 +225,7 @@ fn declare_locals( builder.def_var(local, init); builder.set_val_label(init, ValueLabel::new(*next_local)); } + environ.add_state_slot_local(builder, environ.convert_valtype(wasm_type)?, init); *next_local += 1; } Ok(()) @@ -245,18 +250,32 @@ fn parse_function_body( let mut reader = OperatorsReader::new(reader); let mut operand_types = vec![]; + environ.debug_instrumentation_at_start(builder)?; + while !reader.eof() { let pos = reader.original_position(); - builder.set_srcloc(cur_srcloc(&reader.get_binary_reader())); + let srcloc = cur_srcloc(&reader.get_binary_reader()); + builder.set_srcloc(srcloc); let op = reader.read()?; let operand_types = validate_op_and_get_operand_types(validator, environ, &mut operand_types, &op, pos)?; + 
environ.debug_instrumentation_before_op(builder, stack, srcloc)?; environ.before_translate_operator(&op, operand_types, builder, stack)?; - translate_operator(validator, &op, operand_types, builder, stack, environ)?; + translate_operator( + validator, + &op, + operand_types, + builder, + stack, + environ, + srcloc, + )?; environ.after_translate_operator(&op, operand_types, builder, stack)?; + environ.debug_instrumentation_after_op(validator, builder, stack)?; } + environ.debug_instrumentation_at_end(builder)?; environ.after_translate_function(builder, stack)?; reader.finish()?; @@ -276,6 +295,7 @@ fn parse_function_body( // Discard any remaining values on the stack. Either we just returned them, // or the end of the function is unreachable. stack.stack.clear(); + stack.stack_shape.clear(); Ok(()) } diff --git a/crates/cranelift/src/translate/stack.rs b/crates/cranelift/src/translate/stack.rs index 5c79231d7dcf..f338fe3f0abd 100644 --- a/crates/cranelift/src/translate/stack.rs +++ b/crates/cranelift/src/translate/stack.rs @@ -7,6 +7,7 @@ use cranelift_codegen::ir::{self, Block, ExceptionTag, Inst, Value}; use cranelift_frontend::FunctionBuilder; use std::vec::Vec; +use wasmtime_environ::FrameStackShape; /// Information about the presence of an associated `else` for an `if`, or the /// lack thereof. @@ -190,14 +191,23 @@ impl ControlStackFrame { /// Pop values from the value stack so that it is left at the /// input-parameters to an else-block. - pub fn truncate_value_stack_to_else_params(&self, stack: &mut Vec) { + pub fn truncate_value_stack_to_else_params( + &self, + stack: &mut Vec, + stack_shape: &mut Vec, + ) { debug_assert!(matches!(self, &ControlStackFrame::If { .. })); stack.truncate(self.original_stack_size()); + stack_shape.truncate(self.original_stack_size()); } /// Pop values from the value stack so that it is left at the state it was /// before this control-flow frame. 
- pub fn truncate_value_stack_to_original_size(&self, stack: &mut Vec) { + pub fn truncate_value_stack_to_original_size( + &self, + stack: &mut Vec, + stack_shape: &mut Vec, + ) { // The "If" frame pushes its parameters twice, so they're available to the else block // (see also `FuncTranslationStacks::push_if`). // Yet, the original_stack_size member accounts for them only once, so that the else @@ -212,7 +222,10 @@ impl ControlStackFrame { } _ => 0, }; - stack.truncate(self.original_stack_size() - num_duplicated_params); + + let new_len = self.original_stack_size() - num_duplicated_params; + stack.truncate(new_len); + stack_shape.truncate(new_len); } /// Restore the catch-handlers as they were outside of this block. @@ -242,6 +255,13 @@ pub struct FuncTranslationStacks { /// A stack of values corresponding to the active values in the input wasm function at this /// point. pub(crate) stack: Vec, + /// "Shape" of stack at each index, if emitting debug instrumentation. + /// + /// When we pop `stack`, we automatically pop `stack_shape` as + /// well, but we never push automatically; this enables us to + /// determine which values are new and need to be flushed to + /// memory after translating an operator. + pub(crate) stack_shape: Vec, /// A stack of active control flow operations at this point in the input wasm function. pub(crate) control_stack: Vec, /// Exception handler state, updated as we enter and exit @@ -266,6 +286,7 @@ impl FuncTranslationStacks { pub(crate) fn new() -> Self { Self { stack: Vec::new(), + stack_shape: Vec::new(), control_stack: Vec::new(), handlers: HandlerState::default(), reachable: true, @@ -274,6 +295,7 @@ impl FuncTranslationStacks { fn clear(&mut self) { debug_assert!(self.stack.is_empty()); + debug_assert!(self.stack_shape.is_empty()); debug_assert!(self.control_stack.is_empty()); debug_assert!(self.handlers.is_empty()); self.reachable = true; @@ -313,6 +335,7 @@ impl FuncTranslationStacks { /// Pop one value. 
pub(crate) fn pop1(&mut self) -> Value { + self.pop_stack_shape(1); self.stack .pop() .expect("attempted to pop a value from an empty stack") @@ -328,6 +351,7 @@ impl FuncTranslationStacks { /// Pop two values. Return them in the order they were pushed. pub(crate) fn pop2(&mut self) -> (Value, Value) { + self.pop_stack_shape(2); let v2 = self.stack.pop().unwrap(); let v1 = self.stack.pop().unwrap(); (v1, v2) @@ -335,6 +359,7 @@ impl FuncTranslationStacks { /// Pop three values. Return them in the order they were pushed. pub(crate) fn pop3(&mut self) -> (Value, Value, Value) { + self.pop_stack_shape(3); let v3 = self.stack.pop().unwrap(); let v2 = self.stack.pop().unwrap(); let v1 = self.stack.pop().unwrap(); @@ -343,6 +368,7 @@ impl FuncTranslationStacks { /// Pop four values. Return them in the order they were pushed. pub(crate) fn pop4(&mut self) -> (Value, Value, Value, Value) { + self.pop_stack_shape(4); let v4 = self.stack.pop().unwrap(); let v3 = self.stack.pop().unwrap(); let v2 = self.stack.pop().unwrap(); @@ -352,6 +378,7 @@ impl FuncTranslationStacks { /// Pop five values. Return them in the order they were pushed. pub(crate) fn pop5(&mut self) -> (Value, Value, Value, Value, Value) { + self.pop_stack_shape(5); let v5 = self.stack.pop().unwrap(); let v4 = self.stack.pop().unwrap(); let v3 = self.stack.pop().unwrap(); @@ -379,6 +406,21 @@ impl FuncTranslationStacks { self.ensure_length_is_at_least(n); let new_len = self.stack.len() - n; self.stack.truncate(new_len); + self.stack_shape.truncate(new_len); + } + + fn pop_stack_shape(&mut self, n: usize) { + // The `stack_shape` vec represents the *clean* slots (already + // flushed to memory); its length is always less than or equal + // to `stack`, but indices always correspond between the + // two. 
Thus a pop on `stack` may or may not pop something on + // `stack_shape`; but if `stack` is truncated down to a length + // L by some number of pops, truncating `stack_shape` to that + // same length L will pop exactly the right shapes and will + // ensure that any new pushes that are "dirty" will be + // correctly represented as such. + let new_len = self.stack.len() - n; + self.stack_shape.truncate(new_len); } /// Peek at the top `n` values on the stack in the order they were pushed. @@ -467,6 +509,7 @@ impl FuncTranslationStacks { blocktype: wasmparser::BlockType, ) { debug_assert!(num_param_types <= self.stack.len()); + self.assert_debug_stack_is_synced(); // Push a second copy of our `if`'s parameters on the stack. This lets // us avoid saving them on the side in the `ControlStackFrame` for our @@ -477,6 +520,15 @@ impl FuncTranslationStacks { for i in (self.stack.len() - num_param_types)..self.stack.len() { let val = self.stack[i]; self.stack.push(val); + // Duplicate the stack-shape as well, if we're doing debug + // instrumentation. Note that we must have flushed + // everything before processing an `if`, so (as per the + // assert above) we can rely on either no shapes (if no + // instrumentation) or all shapes being present. + if !self.stack_shape.is_empty() { + let shape = self.stack_shape[i]; + self.stack_shape.push(shape); + } } self.control_stack.push(ControlStackFrame::If { @@ -491,6 +543,10 @@ impl FuncTranslationStacks { blocktype, }); } + + pub(crate) fn assert_debug_stack_is_synced(&self) { + debug_assert!(self.stack_shape.is_empty() || self.stack_shape.len() == self.stack.len()); + } } /// Exception handler state. diff --git a/crates/environ/src/compile/frame_table.rs b/crates/environ/src/compile/frame_table.rs new file mode 100644 index 000000000000..7757c69c95cd --- /dev/null +++ b/crates/environ/src/compile/frame_table.rs @@ -0,0 +1,316 @@ +//! Builder for the `ELF_WASMTIME_FRAME_TABLE` ("frame table") section +//! in compiled executables. 
+//! +//! This section is present only if debug instrumentation is +//! enabled. It describes functions, stackslots that carry Wasm state, +//! and allows looking up active Wasm frames (including multiple +//! frames in one function due to inlining), Wasm local types and Wasm +//! operand stack depth in each frame by PC, with offsets to read +//! those values off of the state in the stack frame. + +use crate::{ + FrameInstPos, FrameStackShape, FrameStateSlotOffset, FrameTableDescriptorIndex, FrameValType, + FuncKey, WasmHeapTopType, WasmValType, prelude::*, +}; +use object::{LittleEndian, U32Bytes}; +use std::collections::{HashMap, hash_map::Entry}; + +/// Builder for a stackslot descriptor. +pub struct FrameStateSlotBuilder { + /// Function identifier for this state slot. + func_key: FuncKey, + + /// Pointer size for target. + pointer_size: u32, + + /// Local types and offsets. + locals: Vec<(FrameValType, FrameStateSlotOffset)>, + + /// Stack nodes: (parent, type, offset) tuples. + stacks: Vec<(Option, FrameValType, FrameStateSlotOffset)>, + + /// Hashconsing for stack-type nodes. + stacks_dedup: + HashMap<(Option, FrameValType, FrameStateSlotOffset), FrameStackShape>, + + /// Size of vmctx (one pointer). + vmctx_size: u32, + + /// Size of all locals. + locals_size: u32, + + /// Maximum size of whole state slot. 
+ slot_size: u32, +} + +impl From for FrameValType { + fn from(ty: WasmValType) -> FrameValType { + match ty { + WasmValType::I32 => FrameValType::I32, + WasmValType::I64 => FrameValType::I64, + WasmValType::F32 => FrameValType::F32, + WasmValType::F64 => FrameValType::F64, + WasmValType::V128 => FrameValType::V128, + WasmValType::Ref(r) => match r.heap_type.top() { + WasmHeapTopType::Any => FrameValType::AnyRef, + WasmHeapTopType::Extern => FrameValType::ExternRef, + WasmHeapTopType::Func => FrameValType::FuncRef, + WasmHeapTopType::Exn => FrameValType::ExnRef, + WasmHeapTopType::Cont => FrameValType::ContRef, + }, + } + } +} + +impl FrameStateSlotBuilder { + /// Create a new state-slot builder. + pub fn new(func_key: FuncKey, pointer_size: u32) -> FrameStateSlotBuilder { + FrameStateSlotBuilder { + func_key, + pointer_size, + locals: vec![], + stacks: vec![], + stacks_dedup: HashMap::new(), + vmctx_size: pointer_size, + locals_size: 0, + slot_size: pointer_size, + } + } + + /// Add a local to the state-slot. + /// + /// Locals must be added in local index order, and must be added + /// before any stack shapes are defined. The offset in the state + /// slot is returned. + pub fn add_local(&mut self, ty: FrameValType) -> FrameStateSlotOffset { + let offset = FrameStateSlotOffset(self.vmctx_size + self.locals_size); + let size = ty.storage_size(self.pointer_size); + self.locals_size += size; + self.slot_size += size; + self.locals.push((ty, offset)); + offset + } + + /// Get a local's offset in the state-slot. + pub fn local_offset(&self, local: u32) -> FrameStateSlotOffset { + let index = usize::try_from(local).unwrap(); + self.locals[index].1 + } + + /// Push a stack entry. Returns the stack-shape descriptor and the + /// offset at which to write the pushed value. 
+ pub fn push_stack( + &mut self, + parent: Option, + ty: FrameValType, + ) -> (FrameStackShape, FrameStateSlotOffset) { + let offset = parent + .map(|parent| { + let (_, ty, offset) = self.stacks[parent.index()]; + offset.add(ty.storage_size(self.pointer_size)) + }) + .unwrap_or(FrameStateSlotOffset(self.vmctx_size + self.locals_size)); + + self.slot_size = core::cmp::max( + self.slot_size, + offset.0 + ty.storage_size(self.pointer_size), + ); + + let shape = match self.stacks_dedup.entry((parent, ty, offset)) { + Entry::Occupied(o) => *o.get(), + Entry::Vacant(v) => { + let shape = FrameStackShape(u32::try_from(self.stacks.len()).unwrap()); + self.stacks.push((parent, ty, offset)); + *v.insert(shape) + } + }; + + (shape, offset) + } + + /// Get the offset for the top slot in a given stack shape. + pub fn stack_last_offset(&self, shape: FrameStackShape) -> FrameStateSlotOffset { + self.stacks[shape.index()].2 + } + + /// Serialize the frame-slot descriptor so it can be included as + /// metadata. 
+ pub fn serialize(&self) -> Vec { + // Format (all little-endian): + // - func_key: (u32, u32) + // - num_locals: u32 + // - num_stack_shapes: u32 + // - local_offsets: num_locals times: + // - offset: u32 (offset from start of state slot) + // - stack_shape_parents: num_stack_shapes times: + // - parent_shape: u32 (or u32::MAX for none) + // - stack_shape_offsets: num_stack_shapes times: + // - offset: u32 (offset from start of state slot for top-of-stack value) + // - local_types: num_locals times: + // - type: u8 + // - stack_shape_types: num_stack_shapes times: + // - type: u8 (type of top-of-stack value) + + let mut buffer = vec![]; + let (func_key_namespace, func_key_index) = self.func_key.into_parts(); + buffer.extend_from_slice(&u32::to_le_bytes(func_key_namespace.into_raw())); + buffer.extend_from_slice(&u32::to_le_bytes(func_key_index.into_raw())); + + buffer.extend_from_slice(&u32::to_le_bytes(u32::try_from(self.locals.len()).unwrap())); + buffer.extend_from_slice(&u32::to_le_bytes(u32::try_from(self.stacks.len()).unwrap())); + + for (_, offset) in &self.locals { + buffer.extend_from_slice(&u32::to_le_bytes(offset.0)); + } + for (parent, _, _) in &self.stacks { + let parent = parent.map(|p| p.0).unwrap_or(u32::MAX); + buffer.extend_from_slice(&u32::to_le_bytes(parent)); + } + for (_, _, offset) in &self.stacks { + buffer.extend_from_slice(&u32::to_le_bytes(offset.0)); + } + for (ty, _) in &self.locals { + buffer.push(*ty as u8); + } + for (_, ty, _) in &self.stacks { + buffer.push(*ty as u8); + } + + buffer + } + + /// The total size required for all locals/stack storage. + pub fn size(&self) -> u32 { + self.slot_size + } +} + +/// Builder for the Frame Table. 
+/// +/// Format: +/// +/// - `num_slot_descriptors`: u32 +/// - `num_progpoints`: u32 +/// - `frame_descriptor_pool_length`: u32 +/// - `progpoint_descriptor_pool_length`: u32 +/// - `num_slot_descriptors` times: +/// - frame descriptor start offset: u32 +/// - frame descriptor end offset: u32 +/// - `num_slot_descriptors` times: +/// - offset from frame up to FP: u32 +/// - `num_progpoints` times: +/// - PC, from start of text section, position (post/pre): u32 +/// - encoded as (pc << 1) | post_pre_bit +/// - `num_progpoints` times: +/// - progpoint descriptor offset (in u32 words): u32 +/// - frame descriptors (format described above; `frame_descriptor_pool_length` bytes) +/// - progpoint descriptors (`progpoint_descriptor_pool_length` u32 words) +/// - each descriptor: sequence of frames +/// - Wasm PC: u32 (high bit set to indicate a parent frame) +/// - slot descriptor index: u32 +/// - stack shape index: u32 (or u32::MAX for none) +#[derive(Default)] +pub struct FrameTableBuilder { + /// (start, end) offset pairs into `frame_descriptor_data`, indexed + /// by frame descriptor number. + frame_descriptor_ranges: Vec>, + frame_descriptor_data: Vec, + + /// Offset from frame slot up to FP for each frame descriptor. + frame_descriptor_fp_offsets: Vec>, + + progpoint_pcs: Vec>, + progpoint_descriptor_offsets: Vec>, + progpoint_descriptor_data: Vec>, +} + +impl FrameTableBuilder { + /// Add one frame descriptor. + /// + /// Returns the frame descriptor index. 
+ pub fn add_frame_descriptor( + &mut self, + slot_to_fp_offset: u32, + data: Vec, + ) -> FrameTableDescriptorIndex { + let start = u32::try_from(self.frame_descriptor_data.len()).unwrap(); + self.frame_descriptor_data.extend(data); + let end = u32::try_from(self.frame_descriptor_data.len()).unwrap(); + + let index = FrameTableDescriptorIndex( + u32::try_from(self.frame_descriptor_fp_offsets.len()).unwrap(), + ); + self.frame_descriptor_fp_offsets + .push(U32Bytes::new(LittleEndian, slot_to_fp_offset)); + self.frame_descriptor_ranges + .push(U32Bytes::new(LittleEndian, start)); + self.frame_descriptor_ranges + .push(U32Bytes::new(LittleEndian, end)); + + index + } + + /// Add one program point. + pub fn add_program_point( + &mut self, + native_pc: u32, + pos: FrameInstPos, + // For each frame: Wasm PC, frame descriptor, stack shape + // within the frame descriptor. + frames: &[(u32, FrameTableDescriptorIndex, FrameStackShape)], + ) { + let pc_and_pos = FrameInstPos::encode(native_pc, pos); + // If we already have a program point record at this PC, don't add another. 
+ if let Some(last) = self.progpoint_pcs.last() + && last.get(LittleEndian) == pc_and_pos + { + return; + } + + let start = u32::try_from(self.progpoint_descriptor_data.len()).unwrap(); + self.progpoint_pcs + .push(U32Bytes::new(LittleEndian, pc_and_pos)); + self.progpoint_descriptor_offsets + .push(U32Bytes::new(LittleEndian, start)); + + for (i, &(wasm_pc, frame_descriptor, stack_shape)) in frames.iter().enumerate() { + debug_assert!(wasm_pc < 0x8000_0000); + let not_last = i < (frames.len() - 1); + let wasm_pc = wasm_pc | if not_last { 0x8000_0000 } else { 0 }; + self.progpoint_descriptor_data + .push(U32Bytes::new(LittleEndian, wasm_pc)); + self.progpoint_descriptor_data + .push(U32Bytes::new(LittleEndian, frame_descriptor.0)); + self.progpoint_descriptor_data + .push(U32Bytes::new(LittleEndian, stack_shape.0)); + } + } + + /// Serialize the frame-table data section, taking a closure + /// to consume slices. + pub fn serialize(&mut self, mut f: F) { + // Pad `frame_descriptor_data` to a multiple of 4 bytes so + // `progpoint_descriptor_data` is aligned as well. 
+ while self.frame_descriptor_data.len() & 3 != 0 { + self.frame_descriptor_data.push(0); + } + + let num_frame_descriptors = u32::try_from(self.frame_descriptor_fp_offsets.len()).unwrap(); + f(&num_frame_descriptors.to_le_bytes()); + let num_prog_points = u32::try_from(self.progpoint_pcs.len()).unwrap(); + f(&num_prog_points.to_le_bytes()); + + let frame_descriptor_pool_length = u32::try_from(self.frame_descriptor_data.len()).unwrap(); + f(&frame_descriptor_pool_length.to_le_bytes()); + let progpoint_descriptor_pool_length = + u32::try_from(self.progpoint_descriptor_data.len()).unwrap(); + f(&progpoint_descriptor_pool_length.to_le_bytes()); + + f(object::bytes_of_slice(&self.frame_descriptor_ranges)); + f(object::bytes_of_slice(&self.frame_descriptor_fp_offsets)); + f(object::bytes_of_slice(&self.progpoint_pcs)); + f(object::bytes_of_slice(&self.progpoint_descriptor_offsets)); + f(&self.frame_descriptor_data); + f(object::bytes_of_slice(&self.progpoint_descriptor_data)); + } +} diff --git a/crates/environ/src/compile/mod.rs b/crates/environ/src/compile/mod.rs index 5bff028d4d69..df2d64acff0b 100644 --- a/crates/environ/src/compile/mod.rs +++ b/crates/environ/src/compile/mod.rs @@ -16,6 +16,7 @@ use std::path; use std::sync::Arc; mod address_map; +mod frame_table; mod module_artifacts; mod module_environ; mod module_types; @@ -23,6 +24,7 @@ mod stack_maps; mod trap_encoding; pub use self::address_map::*; +pub use self::frame_table::*; pub use self::module_artifacts::*; pub use self::module_environ::*; pub use self::module_types::*; diff --git a/crates/environ/src/frame_table.rs b/crates/environ/src/frame_table.rs new file mode 100644 index 000000000000..2fdf32c72048 --- /dev/null +++ b/crates/environ/src/frame_table.rs @@ -0,0 +1,481 @@ +//! Frame-table parser and lookup logic. +//! +//! This module contains utilities to interpret the `.wasmtime.frame` +//! section in a compiled artifact as produced by +//! [`crate::compile::frame_table::FrameTableBuilder`]. 
+ +use crate::FuncKey; +use alloc::vec::Vec; +use object::{Bytes, LittleEndian, U32Bytes}; + +/// An index into the table of stack shapes. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct FrameStackShape(pub(crate) u32); +impl FrameStackShape { + pub(crate) fn index(self) -> usize { + usize::try_from(self.0).unwrap() + } + + /// Get the raw stack-shape index suitable for serializing into + /// metadata. + pub fn raw(self) -> u32 { + self.0 + } + + /// Wrap a raw stack shape index (e.g. from debug tags) into a FrameStackShape. + pub fn from_raw(index: u32) -> FrameStackShape { + FrameStackShape(index) + } +} + +/// An index to a frame descriptor that can be referenced from a +/// program point descriptor. +#[derive(Clone, Copy, Debug)] +pub struct FrameTableDescriptorIndex(pub(crate) u32); +impl FrameTableDescriptorIndex { + fn index(self) -> usize { + usize::try_from(self.0).unwrap() + } +} + +/// A parser for a frame-table section. +/// +/// This parser holds slices to the in-memory section data, and is +/// cheap to construct: it reads some header fields but does not +/// interpret or validate content data until queried. +pub struct FrameTable<'a> { + frame_descriptor_ranges: &'a [U32Bytes], + frame_descriptor_data: &'a [u8], + + frame_descriptor_fp_offsets: &'a [U32Bytes], + + progpoint_pcs: &'a [U32Bytes], + progpoint_descriptor_offsets: &'a [U32Bytes], + progpoint_descriptor_data: &'a [U32Bytes], +} + +impl<'a> FrameTable<'a> { + /// Parse a frame table section from a byte-slice as produced by + /// [`crate::compile::frame_table::FrameTableBuilder`]. 
+ pub fn parse(data: &'a [u8]) -> anyhow::Result> { + let mut data = Bytes(data); + let num_frame_descriptors = data + .read::>() + .map_err(|_| anyhow::anyhow!("Unable to read frame descriptor count prefix"))?; + let num_frame_descriptors = usize::try_from(num_frame_descriptors.get(LittleEndian))?; + let num_progpoint_descriptors = data + .read::>() + .map_err(|_| anyhow::anyhow!("Unable to read progpoint descriptor count prefix"))?; + let num_progpoint_descriptors = + usize::try_from(num_progpoint_descriptors.get(LittleEndian))?; + let frame_descriptor_pool_length = data + .read::>() + .map_err(|_| anyhow::anyhow!("Unable to read frame descriptor pool length"))?; + let frame_descriptor_pool_length = + usize::try_from(frame_descriptor_pool_length.get(LittleEndian))?; + let progpoint_descriptor_pool_length = data + .read::>() + .map_err(|_| anyhow::anyhow!("Unable to read progpoint descriptor pool length"))?; + let progpoint_descriptor_pool_length = + usize::try_from(progpoint_descriptor_pool_length.get(LittleEndian))?; + + let (frame_descriptor_ranges, data) = + object::slice_from_bytes::>(data.0, 2 * num_frame_descriptors) + .map_err(|_| anyhow::anyhow!("Unable to read frame descriptor ranges slice"))?; + let (frame_descriptor_fp_offsets, data) = + object::slice_from_bytes::>(data, num_frame_descriptors) + .map_err(|_| anyhow::anyhow!("Unable to read frame descriptor FP offset slice"))?; + + let (progpoint_pcs, data) = + object::slice_from_bytes::>(data, num_progpoint_descriptors) + .map_err(|_| anyhow::anyhow!("Unable to read progpoint PC slice"))?; + let (progpoint_descriptor_offsets, data) = + object::slice_from_bytes::>(data, num_progpoint_descriptors) + .map_err(|_| anyhow::anyhow!("Unable to read progpoint descriptor offset slice"))?; + + let (frame_descriptor_data, data) = data + .split_at_checked(frame_descriptor_pool_length) + .ok_or_else(|| anyhow::anyhow!("Unable to read frame descriptor pool"))?; + + let (progpoint_descriptor_data, _) = 
object::slice_from_bytes::>( + data, + progpoint_descriptor_pool_length, + ) + .map_err(|_| anyhow::anyhow!("Unable to read progpoint descriptor pool"))?; + + Ok(FrameTable { + frame_descriptor_ranges, + frame_descriptor_data, + frame_descriptor_fp_offsets, + progpoint_pcs, + progpoint_descriptor_offsets, + progpoint_descriptor_data, + }) + } + + /// Get raw frame descriptor data and slot-to-FP-offset for a + /// given frame descriptor. + pub fn frame_descriptor( + &self, + frame_descriptor: FrameTableDescriptorIndex, + ) -> Option<(&'a [u8], u32)> { + let range_start = self + .frame_descriptor_ranges + .get(frame_descriptor.index() * 2)? + .get(LittleEndian); + let range_end = self + .frame_descriptor_ranges + .get(frame_descriptor.index() * 2 + 1)? + .get(LittleEndian); + let range_start = usize::try_from(range_start).unwrap(); + let range_end = usize::try_from(range_end).unwrap(); + if range_end < range_start || range_end > self.frame_descriptor_data.len() { + return None; + } + let descriptor = &self.frame_descriptor_data[range_start..range_end]; + let slot_to_fp_offset = self + .frame_descriptor_fp_offsets + .get(frame_descriptor.index())? + .get(LittleEndian); + Some((descriptor, slot_to_fp_offset)) + } + + /// Get frames for the program point at the PC upper-bounded by a + /// given search PC (offset in text section). + pub fn find_program_point( + &self, + search_pc: u32, + search_pos: FrameInstPos, + ) -> Option> { + let key = FrameInstPos::encode(search_pc, search_pos); + let index = match self + .progpoint_pcs + .binary_search_by_key(&key, |entry| entry.get(LittleEndian)) + { + Ok(idx) => idx, + Err(idx) if idx > 0 => idx - 1, + Err(_) => return None, + }; + + Some(self.program_point_frame_iter(index)) + } + + /// Get all program point records with iterators over + /// corresponding frames for each. 
+ pub fn into_program_points( + self, + ) -> impl Iterator< + Item = ( + u32, + FrameInstPos, + Vec<(u32, FrameTableDescriptorIndex, FrameStackShape)>, + ), + > + 'a { + self.progpoint_pcs.iter().enumerate().map(move |(i, pc)| { + let pc_and_pos = pc.get(LittleEndian); + let (pc, pos) = FrameInstPos::decode(pc_and_pos); + ( + pc, + pos, + self.program_point_frame_iter(i).collect::>(), + ) + }) + } + + fn program_point_frame_iter( + &self, + index: usize, + ) -> impl Iterator { + let offset = + usize::try_from(self.progpoint_descriptor_offsets[index].get(LittleEndian)).unwrap(); + let mut data = &self.progpoint_descriptor_data[offset..]; + + core::iter::from_fn(move || { + if data.len() < 3 { + return None; + } + let wasm_pc = data[0].get(LittleEndian); + let frame_descriptor = FrameTableDescriptorIndex(data[1].get(LittleEndian)); + let stack_shape = FrameStackShape(data[2].get(LittleEndian)); + data = &data[3..]; + let not_last = wasm_pc & 0x8000_0000 != 0; + let wasm_pc = wasm_pc & 0x7fff_ffff; + if !not_last { + data = &[]; + } + Some((wasm_pc, frame_descriptor, stack_shape)) + }) + } +} + +/// An instruction position for a program point. +/// +/// We attach debug metadata to a *position* on an offset in the text +/// (code) section, either "post" or "pre". The "post" position +/// logically comes first, and is associated with the instruction that +/// ends at this offset (i.e., the previous instruction). The "pre" +/// position comes next, and is associated with the instruction that +/// begins at this offset (i.e., the next instruction). +/// +/// We make this distinction because metadata lookups sometimes occur +/// with a PC that is after the instruction (e.g., the return address +/// after a call instruction), and sometimes at the instruction (e.g., +/// a trapping PC address). 
The lookup context will know which one to +/// use -- e.g., when walking the stack, "pre" for a trapping PC and +/// "post" for every frame after that -- so we simply encode it as +/// part of the position and allow searching on it. +/// +/// The need for this distinction can be understood by way of an +/// example; say we have: +/// +/// ```plain +/// call ... +/// trapping_store ... +/// ``` +/// +/// where both instructions have debug metadata. We might look up the +/// PC of `trapping_store` once as we walk the stack from within the +/// call (we will get this PC because it is the return address) and +/// once when `trapping_store` itself traps; and we want different +/// metadata in each case. +/// +/// An alternative is to universally attach tags to the end offset of +/// an instruction, which allows us to handle return addresses +/// naturally but requires traps to adjust their PC. However, this +/// requires trap handlers to know the length of the trapping +/// instruction, which is not always easy -- in the most general case, +/// on variable-length instruction sets, it requires a full +/// instruction decoder. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum FrameInstPos { + /// The "post" position at an offset attaches to the instruction + /// that ends at this offset, i.e., came previously. + Post, + /// The "pre" position at an offset attaches to the instruction + /// that begins at this offset, i.e., comes next. + Pre, +} + +impl FrameInstPos { + pub(crate) fn encode(pc: u32, pos: FrameInstPos) -> u32 { + let lsb = match pos { + Self::Post => 0, + Self::Pre => 1, + }; + debug_assert!(pc < 0x8000_0000); + (pc << 1) | lsb + } + pub(crate) fn decode(bits: u32) -> (u32, FrameInstPos) { + let pos = match bits & 1 { + 0 => Self::Post, + 1 => Self::Pre, + _ => unreachable!(), + }; + let pc = bits >> 1; + (pc, pos) + } +} + +/// An offset into the state slot. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct FrameStateSlotOffset(pub(crate) u32); +impl FrameStateSlotOffset { + #[cfg(feature = "compile")] + pub(crate) fn add(self, offset: u32) -> FrameStateSlotOffset { + FrameStateSlotOffset(self.0 + offset) + } + + /// Get the offset into the state stackslot, suitable for use in a + /// `stack_store`/`stack_load` instruction. + pub fn offset(self) -> i32 { + i32::try_from(self.0).unwrap() + } +} + +/// A type stored in a frame. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[allow(missing_docs, reason = "self-describing variants")] +pub enum FrameValType { + I32, + I64, + F32, + F64, + V128, + AnyRef, + FuncRef, + ExternRef, + ExnRef, + ContRef, +} + +impl FrameValType { + #[cfg(feature = "compile")] + pub(crate) fn storage_size(&self, pointer_size: u32) -> u32 { + match self { + FrameValType::I32 => 4, + FrameValType::I64 => 8, + FrameValType::F32 => 4, + FrameValType::F64 => 8, + FrameValType::V128 => 16, + FrameValType::AnyRef | FrameValType::ExternRef | FrameValType::ExnRef => 4, + FrameValType::FuncRef => pointer_size, + FrameValType::ContRef => 2 * pointer_size, + } + } +} + +impl From<FrameValType> for u8 { + fn from(value: FrameValType) -> u8 { + match value { + FrameValType::I32 => 0, + FrameValType::I64 => 1, + FrameValType::F32 => 2, + FrameValType::F64 => 3, + FrameValType::V128 => 4, + FrameValType::AnyRef => 5, + FrameValType::FuncRef => 6, + FrameValType::ExternRef => 7, + FrameValType::ExnRef => 8, + FrameValType::ContRef => 9, + } + } +} + +impl TryFrom<u8> for FrameValType { + type Error = anyhow::Error; + fn try_from(value: u8) -> anyhow::Result<Self> { + match value { + 0 => Ok(Self::I32), + 1 => Ok(Self::I64), + 2 => Ok(Self::F32), + 3 => Ok(Self::F64), + 4 => Ok(Self::V128), + 5 => Ok(Self::AnyRef), + 6 => Ok(Self::FuncRef), + 7 => Ok(Self::ExternRef), + 8 => Ok(Self::ExnRef), + 9 => Ok(Self::ContRef), + _ => Err(anyhow::anyhow!("Invalid type")), + } + } +} + +/// Parser for a frame
state slot descriptor. +/// +/// This provides the ability to extract offsets and types for locals +/// and for the stack given a stack shape. +pub struct FrameStateSlot<'a> { + func_key: FuncKey, + local_offsets: &'a [U32Bytes<LittleEndian>], + stack_shape_parents: &'a [U32Bytes<LittleEndian>], + stack_shape_offsets: &'a [U32Bytes<LittleEndian>], + local_types: &'a [u8], + stack_shape_types: &'a [u8], +} + +impl<'a> FrameStateSlot<'a> { + /// Parse a slot descriptor. + /// + /// This parses the descriptor bytes as provided by + /// [`FrameTable::frame_descriptor`]. + pub fn parse(descriptor: &'a [u8]) -> anyhow::Result<FrameStateSlot<'a>> { + let mut data = Bytes(descriptor); + let func_key_namespace = data + .read::<U32Bytes<LittleEndian>>() + .map_err(|_| anyhow::anyhow!("Unable to read func key namespace"))? + .get(LittleEndian); + let func_key_index = data + .read::<U32Bytes<LittleEndian>>() + .map_err(|_| anyhow::anyhow!("Unable to read func key index"))? + .get(LittleEndian); + let func_key = FuncKey::from_raw_parts(func_key_namespace, func_key_index); + + let num_locals = data + .read::<U32Bytes<LittleEndian>>() + .map_err(|_| anyhow::anyhow!("Unable to read num_locals"))? + .get(LittleEndian); + let num_locals = usize::try_from(num_locals)?; + let num_stack_shapes = data + .read::<U32Bytes<LittleEndian>>() + .map_err(|_| anyhow::anyhow!("Unable to read num_stack_shapes"))? 
+ .get(LittleEndian); + let num_stack_shapes = usize::try_from(num_stack_shapes)?; + + let (local_offsets, data) = + object::slice_from_bytes::<U32Bytes<LittleEndian>>(data.0, num_locals) + .map_err(|_| anyhow::anyhow!("Unable to read local_offsets slice"))?; + let (stack_shape_parents, data) = + object::slice_from_bytes::<U32Bytes<LittleEndian>>(data, num_stack_shapes) + .map_err(|_| anyhow::anyhow!("Unable to read stack_shape_parents slice"))?; + let (stack_shape_offsets, data) = + object::slice_from_bytes::<U32Bytes<LittleEndian>>(data, num_stack_shapes) + .map_err(|_| anyhow::anyhow!("Unable to read stack_shape_offsets slice"))?; + let (local_types, data) = data + .split_at_checked(num_locals) + .ok_or_else(|| anyhow::anyhow!("Unable to read local_types slice"))?; + let (stack_shape_types, _) = data + .split_at_checked(num_stack_shapes) + .ok_or_else(|| anyhow::anyhow!("Unable to read stack_shape_types slice"))?; + + Ok(FrameStateSlot { + func_key, + local_offsets, + stack_shape_parents, + stack_shape_offsets, + local_types, + stack_shape_types, + }) + } + + /// Get the FuncKey for the function that produced this frame + /// slot. + pub fn func_key(&self) -> FuncKey { + self.func_key + } + + /// Get the local offsets and types. + pub fn locals(&self) -> impl Iterator<Item = (FrameStateSlotOffset, FrameValType)> { + (0..self.num_locals()).map(|i| self.local(i).unwrap()) + } + + /// Get the type and offset for a given local. + pub fn local(&self, index: usize) -> Option<(FrameStateSlotOffset, FrameValType)> { + let offset = FrameStateSlotOffset(self.local_offsets.get(index)?.get(LittleEndian)); + let ty = FrameValType::try_from(*self.local_types.get(index)?).expect("Invalid type"); + Some((offset, ty)) + } + + /// Get the number of locals in the frame. + pub fn num_locals(&self) -> usize { + self.local_offsets.len() + } + + /// Get the offsets and types for operand stack values, from top + /// of stack (most recently pushed) down. 
+ pub fn stack( + &self, + shape: FrameStackShape, + ) -> impl Iterator<Item = (FrameStateSlotOffset, FrameValType)> { + fn unpack_option_shape(shape: FrameStackShape) -> Option<FrameStackShape> { + if shape.0 == u32::MAX { + None + } else { + Some(shape) + } + } + + let mut shape = unpack_option_shape(shape); + core::iter::from_fn(move || { + shape.map(|s| { + let parent = FrameStackShape(self.stack_shape_parents[s.index()].get(LittleEndian)); + let parent = unpack_option_shape(parent); + let offset = + FrameStateSlotOffset(self.stack_shape_offsets[s.index()].get(LittleEndian)); + let ty = FrameValType::try_from(self.stack_shape_types[s.index()]) + .expect("Invalid type"); + shape = parent; + (offset, ty) + }) + }) + } +} diff --git a/crates/environ/src/lib.rs b/crates/environ/src/lib.rs index 19bff1c498dc..d78640d13710 100644 --- a/crates/environ/src/lib.rs +++ b/crates/environ/src/lib.rs @@ -19,6 +19,7 @@ extern crate alloc; pub mod prelude; mod address_map; +mod frame_table; #[macro_use] mod builtin; mod demangling; @@ -45,6 +46,7 @@ pub use crate::address_map::*; pub use crate::builtin::*; pub use crate::demangling::*; pub use crate::error::*; +pub use crate::frame_table::*; pub use crate::gc::*; pub use crate::hostcall::*; pub use crate::key::*; diff --git a/crates/environ/src/obj.rs b/crates/environ/src/obj.rs index a846d667c863..9a3eb6e06a15 100644 --- a/crates/environ/src/obj.rs +++ b/crates/environ/src/obj.rs @@ -109,6 +109,17 @@ pub const ELF_WASMTIME_TRAPS: &str = ".wasmtime.traps"; /// code offsets are relative to the start of the text segment. pub const ELF_WASMTIME_EXCEPTIONS: &str = ".wasmtime.exceptions"; +/// A custom binary-encoded section of the wasmtime compilation +/// artifacts which encodes frame tables. +/// +/// This section is used at runtime to allow debug APIs to decode Wasm +/// VM-level state from state stack slots. +/// +/// This section's format is defined by the +/// [`wasmtime_environ::FrameTableBuilder`] data structure. Its code +/// offsets are relative to the start of the text segment. 
+pub const ELF_WASMTIME_FRAMES: &str = ".wasmtime.frames"; + /// A custom section which consists of just 1 byte which is either 0 or 1 as to /// whether BTI is enabled. pub const ELF_WASM_BTI: &str = ".wasmtime.bti"; diff --git a/crates/environ/src/tunables.rs b/crates/environ/src/tunables.rs index 55b2bdad8f7c..f4b4d4e5c328 100644 --- a/crates/environ/src/tunables.rs +++ b/crates/environ/src/tunables.rs @@ -136,6 +136,10 @@ define_tunables! { /// The general size threshold for the sum of the caller's and callee's /// sizes, past which we will generally not inline calls anymore. pub inlining_sum_size_threshold: u32, + + /// Whether we are emitting debug instrumentation for precise + /// Wasm state. + pub debug_instrumentation: bool, } pub struct ConfigTunables { @@ -210,6 +214,7 @@ impl Tunables { inlining_intra_module: IntraModuleInlining::WhenUsingGc, inlining_small_callee_size: 50, inlining_sum_size_threshold: 2000, + debug_instrumentation: false, } } diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml index 81ccc0e5083a..cb180e96e1b6 100644 --- a/crates/wasmtime/Cargo.toml +++ b/crates/wasmtime/Cargo.toml @@ -408,3 +408,6 @@ component-model-async-bytes = [ "component-model-async", "dep:bytes", ] + +# Enables support for guest debugging. +debug = ['runtime'] diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index 4b313dfa5000..0c370a48e25e 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -6,7 +6,7 @@ use core::str::FromStr; #[cfg(any(feature = "cache", feature = "cranelift", feature = "winch"))] use std::path::Path; use wasmparser::WasmFeatures; -use wasmtime_environ::{ConfigTunables, TripleExt, Tunables}; +use wasmtime_environ::{ConfigTunables, IntraModuleInlining, TripleExt, Tunables}; #[cfg(feature = "runtime")] use crate::memory::MemoryCreator; @@ -431,8 +431,9 @@ impl Config { self } - /// Configures whether DWARF debug information will be emitted during - /// compilation. 
+ /// Configures whether DWARF debug information will be emitted + /// during compilation for a native debugger on the Wasmtime + /// process to consume. /// /// Note that the `debug-builtins` compile-time Cargo feature must also be /// enabled for native debuggers such as GDB or LLDB to be able to debug @@ -440,11 +441,32 @@ impl Config { /// /// By default this option is `false`. /// **Note** Enabling this option is not compatible with the Winch compiler. - pub fn debug_info(&mut self, enable: bool) -> &mut Self { + pub fn native_debug_info(&mut self, enable: bool) -> &mut Self { self.tunables.generate_native_debuginfo = Some(enable); self } + /// Configures whether compiled code will be instrumented to + /// provide precise debug state at the Wasm VM level. + /// + /// Without this enabled, debugger-visible state is "best-effort": + /// we may be able to recover some Wasm locals or operand stack + /// values, but it is not guaranteed, even when optimizations are + /// disabled. + /// + /// When this is enabled, additional instrumentation is inserted + /// that directly tracks the Wasm VM state at every step. This has + /// some performance impact, but allows perfect debugging + /// fidelity. + /// + /// ***Note*** Enabling this option is not compatible with the + /// Winch compiler. + #[cfg(feature = "debug")] + pub fn debug_instrumentation(&mut self, enable: bool) -> &mut Self { + self.tunables.debug_instrumentation = Some(enable); + self + } + /// Configures whether [`WasmBacktrace`] will be present in the context of /// errors returned from Wasmtime. /// @@ -2062,6 +2084,17 @@ impl Config { self } + /// Whether to force all possible inlining. 
+ pub fn compiler_force_inlining(&mut self, inlining: bool) -> &mut Self { + let inlining = if inlining { + IntraModuleInlining::Yes + } else { + IntraModuleInlining::No + }; + self.tunables.inlining_intra_module = Some(inlining); + self + } + /// Returns the set of features that the currently selected compiler backend /// does not support at all and may panic on. /// diff --git a/crates/wasmtime/src/engine/serialization.rs b/crates/wasmtime/src/engine/serialization.rs index c38d26671b8d..3e9506b017ef 100644 --- a/crates/wasmtime/src/engine/serialization.rs +++ b/crates/wasmtime/src/engine/serialization.rs @@ -278,6 +278,7 @@ impl Metadata<'_> { memory_reservation, memory_guard_size, generate_native_debuginfo, + debug_instrumentation, parse_wasm_debuginfo, consume_fuel, epoch_interruption, @@ -322,6 +323,11 @@ impl Metadata<'_> { other.generate_native_debuginfo, "debug information support", )?; + Self::check_bool( + debug_instrumentation, + other.debug_instrumentation, + "debug instrumentation", + )?; Self::check_bool( parse_wasm_debuginfo, other.parse_wasm_debuginfo, @@ -702,7 +708,7 @@ Caused by: assert_eq!(cache_config.cache_misses(), 1); let mut cfg = Config::new(); - cfg.debug_info(true) + cfg.native_debug_info(true) .cache(Some(Cache::from_file(Some(&config_path))?)); let engine = Engine::new(&cfg)?; let cache_config = engine diff --git a/crates/wasmtime/src/runtime.rs b/crates/wasmtime/src/runtime.rs index 8ff64707e810..a0ff5e0ec65e 100644 --- a/crates/wasmtime/src/runtime.rs +++ b/crates/wasmtime/src/runtime.rs @@ -31,7 +31,7 @@ pub(crate) mod func; pub(crate) mod code; pub(crate) mod code_memory; -#[cfg(feature = "debug-builtins")] +#[cfg(feature = "debug")] pub(crate) mod debug; #[cfg(feature = "gc")] pub(crate) mod exception; @@ -45,6 +45,8 @@ pub(crate) mod limits; pub(crate) mod linker; pub(crate) mod memory; pub(crate) mod module; +#[cfg(feature = "debug-builtins")] +pub(crate) mod native_debug; pub(crate) mod resources; pub(crate) mod store; 
pub(crate) mod trampoline; @@ -74,6 +76,8 @@ cfg_if::cfg_if! { } pub use code_memory::CodeMemory; +#[cfg(feature = "debug")] +pub use debug::*; #[cfg(feature = "gc")] pub use exception::*; pub use externals::*; diff --git a/crates/wasmtime/src/runtime/code_memory.rs b/crates/wasmtime/src/runtime/code_memory.rs index acb05e81e630..6777790b183f 100644 --- a/crates/wasmtime/src/runtime/code_memory.rs +++ b/crates/wasmtime/src/runtime/code_memory.rs @@ -36,6 +36,7 @@ pub struct CodeMemory { address_map_data: Range, stack_map_data: Range, exception_data: Range, + frame_tables_data: Range, func_name_data: Range, info_data: Range, wasm_dwarf: Range, @@ -122,6 +123,7 @@ impl CodeMemory { let mut has_native_debug_info = false; let mut trap_data = 0..0; let mut exception_data = 0..0; + let mut frame_tables_data = 0..0; let mut wasm_data = 0..0; let mut address_map_data = 0..0; let mut stack_map_data = 0..0; @@ -172,6 +174,7 @@ impl CodeMemory { obj::ELF_WASMTIME_STACK_MAP => stack_map_data = range, obj::ELF_WASMTIME_TRAPS => trap_data = range, obj::ELF_WASMTIME_EXCEPTIONS => exception_data = range, + obj::ELF_WASMTIME_FRAMES => frame_tables_data = range, obj::ELF_NAME_DATA => func_name_data = range, obj::ELF_WASMTIME_INFO => info_data = range, obj::ELF_WASMTIME_DWARF => wasm_dwarf = range, @@ -216,6 +219,7 @@ impl CodeMemory { address_map_data, stack_map_data, exception_data, + frame_tables_data, func_name_data, wasm_dwarf, info_data, @@ -277,6 +281,12 @@ impl CodeMemory { &self.mmap[self.exception_data.clone()] } + /// Returns the encoded frame-tables section to pass to + /// `wasmtime_environ::FrameTable::parse`. + pub fn frame_tables(&self) -> &[u8] { + &self.mmap[self.frame_tables_data.clone()] + } + /// Returns the contents of the `ELF_WASMTIME_INFO` section, or an empty /// slice if it wasn't found. #[inline] @@ -420,7 +430,7 @@ impl CodeMemory { // and anything else necessary that is done in "create_gdbjit_image" right now. 
let image = self.mmap().to_vec(); let text: &[u8] = self.text(); - let bytes = crate::debug::create_gdbjit_image(image, (text.as_ptr(), text.len()))?; + let bytes = crate::native_debug::create_gdbjit_image(image, (text.as_ptr(), text.len()))?; let reg = crate::runtime::vm::GdbJitImageRegistration::register(bytes); self.debug_registration = Some(reg); Ok(()) diff --git a/crates/wasmtime/src/runtime/debug.rs b/crates/wasmtime/src/runtime/debug.rs index 1fa4d430b88d..98bb6028f2f2 100644 --- a/crates/wasmtime/src/runtime/debug.rs +++ b/crates/wasmtime/src/runtime/debug.rs @@ -1,172 +1,332 @@ -use crate::prelude::*; -use core::mem::size_of; -use object::elf::*; -use object::endian::{BigEndian, Endian, Endianness, LittleEndian}; -use object::read::elf::{FileHeader, SectionHeader}; -use object::{ - File, NativeEndian as NE, Object, ObjectSection, ObjectSymbol, RelocationEncoding, - RelocationKind, RelocationTarget, U64Bytes, +//! Debugging API. + +use crate::{ + AnyRef, ExnRef, ExternRef, Func, Instance, Module, Val, ValType, + store::{AutoAssertNoGc, StoreOpaque}, + vm::{Backtrace, VMContext}, +}; +use alloc::vec::Vec; +use core::{ffi::c_void, ops::ControlFlow, ptr::NonNull}; +use wasmtime_environ::{ + DefinedFuncIndex, FrameInstPos, FrameStackShape, FrameStateSlot, FrameStateSlotOffset, + FrameTableDescriptorIndex, FrameValType, FuncKey, }; -use wasmtime_environ::obj; +use wasmtime_unwinder::Frame; -pub(crate) fn create_gdbjit_image( - mut bytes: Vec, - code_region: (*const u8, usize), -) -> Result, Error> { - let e = ensure_supported_elf_format(&bytes)?; +impl StoreOpaque { + /// Provide an object that captures Wasm stack state, including + /// Wasm VM-level values (locals and operand stack). + /// + /// This object views all activations for the current store that + /// are on the stack. An activation is a contiguous sequence of + /// Wasm frames (called functions) that were called from host code + /// and called back out to host code. 
If there are activations + /// from multiple stores on the stack, for example if Wasm code in + /// one store calls out to host code which invokes another Wasm + /// function in another store, then the other stores are "opaque" + /// to our view here in the same way that host code is. + /// + /// Returns `None` if debug instrumentation is not enabled for + /// the engine containing this store. + pub fn stack_values(&mut self) -> Option> { + if !self.engine().tunables().debug_instrumentation { + return None; + } - // patch relocs - relocate_dwarf_sections(&mut bytes, code_region)?; + let mut frames = vec![]; + Backtrace::trace(self, |frame| { + // `is_trapping_frame == false`: for now, we do not yet + // support capturing stack values after a trap, so the PC + // we use to look up metadata is always a "post-position" + // PC, i.e., a call's return address. + frames.extend(VirtualFrame::decode(self, frame, false)); + ControlFlow::Continue(()) + }); + Some(StackView { + store: self, + frames, + }) + } +} - // elf is still missing details... - match e { - Endianness::Little => { - convert_object_elf_to_loadable_file::(&mut bytes, code_region) - } - Endianness::Big => { - convert_object_elf_to_loadable_file::(&mut bytes, code_region) - } +/// A view of values in active Wasm stack frames. +/// +/// See the documentation on `Store::stack_values` for more information +/// about which frames this view will show. +pub struct StackView<'a> { + /// Mutable borrow held to the store. + /// + /// This both ensures that the stack does not mutate while we're + /// observing it (any borrow would do), and lets us create + /// host-API GC references as values that are references are read + /// off of the stack (a mutable borrow is needed for this). + store: &'a mut StoreOpaque, + + /// Pre-enumerated frames. 
We precompute this rather than walking + /// a true iterator finger up the stack (e.g., current FP and + /// current `CallThreadState`) because our existing unwinder logic + /// is written in a visit-with-closure style; and users of this + /// API are likely to visit every frame anyway, so + /// sparseness/efficiency is not a main concern here. + frames: Vec, +} + +/// Internal data pre-computed for one stack frame. +/// +/// This combines physical frame info (pc, fp) with the module this PC +/// maps to (yielding a frame table) and one frame as produced by the +/// progpoint lookup (Wasm PC, frame descriptor index, stack shape). +struct VirtualFrame { + /// The frame pointer. + fp: usize, + /// The resolved module handle for the physical PC. + /// + /// The module for each inlined frame within the physical frame is + /// resolved from the vmctx reachable for each such frame; this + /// module is used only for looking up the frame table. + module: Module, + /// The Wasm PC for this frame. + wasm_pc: u32, + /// The frame descriptor for this frame. + frame_descriptor: FrameTableDescriptorIndex, + /// The stack shape for this frame. + stack_shape: FrameStackShape, +} + +/// A view of a frame that can decode values in that frame. +pub struct FrameView<'a> { + frame_state_slot: FrameStateSlot<'a>, + store: &'a mut StoreOpaque, + slot_addr: usize, + wasm_pc: u32, + stack: Vec<(FrameStateSlotOffset, FrameValType)>, +} + +impl<'a> StackView<'a> { + /// Get a handle to a specific frame. + /// + /// # Panics + /// + /// Panics if the index is out of range. + pub fn frame(&mut self, index: usize) -> FrameView<'_> { + FrameView::new(self.store, &self.frames[index]) + } - Ok(bytes) + /// Get the number of frames viewable on this stack. 
+ pub fn len(&self) -> usize { + self.frames.len() + } } -fn relocate_dwarf_sections(bytes: &mut [u8], code_region: (*const u8, usize)) -> Result<(), Error> { - let mut relocations = Vec::new(); - let obj = File::parse(&bytes[..]).map_err(obj::ObjectCrateErrorWrapper)?; - for section in obj.sections() { - let section_start = match section.file_range() { - Some((start, _)) => start, - None => continue, +impl VirtualFrame { + fn decode(store: &StoreOpaque, frame: Frame, is_trapping_frame: bool) -> Vec { + let module = store + .modules() + .lookup_module_by_pc(frame.pc()) + .expect("Wasm frame PC does not correspond to a module"); + let base = module.code_object().code_memory().text().as_ptr() as usize; + let pc = frame.pc().wrapping_sub(base); + let table = module.frame_table(); + let pc = u32::try_from(pc).expect("PC offset too large"); + let pos = if is_trapping_frame { + FrameInstPos::Pre + } else { + FrameInstPos::Post }; - for (off, r) in section.relocations() { - if r.kind() != RelocationKind::Absolute - || r.encoding() != RelocationEncoding::Generic - || r.size() != 64 - { - continue; - } - - let sym = match r.target() { - RelocationTarget::Symbol(index) => match obj.symbol_by_index(index) { - Ok(sym) => sym, - Err(_) => continue, - }, - _ => continue, - }; - relocations.push(( - section_start + off, - (code_region.0 as u64) - .wrapping_add(sym.address()) - .wrapping_add(r.addend() as u64), - )); + let Some(program_points) = table.find_program_point(pc, pos) else { + return vec![]; + }; + + let mut frames: Vec<_> = program_points + .map(|(wasm_pc, frame_descriptor, stack_shape)| VirtualFrame { + fp: frame.fp(), + module: module.clone(), + wasm_pc, + frame_descriptor, + stack_shape, + }) + .collect(); + + // Reverse the frames so we return them inside-out, matching + // the bottom-up stack traversal order. 
+ frames.reverse(); + frames + } +} + +impl<'a> FrameView<'a> { + fn new(store: &'a mut StoreOpaque, frame: &'a VirtualFrame) -> Self { + let frame_table = frame.module.frame_table(); + // Parse the frame descriptor. + let (data, slot_to_fp_offset) = frame_table + .frame_descriptor(frame.frame_descriptor) + .unwrap(); + let frame_state_slot = FrameStateSlot::parse(data).unwrap(); + let slot_addr = frame + .fp + .wrapping_sub(usize::try_from(slot_to_fp_offset).unwrap()); + // Materialize the stack shape so we have O(1) access to its elements. + let mut stack = frame_state_slot + .stack(frame.stack_shape) + .collect::>(); + stack.reverse(); // Put top-of-stack last. + FrameView { + store, + frame_state_slot, + slot_addr, + wasm_pc: frame.wasm_pc, + stack, } } - for (offset, value) in relocations { - let (loc, _) = offset - .try_into() - .ok() - .and_then(|offset| object::from_bytes_mut::>(&mut bytes[offset..]).ok()) - .ok_or_else(|| anyhow!("invalid dwarf relocations"))?; - loc.set(NE, value); + fn raw_instance(&mut self) -> &'a crate::vm::Instance { + // Read out the vmctx slot. + // SAFETY: vmctx is always at offset 0 in the slot. + let vmctx: *mut VMContext = unsafe { *(self.slot_addr as *mut _) }; + let vmctx = NonNull::new(vmctx).expect("null vmctx in debug state slot"); + // SAFETY: the stored vmctx value is a valid instance in this + // store; we only visit frames from this store in the backtrace. + let instance = unsafe { crate::vm::Instance::from_vmctx(vmctx) }; + // SAFETY: the instance pointer read above is valid. + unsafe { instance.as_ref() } + } + + /// Get the instance associated with this frame. + pub fn instance(&mut self) -> Instance { + let instance = self.raw_instance(); + Instance::from_wasmtime(instance.id(), self.store) + } + + /// Get the module associated with this frame, if any (i.e., not a + /// container instance for a host-created entity). 
+ pub fn module(&mut self) -> Option<&Module> { + let instance = self.raw_instance(); + instance.runtime_module() + } + + /// Get the raw function index associated with this frame, and the + /// PC as an offset within its code section, if it is a Wasm + /// function directly from the given `Module` (rather than a + /// trampoline). + pub fn wasm_function_index_and_pc(&mut self) -> Option<(DefinedFuncIndex, u32)> { + let FuncKey::DefinedWasmFunction(module, func) = self.frame_state_slot.func_key() else { + return None; + }; + debug_assert_eq!( + module, + self.module() + .expect("module should be defined if this is a defined function") + .env_module() + .module_index + ); + Some((func, self.wasm_pc)) + } + + /// Get the number of locals in this frame. + pub fn num_locals(&self) -> usize { + self.frame_state_slot.num_locals() + } + + /// Get the depth of the operand stack in this frame. + pub fn num_stacks(&self) -> usize { + self.stack.len() + } + + /// Get the type and value of the given local in this frame. + /// + /// # Panics + /// + /// Panics if the index is out-of-range (greater than or + /// equal to `num_locals()`). + pub fn local(&mut self, index: usize) -> (ValType, Val) { + let (offset, ty) = self.frame_state_slot.local(index).unwrap(); + // SAFETY: compiler produced metadata to describe this local + // slot and stored a value of the correct type into it. + unsafe { read_value(self.store, self.slot_addr, offset, ty) } + } + + /// Get the type and value of the given operand-stack value in + /// this frame. + /// + /// Index 0 corresponds to the bottom-of-stack, and higher indices + /// from there are more recently pushed values. In other words, + /// index order reads the Wasm virtual machine's abstract stack + /// state left-to-right. + pub fn stack(&mut self, index: usize) -> (ValType, Val) { + let (offset, ty) = self.stack[index]; + // SAFETY: compiler produced metadata to describe this + // operand-stack slot and stored a value of the correct type + // into it. 
+ unsafe { read_value(self.store, self.slot_addr, offset, ty) } } - Ok(()) } -fn ensure_supported_elf_format(bytes: &[u8]) -> Result { - use object::elf::*; - use object::read::elf::*; +/// Read the value at the given offset. +/// +/// # Safety +/// +/// The `offset` and `ty` must correspond to a valid value written +/// to the frame by generated code of the correct type. This will +/// be the case if this information comes from the frame tables +/// (as long as the frontend that generates the tables and +/// instrumentation is correct, and as long as the tables are +/// preserved through serialization). +unsafe fn read_value( + store: &mut StoreOpaque, + slot_base: usize, + offset: FrameStateSlotOffset, + ty: FrameValType, +) -> (ValType, Val) { + let address = slot_base.wrapping_add(usize::try_from(offset.offset()).unwrap()); - let kind = match object::FileKind::parse(bytes) { - Ok(file) => file, - Err(err) => { - bail!("Failed to parse file: {}", err); + // SAFETY: each case reads a value from memory that should be + // valid according to our safety condition. 
+ match ty { + FrameValType::I32 => { + let value = unsafe { *(address as *const i32) }; + (ValType::I32, Val::I32(value)) } - }; - let header = match kind { - object::FileKind::Elf64 => match object::elf::FileHeader64::::parse(bytes) { - Ok(header) => header, - Err(err) => { - bail!("Unsupported ELF file: {}", err); - } - }, - _ => { - bail!("only 64-bit ELF files currently supported") + FrameValType::I64 => { + let value = unsafe { *(address as *const i64) }; + (ValType::I64, Val::I64(value)) } - }; - let e = header.endian().unwrap(); - - match header.e_machine.get(e) { - EM_AARCH64 => (), - EM_X86_64 => (), - EM_S390 => (), - EM_RISCV => (), - machine => { - bail!("Unsupported ELF target machine: {:x}", machine); + FrameValType::F32 => { + let value = unsafe { *(address as *const u32) }; + (ValType::F32, Val::F32(value)) } - } - ensure!( - header.e_phoff.get(e) == 0 && header.e_phnum.get(e) == 0, - "program header table is empty" - ); - let e_shentsize = header.e_shentsize.get(e); - let req_shentsize = match e { - Endianness::Little => size_of::>(), - Endianness::Big => size_of::>(), - }; - ensure!(e_shentsize as usize == req_shentsize, "size of sh"); - Ok(e) -} - -fn convert_object_elf_to_loadable_file( - bytes: &mut Vec, - code_region: (*const u8, usize), -) { - let e = E::default(); - - let header = FileHeader64::::parse(&bytes[..]).unwrap(); - let sections = header.sections(e, &bytes[..]).unwrap(); - let text_range = match sections.section_by_name(e, b".text") { - Some((i, text)) => { - let range = text.file_range(e); - let e_shoff = usize::try_from(header.e_shoff.get(e)).unwrap(); - let off = e_shoff + i.0 * header.e_shentsize.get(e) as usize; - - let section: &mut SectionHeader64 = - object::from_bytes_mut(&mut bytes[off..]).unwrap().0; - // Patch vaddr, and save file location and its size. 
- section.sh_addr.set(e, code_region.0 as u64); - range + FrameValType::F64 => { + let value = unsafe { *(address as *const u64) }; + (ValType::F64, Val::F64(value)) + } + FrameValType::V128 => { + let value = unsafe { *(address as *const u128) }; + (ValType::V128, Val::V128(value.into())) + } + FrameValType::AnyRef => { + let mut nogc = AutoAssertNoGc::new(store); + let value = unsafe { *(address as *const u32) }; + let value = AnyRef::_from_raw(&mut nogc, value); + (ValType::ANYREF, Val::AnyRef(value)) + } + FrameValType::ExnRef => { + let mut nogc = AutoAssertNoGc::new(store); + let value = unsafe { *(address as *const u32) }; + let value = ExnRef::_from_raw(&mut nogc, value); + (ValType::EXNREF, Val::ExnRef(value)) + } + FrameValType::ExternRef => { + let mut nogc = AutoAssertNoGc::new(store); + let value = unsafe { *(address as *const u32) }; + let value = ExternRef::_from_raw(&mut nogc, value); + (ValType::EXTERNREF, Val::ExternRef(value)) + } + FrameValType::FuncRef => { + let value = unsafe { *(address as *const *mut c_void) }; + let value = unsafe { Func::_from_raw(store, value) }; + (ValType::FUNCREF, Val::FuncRef(value)) + } + FrameValType::ContRef => { + unimplemented!("contref values are not implemented in the host API yet") + } - None => None, - }; - - // LLDB wants segment with virtual address set, placing them at the end of ELF. 
- let ph_off = bytes.len(); - let e_phentsize = size_of::>(); - let e_phnum = 1; - bytes.resize(ph_off + e_phentsize * e_phnum, 0); - if let Some((sh_offset, sh_size)) = text_range { - let (v_offset, size) = code_region; - let program: &mut ProgramHeader64 = - object::from_bytes_mut(&mut bytes[ph_off..]).unwrap().0; - program.p_type.set(e, PT_LOAD); - program.p_offset.set(e, sh_offset); - program.p_vaddr.set(e, v_offset as u64); - program.p_paddr.set(e, v_offset as u64); - program.p_filesz.set(e, sh_size); - program.p_memsz.set(e, size as u64); - } else { - unreachable!(); } - - // It is somewhat loadable ELF file at this moment. - let header: &mut FileHeader64 = object::from_bytes_mut(bytes).unwrap().0; - header.e_type.set(e, ET_DYN); - header.e_phoff.set(e, ph_off as u64); - header - .e_phentsize - .set(e, u16::try_from(e_phentsize).unwrap()); - header.e_phnum.set(e, u16::try_from(e_phnum).unwrap()); } diff --git a/crates/wasmtime/src/runtime/func.rs b/crates/wasmtime/src/runtime/func.rs index 4f801593ea6b..a3a476423a45 100644 --- a/crates/wasmtime/src/runtime/func.rs +++ b/crates/wasmtime/src/runtime/func.rs @@ -2228,6 +2228,15 @@ impl Caller<'_, T> { pub fn fuel_async_yield_interval(&mut self, interval: Option) -> Result<()> { self.store.fuel_async_yield_interval(interval) } + + /// Provide an object that captures Wasm stack state, including + /// Wasm VM-level values (locals and operand stack). + /// + /// See [`Store::stack_values`] for more details. 
+ #[cfg(feature = "debug")] + pub fn stack_values(&mut self) -> Option> { + self.store.stack_values() + } } impl AsContext for Caller<'_, T> { diff --git a/crates/wasmtime/src/runtime/module.rs b/crates/wasmtime/src/runtime/module.rs index 35b721405892..d044a5577d77 100644 --- a/crates/wasmtime/src/runtime/module.rs +++ b/crates/wasmtime/src/runtime/module.rs @@ -18,6 +18,8 @@ use core::ptr::NonNull; #[cfg(feature = "std")] use std::{fs::File, path::Path}; use wasmparser::{Parser, ValidPayload, Validator}; +#[cfg(feature = "debug")] +use wasmtime_environ::FrameTable; use wasmtime_environ::{ CompiledFunctionsTable, CompiledModuleInfo, EntityIndex, HostPtr, ModuleTypes, ObjectKind, TypeTrace, VMOffsets, VMSharedTypeIndex, @@ -1142,6 +1144,14 @@ impl Module { ExceptionTable::parse(self.inner.code.code_memory().exception_tables()) .expect("Exception tables were validated on module load") } + + /// Obtain a frame-table parser on this module's frame state slot + /// (debug instrumentation) metadata. + #[cfg(feature = "debug")] + pub(crate) fn frame_table<'a>(&'a self) -> FrameTable<'a> { + FrameTable::parse(self.inner.code.code_memory().frame_tables()) + .expect("Frame tables were validated on module load") + } } /// Describes a function for a given module. diff --git a/crates/wasmtime/src/runtime/module/registry.rs b/crates/wasmtime/src/runtime/module/registry.rs index 2167b2244699..a7836b4e3b8b 100644 --- a/crates/wasmtime/src/runtime/module/registry.rs +++ b/crates/wasmtime/src/runtime/module/registry.rs @@ -70,7 +70,7 @@ impl ModuleRegistry { } /// Fetches a registered module given a program counter value. 
- #[cfg(feature = "gc")] + #[cfg(any(feature = "gc", feature = "debug"))] pub fn lookup_module_by_pc(&self, pc: usize) -> Option<&Module> { let (module, _) = self.module_and_offset(pc)?; Some(module) diff --git a/crates/wasmtime/src/runtime/native_debug.rs b/crates/wasmtime/src/runtime/native_debug.rs new file mode 100644 index 000000000000..1fa4d430b88d --- /dev/null +++ b/crates/wasmtime/src/runtime/native_debug.rs @@ -0,0 +1,172 @@ +use crate::prelude::*; +use core::mem::size_of; +use object::elf::*; +use object::endian::{BigEndian, Endian, Endianness, LittleEndian}; +use object::read::elf::{FileHeader, SectionHeader}; +use object::{ + File, NativeEndian as NE, Object, ObjectSection, ObjectSymbol, RelocationEncoding, + RelocationKind, RelocationTarget, U64Bytes, +}; +use wasmtime_environ::obj; + +pub(crate) fn create_gdbjit_image( + mut bytes: Vec, + code_region: (*const u8, usize), +) -> Result, Error> { + let e = ensure_supported_elf_format(&bytes)?; + + // patch relocs + relocate_dwarf_sections(&mut bytes, code_region)?; + + // elf is still missing details... 
+ match e { + Endianness::Little => { + convert_object_elf_to_loadable_file::(&mut bytes, code_region) + } + Endianness::Big => { + convert_object_elf_to_loadable_file::(&mut bytes, code_region) + } + } + + Ok(bytes) +} + +fn relocate_dwarf_sections(bytes: &mut [u8], code_region: (*const u8, usize)) -> Result<(), Error> { + let mut relocations = Vec::new(); + let obj = File::parse(&bytes[..]).map_err(obj::ObjectCrateErrorWrapper)?; + for section in obj.sections() { + let section_start = match section.file_range() { + Some((start, _)) => start, + None => continue, + }; + for (off, r) in section.relocations() { + if r.kind() != RelocationKind::Absolute + || r.encoding() != RelocationEncoding::Generic + || r.size() != 64 + { + continue; + } + + let sym = match r.target() { + RelocationTarget::Symbol(index) => match obj.symbol_by_index(index) { + Ok(sym) => sym, + Err(_) => continue, + }, + _ => continue, + }; + relocations.push(( + section_start + off, + (code_region.0 as u64) + .wrapping_add(sym.address()) + .wrapping_add(r.addend() as u64), + )); + } + } + + for (offset, value) in relocations { + let (loc, _) = offset + .try_into() + .ok() + .and_then(|offset| object::from_bytes_mut::>(&mut bytes[offset..]).ok()) + .ok_or_else(|| anyhow!("invalid dwarf relocations"))?; + loc.set(NE, value); + } + Ok(()) +} + +fn ensure_supported_elf_format(bytes: &[u8]) -> Result { + use object::elf::*; + use object::read::elf::*; + + let kind = match object::FileKind::parse(bytes) { + Ok(file) => file, + Err(err) => { + bail!("Failed to parse file: {}", err); + } + }; + let header = match kind { + object::FileKind::Elf64 => match object::elf::FileHeader64::::parse(bytes) { + Ok(header) => header, + Err(err) => { + bail!("Unsupported ELF file: {}", err); + } + }, + _ => { + bail!("only 64-bit ELF files currently supported") + } + }; + let e = header.endian().unwrap(); + + match header.e_machine.get(e) { + EM_AARCH64 => (), + EM_X86_64 => (), + EM_S390 => (), + EM_RISCV => (), + 
machine => { + bail!("Unsupported ELF target machine: {:x}", machine); + } + } + ensure!( + header.e_phoff.get(e) == 0 && header.e_phnum.get(e) == 0, + "program header table is empty" + ); + let e_shentsize = header.e_shentsize.get(e); + let req_shentsize = match e { + Endianness::Little => size_of::>(), + Endianness::Big => size_of::>(), + }; + ensure!(e_shentsize as usize == req_shentsize, "size of sh"); + Ok(e) +} + +fn convert_object_elf_to_loadable_file( + bytes: &mut Vec, + code_region: (*const u8, usize), +) { + let e = E::default(); + + let header = FileHeader64::::parse(&bytes[..]).unwrap(); + let sections = header.sections(e, &bytes[..]).unwrap(); + let text_range = match sections.section_by_name(e, b".text") { + Some((i, text)) => { + let range = text.file_range(e); + let e_shoff = usize::try_from(header.e_shoff.get(e)).unwrap(); + let off = e_shoff + i.0 * header.e_shentsize.get(e) as usize; + + let section: &mut SectionHeader64 = + object::from_bytes_mut(&mut bytes[off..]).unwrap().0; + // Patch vaddr, and save file location and its size. + section.sh_addr.set(e, code_region.0 as u64); + range + } + None => None, + }; + + // LLDB wants segment with virtual address set, placing them at the end of ELF. + let ph_off = bytes.len(); + let e_phentsize = size_of::>(); + let e_phnum = 1; + bytes.resize(ph_off + e_phentsize * e_phnum, 0); + if let Some((sh_offset, sh_size)) = text_range { + let (v_offset, size) = code_region; + let program: &mut ProgramHeader64 = + object::from_bytes_mut(&mut bytes[ph_off..]).unwrap().0; + program.p_type.set(e, PT_LOAD); + program.p_offset.set(e, sh_offset); + program.p_vaddr.set(e, v_offset as u64); + program.p_paddr.set(e, v_offset as u64); + program.p_filesz.set(e, sh_size); + program.p_memsz.set(e, size as u64); + } else { + unreachable!(); + } + + // It is somewhat loadable ELF file at this moment. 
+ let header: &mut FileHeader64 = object::from_bytes_mut(bytes).unwrap().0; + header.e_type.set(e, ET_DYN); + header.e_phoff.set(e, ph_off as u64); + header + .e_phentsize + .set(e, u16::try_from(e_phentsize).unwrap()); + header.e_phnum.set(e, u16::try_from(e_phnum).unwrap()); +} diff --git a/crates/wasmtime/src/runtime/store.rs b/crates/wasmtime/src/runtime/store.rs index ca54b0f084d7..4bad060699a1 100644 --- a/crates/wasmtime/src/runtime/store.rs +++ b/crates/wasmtime/src/runtime/store.rs @@ -1167,6 +1167,25 @@ impl Store { pub fn has_pending_exception(&self) -> bool { self.inner.pending_exception.is_some() } + + /// Provide an object that captures Wasm stack state, including + /// Wasm VM-level values (locals and operand stack). + /// + /// This object views all activations for the current store that + /// are on the stack. An activation is a contiguous sequence of + /// Wasm frames (called functions) that were called from host code + /// and called back out to host code. If there are activations + /// from multiple stores on the stack, for example if Wasm code in + /// one store calls out to host code which invokes another Wasm + /// function in another store, then the other stores are "opaque" + /// to our view here in the same way that host code is. + /// + /// Returns `None` if debug instrumentation is not enabled for + /// the engine containing this store. + #[cfg(feature = "debug")] + pub fn stack_values(&mut self) -> Option> { + self.inner.stack_values() + } } impl<'a, T> StoreContext<'a, T> { @@ -1290,6 +1309,15 @@ impl<'a, T> StoreContextMut<'a, T> { pub fn has_pending_exception(&self) -> bool { self.0.inner.pending_exception.is_some() } + + /// Provide an object that captures Wasm stack state, including + /// Wasm VM-level values (locals and operand stack). + /// + /// See [`Store::stack_values`] for more details.
+ #[cfg(feature = "debug")] + pub fn stack_values(&mut self) -> Option> { + self.0.inner.stack_values() + } } impl StoreInner { diff --git a/crates/wasmtime/src/runtime/vm/instance.rs b/crates/wasmtime/src/runtime/vm/instance.rs index 41965ac15c77..f59be04881e4 100644 --- a/crates/wasmtime/src/runtime/vm/instance.rs +++ b/crates/wasmtime/src/runtime/vm/instance.rs @@ -346,7 +346,7 @@ impl Instance { self.runtime_info.env_module() } - #[cfg(feature = "gc")] + #[cfg(any(feature = "gc", feature = "debug"))] pub(crate) fn runtime_module(&self) -> Option<&crate::Module> { match &self.runtime_info { ModuleRuntimeInfo::Module(m) => Some(m), diff --git a/crates/wasmtime/src/runtime/vm/traphandlers/backtrace.rs b/crates/wasmtime/src/runtime/vm/traphandlers/backtrace.rs index 055c4f2aff69..c8f3b314fdc8 100644 --- a/crates/wasmtime/src/runtime/vm/traphandlers/backtrace.rs +++ b/crates/wasmtime/src/runtime/vm/traphandlers/backtrace.rs @@ -77,7 +77,7 @@ impl Backtrace { } /// Walk the current Wasm stack, calling `f` for each frame we walk. - #[cfg(feature = "gc")] + #[cfg(any(feature = "gc", feature = "debug"))] pub fn trace(store: &StoreOpaque, f: impl FnMut(Frame) -> ControlFlow<()>) { let vm_store_context = store.vm_store_context(); let unwind = store.unwinder(); @@ -136,6 +136,19 @@ impl Backtrace { /// If Wasm hit a trap, and we calling this from the trap handler, then the /// Wasm exit trampoline didn't run, and we use the provided PC and FP /// instead of looking them up in `VMStoreContext`. + /// + /// We define "current Wasm stack" here as "all activations + /// associated with the given store". 
That is: if we have a stack like + /// + /// ```plain + /// host --> (Wasm functions in store A) --> host --> (Wasm functions in store B) --> host + /// --> (Wasm functions in store A) --> host --> call `trace_with_trap_state` with store A + /// ``` + /// + /// then we will see the first and third Wasm activations (those + /// associated with store A), but not that with store B. In + /// essence, activations from another store might as well be some + /// other opaque host code; we don't know anything about it. pub(crate) unsafe fn trace_with_trap_state( vm_store_context: *const VMStoreContext, unwind: &dyn Unwind, diff --git a/crates/winch/src/builder.rs b/crates/winch/src/builder.rs index a7ddfbb3bab2..0c6b2b5e7132 100644 --- a/crates/winch/src/builder.rs +++ b/crates/winch/src/builder.rs @@ -67,6 +67,10 @@ impl CompilerBuilder for Builder { bail!("Winch does not currently support generating native debug information"); } + if tunables.debug_instrumentation { + bail!("Winch does not currently support debug instrumentation"); + } + self.tunables = Some(tunables.clone()); self.cranelift.set_tunables(tunables)?; Ok(()) diff --git a/examples/fib-debug/main.rs b/examples/fib-debug/main.rs index 55a6e84cafe2..b1025c0bc97d 100644 --- a/examples/fib-debug/main.rs +++ b/examples/fib-debug/main.rs @@ -16,7 +16,7 @@ fn main() -> Result<()> { // debugged in GDB. 
let engine = Engine::new( Config::new() - .debug_info(true) + .native_debug_info(true) .cranelift_opt_level(OptLevel::None), )?; let mut store = Store::new(&engine, ()); diff --git a/src/commands/objdump.rs b/src/commands/objdump.rs index bf0469331c2f..64d7a29aa827 100644 --- a/src/commands/objdump.rs +++ b/src/commands/objdump.rs @@ -14,7 +14,10 @@ use std::iter::{self, Peekable}; use std::path::{Path, PathBuf}; use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; use wasmtime::Engine; -use wasmtime_environ::{FilePos, StackMap, Trap, obj}; +use wasmtime_environ::{ + FilePos, FrameInstPos, FrameStackShape, FrameStateSlot, FrameTable, FrameTableDescriptorIndex, + StackMap, Trap, obj, +}; use wasmtime_unwinder::{ExceptionHandler, ExceptionTable}; /// A helper utility in wasmtime to explore the compiled object file format of @@ -70,6 +73,10 @@ pub struct ObjdumpCommand { /// Whether or not to show information about exception tables. #[arg(long, require_equals = true, value_name = "true|false")] exception_tables: Option>, + + /// Whether or not to show information about frame tables. + #[arg(long, require_equals = true, value_name = "true|false")] + frame_tables: Option>, } fn optional_flag_with_default(flag: Option>, default: bool) -> bool { @@ -97,6 +104,10 @@ impl ObjdumpCommand { optional_flag_with_default(self.exception_tables, true) } + fn frame_tables(&self) -> bool { + optional_flag_with_default(self.frame_tables, true) + } + /// Executes the command. pub fn execute(self) -> Result<()> { // Setup stdout handling color options. 
Also build some variables used @@ -150,6 +161,18 @@ impl ObjdumpCommand { .and_then(|bytes| ExceptionTable::parse(bytes).ok()) .map(|table| table.into_iter()) .map(|i| (Box::new(i) as Box>).peekable()), + frame_tables: elf + .section_by_name(obj::ELF_WASMTIME_FRAMES) + .and_then(|section| section.data().ok()) + .and_then(|bytes| FrameTable::parse(bytes).ok()) + .map(|table| table.into_program_points()) + .map(|i| (Box::new(i) as Box>).peekable()), + + frame_table_descriptors: elf + .section_by_name(obj::ELF_WASMTIME_FRAMES) + .and_then(|section| section.data().ok()) + .and_then(|bytes| FrameTable::parse(bytes).ok()), + objdump: &self, }; @@ -528,6 +551,21 @@ struct Decorator<'a> { stack_maps: Option)> + 'a>>>, exception_tables: Option, Vec)> + 'a>>>, + frame_tables: Option< + Peekable< + Box< + dyn Iterator< + Item = ( + u32, + FrameInstPos, + Vec<(u32, FrameTableDescriptorIndex, FrameStackShape)>, + ), + > + 'a, + >, + >, + >, + + frame_table_descriptors: Option>, } impl Decorator<'_> { @@ -536,6 +574,7 @@ impl Decorator<'_> { self.traps(address, post_list); self.stack_maps(address, post_list); self.exception_table(address, pre_list); + self.frame_table(address, pre_list, post_list); } fn addrmap(&mut self, address: u64, list: &mut Vec) { @@ -625,4 +664,65 @@ impl Decorator<'_> { } } } + + fn frame_table( + &mut self, + address: u64, + pre_list: &mut Vec, + post_list: &mut Vec, + ) { + if !self.objdump.frame_tables() { + return; + } + let (Some(frame_table_iter), Some(frame_tables)) = + (&mut self.frame_tables, &self.frame_table_descriptors) + else { + return; + }; + + while let Some((addr, pos, frames)) = + frame_table_iter.next_if(|(addr, _, _)| u64::from(*addr) <= address) + { + if u64::from(addr) != address { + continue; + } + let list = match pos { + // N.B.: the "post" position means that we are + // attached to the end of the previous instruction + // (its "post"); which means that from this + // instruction's PoV, we print before the instruction + // (the 
"pre list"). And vice versa for the "pre" + // position. Hence the reversal here. + FrameInstPos::Post => &mut *pre_list, + FrameInstPos::Pre => &mut *post_list, + }; + for (wasm_pc, frame_descriptor, stack_shape) in frames { + let (frame_descriptor_data, offset) = + frame_tables.frame_descriptor(frame_descriptor).unwrap(); + let frame_descriptor = FrameStateSlot::parse(frame_descriptor_data).unwrap(); + + let local_shape = Self::describe_local_shape(&frame_descriptor); + let stack_shape = Self::describe_stack_shape(&frame_descriptor, stack_shape); + let func_key = frame_descriptor.func_key(); + list.push(format!("debug frame state: func key {func_key:?}, wasm PC {wasm_pc}, slot at FP-0x{offset:x}, locals {local_shape}, stack {stack_shape}")); + } + } + } + + fn describe_local_shape(desc: &FrameStateSlot<'_>) -> String { + let mut parts = vec![]; + for (offset, ty) in desc.locals() { + parts.push(format!("{ty:?} @ slot+0x{:x}", offset.offset())); + } + parts.join(", ") + } + + fn describe_stack_shape(desc: &FrameStateSlot<'_>, shape: FrameStackShape) -> String { + let mut parts = vec![]; + for (offset, ty) in desc.stack(shape) { + parts.push(format!("{ty:?} @ slot+0x{:x}", offset.offset())); + } + parts.reverse(); + parts.join(", ") + } } diff --git a/tests/all/debug.rs b/tests/all/debug.rs new file mode 100644 index 000000000000..274a9b6d1d1e --- /dev/null +++ b/tests/all/debug.rs @@ -0,0 +1,107 @@ +//! Tests for instrumentation-based debugging. 
+ +use wasmtime::{Caller, Config, Engine, Extern, Func, Instance, Module, Store, ValType}; + +fn test_stack_values) + Send + Sync + 'static>( + wat: &str, + c: C, + f: F, +) -> anyhow::Result<()> { + let mut config = Config::default(); + config.debug_instrumentation(true); + config.wasm_exceptions(true); + c(&mut config); + let engine = Engine::new(&config)?; + let module = Module::new(&engine, wat)?; + + let mut store = Store::new(&engine, ()); + let func = Func::wrap(&mut store, move |caller: Caller<'_, ()>| { + f(caller); + }); + let instance = Instance::new(&mut store, &module, &[Extern::Func(func)])?; + let mut results = []; + instance + .get_func(&mut store, "main") + .unwrap() + .call(&mut store, &[], &mut results)?; + + Ok(()) +} + +#[test] +fn stack_values_two_frames() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + + for inlining in [false, true] { + test_stack_values( + r#" + (module + (import "" "host" (func)) + (func (export "main") + i32.const 1 + i32.const 2 + call 2 + drop) + (func (param i32 i32) (result i32) + local.get 0 + local.get 1 + call 0 + i32.add)) + "#, + |config| { + config.compiler_inlining(inlining); + config.compiler_force_inlining(inlining); + }, + |mut caller: Caller<'_, ()>| { + let mut stack = caller.stack_values().unwrap(); + assert_eq!(stack.len(), 2); + let mut frame = stack.frame(0); + assert_eq!(frame.wasm_function_index_and_pc().unwrap().0.as_u32(), 1); + assert_eq!(frame.wasm_function_index_and_pc().unwrap().1, 65); + + assert_eq!(frame.num_locals(), 2); + assert_eq!(frame.num_stacks(), 2); + assert!(matches!(frame.local(0).0, ValType::I32)); + assert!(matches!(frame.local(1).0, ValType::I32)); + assert_eq!(frame.local(0).1.unwrap_i32(), 1); + assert_eq!(frame.local(1).1.unwrap_i32(), 2); + assert!(matches!(frame.stack(0).0, ValType::I32)); + assert!(matches!(frame.stack(1).0, ValType::I32)); + assert_eq!(frame.stack(0).1.unwrap_i32(), 1); + assert_eq!(frame.stack(1).1.unwrap_i32(), 2); + + let mut frame = 
stack.frame(1); + assert_eq!(frame.wasm_function_index_and_pc().unwrap().0.as_u32(), 0); + assert_eq!(frame.wasm_function_index_and_pc().unwrap().1, 55); + }, + )?; + } + Ok(()) +} + +#[test] +fn stack_values_exceptions() -> anyhow::Result<()> { + test_stack_values( + r#" + (module + (tag $t (param i32)) + (import "" "host" (func)) + (func (export "main") + (block $b (result i32) + (try_table (catch $t $b) + (throw $t (i32.const 42))) + i32.const 0) + (call 0) + (drop))) + "#, + |_config| {}, + |mut caller: Caller<'_, ()>| { + let mut stack = caller.stack_values().unwrap(); + assert_eq!(stack.len(), 1); + let mut frame = stack.frame(0); + assert_eq!(frame.num_stacks(), 1); + assert!(matches!(frame.stack(0).0, ValType::I32)); + assert_eq!(frame.stack(0).1.unwrap_i32(), 42); + }, + ) +} diff --git a/tests/all/main.rs b/tests/all/main.rs index c91f7bb78ca8..5e3568fda87f 100644 --- a/tests/all/main.rs +++ b/tests/all/main.rs @@ -35,6 +35,7 @@ mod memory_creator; mod module; mod module_serialize; mod name; +mod native_debug; mod noextern; mod piped_tests; mod pooling_allocator; diff --git a/tests/all/debug/dump.rs b/tests/all/native_debug/dump.rs similarity index 100% rename from tests/all/debug/dump.rs rename to tests/all/native_debug/dump.rs diff --git a/tests/all/debug/gdb.rs b/tests/all/native_debug/gdb.rs similarity index 98% rename from tests/all/debug/gdb.rs rename to tests/all/native_debug/gdb.rs index 9a124de51cea..c5e104ee11c6 100644 --- a/tests/all/debug/gdb.rs +++ b/tests/all/native_debug/gdb.rs @@ -55,7 +55,7 @@ fn test_debug_dwarf_gdb() -> Result<()> { let output = gdb_with_script( &[ "-Ccache=n", - "-Ddebug-info", + "-Dnative-debug-info", "-Oopt-level=0", "--invoke", "fib", diff --git a/tests/all/debug/lldb.rs b/tests/all/native_debug/lldb.rs similarity index 93% rename from tests/all/debug/lldb.rs rename to tests/all/native_debug/lldb.rs index d64387fe2688..ebce6094a4b9 100644 --- a/tests/all/debug/lldb.rs +++ b/tests/all/native_debug/lldb.rs @@ -85,7 
+85,7 @@ pub fn dwarf_fib_wasm() -> Result<()> { let output = lldb_with_script( &[ "-Ccache=n", - "-Ddebug-info", + "-Dnative-debug-info", "-Oopt-level=0", "--invoke", "fib", @@ -123,7 +123,7 @@ pub fn dwarf_fib_wasm_dwarf5() -> Result<()> { let output = lldb_with_script( &[ "-Ccache=n", - "-Ddebug-info", + "-Dnative-debug-info", "-Oopt-level=0", "--invoke", "fib", @@ -161,7 +161,7 @@ pub fn dwarf_fib_wasm_split4() -> Result<()> { let output = lldb_with_script( &[ "-Ccache=n", - "-Ddebug-info", + "-Dnative-debug-info", "-Oopt-level=0", "--invoke", "fib", @@ -197,7 +197,12 @@ check: exited with status #[ignore] pub fn dwarf_generic() -> Result<()> { let output = lldb_with_script( - &["-Ccache=n", "-Ddebug-info", "-Oopt-level=0", DWARF_GENERIC], + &[ + "-Ccache=n", + "-Dnative-debug-info", + "-Oopt-level=0", + DWARF_GENERIC, + ], r#"br set -n debug_break -C up r p __vmctx->set() @@ -256,7 +261,7 @@ pub fn dwarf_codegen_optimized() -> Result<()> { let output = lldb_with_script( &[ "-Ccache=n", - "-Ddebug-info", + "-Dnative-debug-info", "-Oopt-level=2", DWARF_CODEGEN_OPTIMIZED, ], @@ -291,7 +296,7 @@ pub fn dwarf_codegen_optimized_wasm_optimized() -> Result<()> { let output = lldb_with_script( &[ "-Ccache=n", - "-Ddebug-info", + "-Dnative-debug-info", "-Oopt-level=2", DWARF_CODEGEN_OPTIMIZED_WASM_OPTIMIZED, ], @@ -327,7 +332,7 @@ pub fn dwarf_fraction_norm() -> Result<()> { &[ "-Ccache=n", "-Oopt-level=0", - "-Ddebug-info", + "-Dnative-debug-info", DWARF_FRACTION_NORM, ], r#"b dwarf_fraction_norm.cc:26 @@ -357,7 +362,7 @@ pub fn dwarf_two_removed_branches() -> Result<()> { &[ "-Ccache=n", "-Oopt-level=0", - "-Ddebug-info", + "-Dnative-debug-info", DWARF_TWO_REMOVED_BRANCHES, ], r#"r"#, @@ -380,7 +385,7 @@ pub fn dwarf_spilled_frame_base() -> Result<()> { &[ "-Ccache=n", "-Oopt-level=0", - "-Ddebug-info", + "-Dnative-debug-info", DWARF_SPILLED_FRAME_BASE, ], r#"b dwarf_spilled_frame_base.c:13 @@ -421,7 +426,12 @@ check: exited with status #[ignore] pub fn 
dwarf_fission() -> Result<()> { let output = lldb_with_script( - &["-Ccache=n", "-Ddebug-info", "-Oopt-level=0", DWARF_FISSION], + &[ + "-Ccache=n", + "-Dnative-debug-info", + "-Oopt-level=0", + DWARF_FISSION, + ], r#"breakpoint set --file dwarf_fission.c --line 8 r fr v @@ -449,7 +459,7 @@ check: exited with status = 0 fn test_dwarf_simple(wasm: &str, extra_args: &[&str]) -> Result<()> { println!("testing {wasm:?}"); - let mut args = vec!["-Ccache=n", "-Oopt-level=0", "-Ddebug-info"]; + let mut args = vec!["-Ccache=n", "-Oopt-level=0", "-Dnative-debug-info"]; args.extend(extra_args); args.push(wasm); let output = lldb_with_script( @@ -517,7 +527,7 @@ fn dwarf_multiple_codegen_units() -> Result<()> { ] { println!("testing {wasm:?}"); let output = lldb_with_script( - &["-Ccache=n", "-Oopt-level=0", "-Ddebug-info", wasm], + &["-Ccache=n", "-Oopt-level=0", "-Dnative-debug-info", wasm], r#" breakpoint set --file dwarf_multiple_codegen_units.rs --line 3 breakpoint set --file dwarf_multiple_codegen_units.rs --line 10 diff --git a/tests/all/debug/mod.rs b/tests/all/native_debug/mod.rs similarity index 100% rename from tests/all/debug/mod.rs rename to tests/all/native_debug/mod.rs diff --git a/tests/all/debug/obj.rs b/tests/all/native_debug/obj.rs similarity index 95% rename from tests/all/debug/obj.rs rename to tests/all/native_debug/obj.rs index 4395da771b9d..feb993b99f72 100644 --- a/tests/all/debug/obj.rs +++ b/tests/all/native_debug/obj.rs @@ -12,7 +12,7 @@ pub fn compile_cranelift( output: impl AsRef, ) -> Result<()> { let mut config = Config::new(); - config.debug_info(true); + config.native_debug_info(true); if let Some(target) = target { config.target(&target.to_string())?; } diff --git a/tests/all/debug/satisfy_memory_import.wat b/tests/all/native_debug/satisfy_memory_import.wat similarity index 100% rename from tests/all/debug/satisfy_memory_import.wat rename to tests/all/native_debug/satisfy_memory_import.wat diff --git a/tests/all/debug/simulate.rs 
b/tests/all/native_debug/simulate.rs similarity index 100% rename from tests/all/debug/simulate.rs rename to tests/all/native_debug/simulate.rs diff --git a/tests/all/debug/translate.rs b/tests/all/native_debug/translate.rs similarity index 100% rename from tests/all/debug/translate.rs rename to tests/all/native_debug/translate.rs diff --git a/tests/all/pulley.rs b/tests/all/pulley.rs index 870d621c34b0..427adf77d06c 100644 --- a/tests/all/pulley.rs +++ b/tests/all/pulley.rs @@ -487,7 +487,7 @@ async fn pulley_provenance_test_async_components() -> Result<()> { #[cfg(not(miri))] fn enabling_debug_info_doesnt_break_anything() -> Result<()> { let mut config = pulley_config(); - config.debug_info(true); + config.native_debug_info(true); let engine = Engine::new(&config)?; assert!(Module::from_file(&engine, "./tests/all/cli_tests/greeter_command.wat").is_err()); Ok(()) diff --git a/tests/all/winch_engine_features.rs b/tests/all/winch_engine_features.rs index 27f3508cae29..954ce213a277 100644 --- a/tests/all/winch_engine_features.rs +++ b/tests/all/winch_engine_features.rs @@ -52,7 +52,7 @@ fn ensure_compatibility_between_winch_and_signals_based_traps(config: &mut Confi fn ensure_compatibility_between_winch_and_generate_native_debuginfo( config: &mut Config, ) -> Result<()> { - config.debug_info(true); + config.native_debug_info(true); let result = Engine::new(&config); match result { Ok(_) => { diff --git a/tests/disas/debug-exceptions.wat b/tests/disas/debug-exceptions.wat new file mode 100644 index 000000000000..4a91001ce027 --- /dev/null +++ b/tests/disas/debug-exceptions.wat @@ -0,0 +1,86 @@ +;;! target = "aarch64" +;;! test = "compile" +;;! 
flags = ["-Wexceptions=yes", "-Wgc=yes", "-Ddebug-instrumentation=yes"] + +(module + (tag $t (param i32)) + (import "" "host" (func)) + (func (export "main") + (block $b (result i32) + (try_table (catch $t $b) + (throw $t (i32.const 42))) + i32.const 0) + (call 0) + (drop))) +;; wasm[0]::function[1]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; ldur x16, [x2, #8] +;; ldur x16, [x16, #0x10] +;; add x16, x16, #0xc0 +;; cmp sp, x16 +;; b.lo #0x10c +;; 1c: stp x27, x28, [sp, #-0x10]! +;; stp x25, x26, [sp, #-0x10]! +;; stp x23, x24, [sp, #-0x10]! +;; stp x21, x22, [sp, #-0x10]! +;; stp x19, x20, [sp, #-0x10]! +;; stp d14, d15, [sp, #-0x10]! +;; stp d12, d13, [sp, #-0x10]! +;; stp d10, d11, [sp, #-0x10]! +;; stp d8, d9, [sp, #-0x10]! +;; sub sp, sp, #0x20 +;; stur x2, [sp] +;; stur x2, [sp, #0x10] +;; mov w24, #0x2a +;; stur w24, [sp, #8] +;; ldur x2, [sp, #0x10] +;; bl #0x318 +;; 5c: mov x21, x2 +;; mov w3, #0x4000000 +;; mov w4, #2 +;; mov w5, #0x28 +;; mov w6, #8 +;; ldur x2, [sp, #0x10] +;; bl #0x2a4 +;; 78: ldur x4, [sp, #0x10] +;; ldr x9, [x4, #8] +;; ldr x13, [x9, #0x18] +;; add x9, x13, #0x20 +;; str w24, [x9, w2, uxtw] +;; add x10, x13, #0x18 +;; mov x12, x21 +;; str w12, [x10, w2, uxtw] +;; mov w11, #0 +;; add x12, x13, #0x1c +;; stur x13, [sp, #0x18] +;; str w11, [x12, w2, uxtw] +;; mov x3, x2 +;; ldur x2, [sp, #0x10] +;; bl #0x350 +;; ├─╼ exception frame offset: SP = FP - 0xb0 +;; ╰─╼ exception handler: tag=0, context at [SP+0x10], handler=0xb8 +;; b4: .byte 0x1f, 0xc1, 0x00, 0x00 +;; ldur x13, [sp, #0x18] +;; add x13, x13, #0x20 +;; ldr w15, [x13, w0, uxtw] +;; stur w15, [sp, #8] +;; ldur x2, [sp, #0x10] +;; ldr x0, [x2, #0x30] +;; ldr x2, [x2, #0x40] +;; ldur x3, [sp, #0x10] +;; blr x0 +;; ╰─╼ debug frame state: func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 69, slot at FP-0xb0, locals , stack I32 @ slot+0x8 +;; dc: add sp, sp, #0x20 +;; ldp d8, d9, [sp], #0x10 +;; ldp d10, d11, [sp], #0x10 +;; ldp d12, d13, [sp], 
#0x10 +;; ldp d14, d15, [sp], #0x10 +;; ldp x19, x20, [sp], #0x10 +;; ldp x21, x22, [sp], #0x10 +;; ldp x23, x24, [sp], #0x10 +;; ldp x25, x26, [sp], #0x10 +;; ldp x27, x28, [sp], #0x10 +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 10c: .byte 0x1f, 0xc1, 0x00, 0x00 From 1e287cee5fff53d492a08c275d26798af0bcedb9 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 2 Oct 2025 15:39:21 -0700 Subject: [PATCH 2/9] Update to new APIs on Cranelift side. --- crates/cranelift/src/compiled_function.rs | 7 ++- crates/cranelift/src/compiler.rs | 61 ++++++++++++++++++----- crates/cranelift/src/func_environ.rs | 31 ++++-------- crates/environ/src/compile/frame_table.rs | 4 +- crates/environ/src/compile/mod.rs | 2 +- crates/environ/src/key.rs | 19 +++++++ crates/wasmtime/src/compile.rs | 12 ++--- crates/winch/src/compiler.rs | 4 +- 8 files changed, 96 insertions(+), 44 deletions(-) diff --git a/crates/cranelift/src/compiled_function.rs b/crates/cranelift/src/compiled_function.rs index 9fa91295d6eb..ac1da8676532 100644 --- a/crates/cranelift/src/compiled_function.rs +++ b/crates/cranelift/src/compiled_function.rs @@ -3,7 +3,9 @@ use cranelift_codegen::{ Final, MachBufferFinalized, MachBufferFrameLayout, MachSrcLoc, ValueLabelsRanges, ir, isa::unwind::CfaUnwindInfo, isa::unwind::UnwindInfo, }; -use wasmtime_environ::{FilePos, InstructionAddressMap, PrimaryMap, TrapInformation}; +use wasmtime_environ::{ + FilePos, FrameStateSlotBuilder, InstructionAddressMap, PrimaryMap, TrapInformation, +}; #[derive(Debug, Clone, PartialEq, Eq, Default)] /// Metadata to translate from binary offsets back to the original @@ -61,6 +63,8 @@ pub struct CompiledFunction { /// The metadata for the compiled function, including unwind information /// the function address map. metadata: CompiledFunctionMetadata, + /// Debug metadata for the top-level function's state slot. 
+ pub debug_slot_descriptor: Option, } impl CompiledFunction { @@ -77,6 +81,7 @@ impl CompiledFunction { name_map, alignment, metadata: Default::default(), + debug_slot_descriptor: None, } } diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index e5811fe33621..112069d78d4b 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -34,10 +34,10 @@ use wasmparser::{FuncValidatorAllocations, FunctionBody}; use wasmtime_environ::obj::{ELF_WASMTIME_EXCEPTIONS, ELF_WASMTIME_FRAMES}; use wasmtime_environ::{ Abi, AddressMapSection, BuiltinFunctionIndex, CacheStore, CompileError, CompiledFunctionBody, - DefinedFuncIndex, FlagValue, FrameInstPos, FrameStackShape, FrameTableBuilder, FuncKey, - FunctionBodyData, FunctionLoc, HostCall, InliningCompiler, ModuleTranslation, - ModuleTypesBuilder, PtrSize, StackMapSection, StaticModuleIndex, TrapEncodingBuilder, - TrapSentinel, TripleExt, Tunables, VMOffsets, WasmFuncType, WasmValType, + DefinedFuncIndex, FlagValue, FrameInstPos, FrameStackShape, FrameStateSlotBuilder, + FrameTableBuilder, FuncKey, FunctionBodyData, FunctionLoc, HostCall, InliningCompiler, + ModuleTranslation, ModuleTypesBuilder, PtrSize, StackMapSection, StaticModuleIndex, + TrapEncodingBuilder, TrapSentinel, TripleExt, Tunables, VMOffsets, WasmFuncType, WasmValType, }; use wasmtime_unwinder::ExceptionTableBuilder; @@ -56,6 +56,7 @@ struct CompilerContext { codegen_context: Context, incremental_cache_ctx: Option, validator_allocations: FuncValidatorAllocations, + debug_slot_descriptor: Option, abi: Option, } @@ -66,6 +67,7 @@ impl Default for CompilerContext { codegen_context: Context::new(), incremental_cache_ctx: None, validator_allocations: Default::default(), + debug_slot_descriptor: None, abi: None, } } @@ -329,13 +331,19 @@ impl wasmtime_environ::Compiler for Compiler { .map_err(|e| CompileError::Codegen(e.to_string()))?; } + let needs_gc_heap = func_env.needs_gc_heap(); + + if let Some((_, 
slot_builder)) = func_env.state_slot { + compiler.cx.debug_slot_descriptor = Some(slot_builder); + } + let timing = cranelift_codegen::timing::take_current(); log::debug!("`{symbol}` translated to CLIF in {:?}", timing.total()); log::trace!("`{symbol}` timing info\n{timing}"); Ok(CompiledFunctionBody { code: box_dyn_any_compiler_context(Some(compiler.cx)), - needs_gc_heap: func_env.needs_gc_heap(), + needs_gc_heap, }) } @@ -561,12 +569,12 @@ impl wasmtime_environ::Compiler for Compiler { fn append_code( &self, obj: &mut Object<'static>, - funcs: &[(String, Box)], + funcs: &[(String, FuncKey, Box)], resolve_reloc: &dyn Fn(usize, FuncKey) -> usize, ) -> Result> { log::trace!( "appending functions to object file: {:#?}", - funcs.iter().map(|(sym, _)| sym).collect::>() + funcs.iter().map(|(sym, _, _)| sym).collect::>() ); let mut builder = @@ -580,8 +588,24 @@ impl wasmtime_environ::Compiler for Compiler { let mut exception_tables = ExceptionTableBuilder::default(); let mut frame_tables = FrameTableBuilder::default(); + let mut frame_descriptors = HashMap::new(); + if self.tunables.debug_instrumentation { + for (_, key, func) in funcs { + debug_assert!(!func.is::>()); + debug_assert!(func.is::()); + let func = func.downcast_ref::().unwrap(); + frame_descriptors.insert( + *key, + func.debug_slot_descriptor + .as_ref() + .map(|builder| builder.serialize()) + .unwrap_or_else(|| vec![]), + ); + } + } + let mut ret = Vec::with_capacity(funcs.len()); - for (i, (sym, func)) in funcs.iter().enumerate() { + for (i, (sym, _key, func)) in funcs.iter().enumerate() { debug_assert!(!func.is::>()); debug_assert!(func.is::()); let func = func.downcast_ref::().unwrap(); @@ -614,6 +638,7 @@ impl wasmtime_environ::Compiler for Compiler { range.clone(), func.buffer.debug_tags(), frame_layout, + &frame_descriptors, )?; } builder.append_padding(self.linkopts.padding_between_functions); @@ -1405,6 +1430,10 @@ impl FunctionCompiler<'_> { } } + if let Some(builder) = 
self.cx.debug_slot_descriptor.take() { + compiled_function.debug_slot_descriptor = Some(builder); + } + if body_and_tunables .map(|(_, t)| t.generate_native_debuginfo) .unwrap_or(false) @@ -1477,8 +1506,9 @@ fn clif_to_env_frame_tables<'a>( range: Range, tag_sites: impl Iterator>, frame_layout: &MachBufferFrameLayout, + frame_descriptors: &HashMap>, ) -> anyhow::Result<()> { - let mut frame_descriptors = HashMap::new(); + let mut frame_descriptor_indices = HashMap::new(); for tag_site in tag_sites { // Split into frames; each has three debug tags. let mut frames = vec![]; @@ -1492,11 +1522,18 @@ fn clif_to_env_frame_tables<'a>( panic!("Invalid tags"); }; - let frame_descriptor = *frame_descriptors.entry(slot).or_insert_with(|| { + let func_key = frame_layout.stackslots[slot] + .key + .expect("Key must be present on stackslot used as state slot") + .bits(); + let func_key = FuncKey::from_raw_u64(func_key); + let frame_descriptor = *frame_descriptor_indices.entry(slot).or_insert_with(|| { let slot_to_fp_offset = frame_layout.frame_to_fp_offset - frame_layout.stackslots[slot].offset; - let descriptor = frame_layout.stackslots[slot].descriptor.clone(); - builder.add_frame_descriptor(slot_to_fp_offset, descriptor) + let descriptor = frame_descriptors + .get(&func_key) + .expect("frame descriptor not present for FuncKey"); + builder.add_frame_descriptor(slot_to_fp_offset, &descriptor) }); frames.push(( diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index 91ff563c60ae..477e2da22076 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -1197,12 +1197,14 @@ impl<'module_environment> FuncEnvironment<'module_environment> { // Initially zero-size and with no descriptor; we will fill in // this info once we're done with the function body. 
- let slot = builder.func.create_sized_stack_slot(ir::StackSlotData::new( - ir::StackSlotKind::ExplicitSlot, - 0, - 0, - vec![], - )); + let slot = builder + .func + .create_sized_stack_slot(ir::StackSlotData::new_with_key( + ir::StackSlotKind::ExplicitSlot, + 0, + 0, + ir::StackSlotKey::new(self.key.into_raw_u64()), + )); self.state_slot = Some((slot, frame_builder)); } @@ -1287,21 +1289,8 @@ impl<'module_environment> FuncEnvironment<'module_environment> { } } - fn set_debug_tags( - &self, - builder: &mut FunctionBuilder, - stack: &FuncTranslationStacks, - srcloc: ir::SourceLoc, - ) { - if self.state_slot.is_some() { - let tags = self.debug_tags(stack, srcloc); - builder.set_debug_tags(tags); - } - } - fn finish_debug_metadata(&self, builder: &mut FunctionBuilder) { if let Some((slot, b)) = &self.state_slot { - builder.func.sized_stack_slots[*slot].descriptor = b.serialize(); builder.func.sized_stack_slots[*slot].size = b.size(); } } @@ -1335,7 +1324,9 @@ impl<'module_environment> FuncEnvironment<'module_environment> { srcloc: ir::SourceLoc, ) -> WasmResult<()> { if stack.reachable() { - self.set_debug_tags(builder, stack, srcloc); + let inst = builder.ins().sequence_point(); + let tags = self.debug_tags(stack, srcloc); + builder.func.debug_tags.set(inst, tags); } Ok(()) } diff --git a/crates/environ/src/compile/frame_table.rs b/crates/environ/src/compile/frame_table.rs index 7757c69c95cd..944616a16883 100644 --- a/crates/environ/src/compile/frame_table.rs +++ b/crates/environ/src/compile/frame_table.rs @@ -231,10 +231,10 @@ impl FrameTableBuilder { pub fn add_frame_descriptor( &mut self, slot_to_fp_offset: u32, - data: Vec, + data: &[u8], ) -> FrameTableDescriptorIndex { let start = u32::try_from(self.frame_descriptor_data.len()).unwrap(); - self.frame_descriptor_data.extend(data); + self.frame_descriptor_data.extend(data.iter().cloned()); let end = u32::try_from(self.frame_descriptor_data.len()).unwrap(); let index = FrameTableDescriptorIndex( diff --git 
a/crates/environ/src/compile/mod.rs b/crates/environ/src/compile/mod.rs index df2d64acff0b..034923b769e9 100644 --- a/crates/environ/src/compile/mod.rs +++ b/crates/environ/src/compile/mod.rs @@ -333,7 +333,7 @@ pub trait Compiler: Send + Sync { fn append_code( &self, obj: &mut Object<'static>, - funcs: &[(String, Box)], + funcs: &[(String, FuncKey, Box)], resolve_reloc: &dyn Fn(usize, FuncKey) -> usize, ) -> Result>; diff --git a/crates/environ/src/key.rs b/crates/environ/src/key.rs index b05f6b3fed92..f09d108d6e7d 100644 --- a/crates/environ/src/key.rs +++ b/crates/environ/src/key.rs @@ -392,6 +392,25 @@ impl FuncKey { } } + /// Create a key from a raw packed `u64` representation. + /// + /// Should only be given a value produced by `into_raw_u64()`. + /// + /// Panics when given an invalid value. + pub fn from_raw_u64(value: u64) -> Self { + let hi = u32::try_from(value >> 32).unwrap(); + let lo = u32::try_from(value & 0xffff_ffff).unwrap(); + FuncKey::from_raw_parts(hi, lo) + } + + /// Produce a packed `u64` representation of this key. + /// + /// May be used with `from_raw_u64()` to reconstruct this key. + pub fn into_raw_u64(&self) -> u64 { + let (hi, lo) = self.into_raw_parts(); + (u64::from(hi) << 32) | u64::from(lo) + } + /// Unwrap a `FuncKey::DefinedWasmFunction` or else panic. 
pub fn unwrap_defined_wasm_function(self) -> (StaticModuleIndex, DefinedFuncIndex) { match self { diff --git a/crates/wasmtime/src/compile.rs b/crates/wasmtime/src/compile.rs index 5d210bd58c4d..1a2796394b08 100644 --- a/crates/wasmtime/src/compile.rs +++ b/crates/wasmtime/src/compile.rs @@ -890,7 +890,7 @@ impl UnlinkedCompileOutputs<'_> { needs_gc_heap |= output.function.needs_gc_heap; let index = compiled_funcs.len(); - compiled_funcs.push((output.symbol, output.function.code)); + compiled_funcs.push((output.symbol, output.key, output.function.code)); if output.start_srcloc != FilePos::none() { indices @@ -913,9 +913,9 @@ impl UnlinkedCompileOutputs<'_> { struct PreLinkOutput { /// Whether or not any of these functions require a GC heap needs_gc_heap: bool, - /// The flattened list of (symbol name, compiled function) pairs, as they - /// will be laid out in the object file. - compiled_funcs: Vec<(String, Box)>, + /// The flattened list of (symbol name, FuncKey, compiled + /// function) pairs, as they will be laid out in the object file. + compiled_funcs: Vec<(String, FuncKey, Box)>, /// The `FunctionIndices` mapping our function keys to indices in that flat /// list. 
indices: FunctionIndices, @@ -937,7 +937,7 @@ impl FunctionIndices { self, mut obj: object::write::Object<'static>, engine: &'a Engine, - compiled_funcs: Vec<(String, Box)>, + compiled_funcs: Vec<(String, FuncKey, Box)>, translations: PrimaryMap>, dwarf_package_bytes: Option<&[u8]>, ) -> Result<(wasmtime_environ::ObjectBuilder<'a>, Artifacts)> { @@ -966,7 +966,7 @@ impl FunctionIndices { &|module, func| { let i = self.indices[&FuncKey::DefinedWasmFunction(module, func)]; let (symbol, _) = symbol_ids_and_locs[i]; - let (_, compiled_func) = &compiled_funcs[i]; + let (_, _, compiled_func) = &compiled_funcs[i]; (symbol, &**compiled_func) }, dwarf_package_bytes, diff --git a/crates/winch/src/compiler.rs b/crates/winch/src/compiler.rs index 66279e0f2bd7..13a5e80d988f 100644 --- a/crates/winch/src/compiler.rs +++ b/crates/winch/src/compiler.rs @@ -189,7 +189,7 @@ impl wasmtime_environ::Compiler for Compiler { fn append_code( &self, obj: &mut Object<'static>, - funcs: &[(String, Box)], + funcs: &[(String, FuncKey, Box)], resolve_reloc: &dyn Fn(usize, wasmtime_environ::FuncKey) -> usize, ) -> Result> { self.trampolines.append_code(obj, funcs, resolve_reloc) @@ -334,7 +334,7 @@ impl wasmtime_environ::Compiler for NoInlineCompiler { fn append_code( &self, obj: &mut Object<'static>, - funcs: &[(String, Box)], + funcs: &[(String, FuncKey, Box)], resolve_reloc: &dyn Fn(usize, FuncKey) -> usize, ) -> Result> { self.0.append_code(obj, funcs, resolve_reloc) From 3409838818c988a0de07736e06d1b3f5e9f4e949 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 2 Oct 2025 15:48:08 -0700 Subject: [PATCH 3/9] Test update. 
--- crates/cranelift/src/func_environ.rs | 2 +- tests/disas/debug-exceptions.wat | 44 +++++++++++++++------------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index 477e2da22076..8231c2a38aea 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -1323,7 +1323,7 @@ impl<'module_environment> FuncEnvironment<'module_environment> { stack: &mut FuncTranslationStacks, srcloc: ir::SourceLoc, ) -> WasmResult<()> { - if stack.reachable() { + if stack.reachable() && self.state_slot.is_some() { let inst = builder.ins().sequence_point(); let tags = self.debug_tags(stack, srcloc); builder.func.debug_tags.set(inst, tags); diff --git a/tests/disas/debug-exceptions.wat b/tests/disas/debug-exceptions.wat index 4a91001ce027..9d153df4bb6b 100644 --- a/tests/disas/debug-exceptions.wat +++ b/tests/disas/debug-exceptions.wat @@ -32,9 +32,11 @@ ;; sub sp, sp, #0x20 ;; stur x2, [sp] ;; stur x2, [sp, #0x10] -;; mov w24, #0x2a -;; stur w24, [sp, #8] +;; mov w27, #0x2a +;; ╰─╼ debug frame state: func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 53, slot at FP-0xb0, locals , stack +;; stur w27, [sp, #8] ;; ldur x2, [sp, #0x10] +;; ╰─╼ debug frame state: func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 63, slot at FP-0xb0, locals , stack I32 @ slot+0x8 ;; bl #0x318 ;; 5c: mov x21, x2 ;; mov w3, #0x4000000 @@ -43,35 +45,37 @@ ;; mov w6, #8 ;; ldur x2, [sp, #0x10] ;; bl #0x2a4 -;; 78: ldur x4, [sp, #0x10] -;; ldr x9, [x4, #8] -;; ldr x13, [x9, #0x18] -;; add x9, x13, #0x20 -;; str w24, [x9, w2, uxtw] -;; add x10, x13, #0x18 -;; mov x12, x21 -;; str w12, [x10, w2, uxtw] -;; mov w11, #0 -;; add x12, x13, #0x1c -;; stur x13, [sp, #0x18] -;; str w11, [x12, w2, uxtw] +;; 78: ldur x8, [sp, #0x10] +;; ldr x13, [x8, #8] +;; ldr x3, [x13, #0x18] +;; add x13, x3, #0x20 +;; str w27, [x13, w2, uxtw] +;; add 
x14, x3, #0x18 +;; mov x0, x21 +;; str w0, [x14, w2, uxtw] +;; mov w15, #0 +;; add x0, x3, #0x1c +;; stur x3, [sp, #0x18] +;; str w15, [x0, w2, uxtw] ;; mov x3, x2 ;; ldur x2, [sp, #0x10] ;; bl #0x350 ;; ├─╼ exception frame offset: SP = FP - 0xb0 ;; ╰─╼ exception handler: tag=0, context at [SP+0x10], handler=0xb8 ;; b4: .byte 0x1f, 0xc1, 0x00, 0x00 -;; ldur x13, [sp, #0x18] -;; add x13, x13, #0x20 -;; ldr w15, [x13, w0, uxtw] -;; stur w15, [sp, #8] +;; ldur x3, [sp, #0x18] +;; add x1, x3, #0x20 +;; ldr w3, [x1, w0, uxtw] +;; stur w3, [sp, #8] ;; ldur x2, [sp, #0x10] -;; ldr x0, [x2, #0x30] +;; ╰─╼ debug frame state: func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 69, slot at FP-0xb0, locals , stack I32 @ slot+0x8 +;; ldr x5, [x2, #0x30] ;; ldr x2, [x2, #0x40] ;; ldur x3, [sp, #0x10] -;; blr x0 +;; blr x5 ;; ╰─╼ debug frame state: func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 69, slot at FP-0xb0, locals , stack I32 @ slot+0x8 ;; dc: add sp, sp, #0x20 +;; ╰─╼ debug frame state: func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 71, slot at FP-0xb0, locals , stack I32 @ slot+0x8 ;; ldp d8, d9, [sp], #0x10 ;; ldp d10, d11, [sp], #0x10 ;; ldp d12, d13, [sp], #0x10 From 4babca4ac471c67e1889d3ea19690b7db96d2aa2 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 2 Oct 2025 16:17:48 -0700 Subject: [PATCH 4/9] Adjust objdump printing of InstPos on frame progpoints; and adjust progpoint collapsing. 
--- crates/environ/src/compile/frame_table.rs | 10 +++- src/commands/objdump.rs | 6 +- tests/disas/debug-exceptions.wat | 67 ++++++++++++----------- 3 files changed, 47 insertions(+), 36 deletions(-) diff --git a/crates/environ/src/compile/frame_table.rs b/crates/environ/src/compile/frame_table.rs index 944616a16883..a2f8d30a8575 100644 --- a/crates/environ/src/compile/frame_table.rs +++ b/crates/environ/src/compile/frame_table.rs @@ -260,11 +260,15 @@ impl FrameTableBuilder { frames: &[(u32, FrameTableDescriptorIndex, FrameStackShape)], ) { let pc_and_pos = FrameInstPos::encode(native_pc, pos); - // If we already have a program point record at this PC, don't add another. - if let Some(last) = self.progpoint_pcs.last() + // If we already have a program point record at this PC, + // overwrite it. + while let Some(last) = self.progpoint_pcs.last() && last.get(LittleEndian) == pc_and_pos { - return; + self.progpoint_pcs.pop(); + self.progpoint_descriptor_offsets.pop(); + self.progpoint_descriptor_data + .truncate(self.progpoint_descriptor_data.len() - 3); } let start = u32::try_from(self.progpoint_descriptor_data.len()).unwrap(); diff --git a/src/commands/objdump.rs b/src/commands/objdump.rs index 64d7a29aa827..60e595b395ab 100644 --- a/src/commands/objdump.rs +++ b/src/commands/objdump.rs @@ -696,6 +696,10 @@ impl Decorator<'_> { FrameInstPos::Post => &mut *pre_list, FrameInstPos::Pre => &mut *post_list, }; + let pos = match pos { + FrameInstPos::Post => "after previous inst", + FrameInstPos::Pre => "before next inst", + }; for (wasm_pc, frame_descriptor, stack_shape) in frames { let (frame_descriptor_data, offset) = frame_tables.frame_descriptor(frame_descriptor).unwrap(); @@ -704,7 +708,7 @@ impl Decorator<'_> { let local_shape = Self::describe_local_shape(&frame_descriptor); let stack_shape = Self::describe_stack_shape(&frame_descriptor, stack_shape); let func_key = frame_descriptor.func_key(); - list.push(format!("debug frame state: func key {func_key:?}, wasm PC 
{wasm_pc}, slot at FP-0x{offset:x}, locals {local_shape}, stack {stack_shape}")); + list.push(format!("debug frame state ({pos}): func key {func_key:?}, wasm PC {wasm_pc}, slot at FP-0x{offset:x}, locals {local_shape}, stack {stack_shape}")); } } } diff --git a/tests/disas/debug-exceptions.wat b/tests/disas/debug-exceptions.wat index 9d153df4bb6b..2a90b5d81553 100644 --- a/tests/disas/debug-exceptions.wat +++ b/tests/disas/debug-exceptions.wat @@ -8,6 +8,7 @@ (func (export "main") (block $b (result i32) (try_table (catch $t $b) + (drop (i32.const 42)) (throw $t (i32.const 42))) i32.const 0) (call 0) @@ -19,7 +20,7 @@ ;; ldur x16, [x16, #0x10] ;; add x16, x16, #0xc0 ;; cmp sp, x16 -;; b.lo #0x10c +;; b.lo #0x110 ;; 1c: stp x27, x28, [sp, #-0x10]! ;; stp x25, x26, [sp, #-0x10]! ;; stp x23, x24, [sp, #-0x10]! @@ -33,49 +34,51 @@ ;; stur x2, [sp] ;; stur x2, [sp, #0x10] ;; mov w27, #0x2a -;; ╰─╼ debug frame state: func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 53, slot at FP-0xb0, locals , stack +;; ╰─╼ debug frame state (before next inst): func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 61, slot at FP-0xb0, locals , stack ;; stur w27, [sp, #8] +;; stur w27, [sp, #8] +;; ╰─╼ debug frame state (before next inst): func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 64, slot at FP-0xb0, locals , stack ;; ldur x2, [sp, #0x10] -;; ╰─╼ debug frame state: func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 63, slot at FP-0xb0, locals , stack I32 @ slot+0x8 -;; bl #0x318 -;; 5c: mov x21, x2 +;; ╰─╼ debug frame state (before next inst): func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 66, slot at FP-0xb0, locals , stack I32 @ slot+0x8 +;; bl #0x31c +;; 60: mov x21, x2 ;; mov w3, #0x4000000 ;; mov w4, #2 ;; mov w5, #0x28 ;; mov w6, #8 ;; ldur x2, [sp, #0x10] -;; bl #0x2a4 -;; 78: ldur x8, [sp, #0x10] -;; ldr x13, [x8, #8] 
-;; ldr x3, [x13, #0x18] -;; add x13, x3, #0x20 -;; str w27, [x13, w2, uxtw] -;; add x14, x3, #0x18 -;; mov x0, x21 -;; str w0, [x14, w2, uxtw] -;; mov w15, #0 -;; add x0, x3, #0x1c -;; stur x3, [sp, #0x18] -;; str w15, [x0, w2, uxtw] +;; bl #0x2a8 +;; 7c: ldur x11, [sp, #0x10] +;; ldr x0, [x11, #8] +;; ldr x5, [x0, #0x18] +;; add x0, x5, #0x20 +;; str w27, [x0, w2, uxtw] +;; add x3, x5, #0x18 +;; mov x4, x21 +;; str w4, [x3, w2, uxtw] +;; mov w3, #0 +;; add x4, x5, #0x1c +;; stur x5, [sp, #0x18] +;; str w3, [x4, w2, uxtw] ;; mov x3, x2 ;; ldur x2, [sp, #0x10] -;; bl #0x350 +;; bl #0x354 ;; ├─╼ exception frame offset: SP = FP - 0xb0 -;; ╰─╼ exception handler: tag=0, context at [SP+0x10], handler=0xb8 -;; b4: .byte 0x1f, 0xc1, 0x00, 0x00 -;; ldur x3, [sp, #0x18] -;; add x1, x3, #0x20 -;; ldr w3, [x1, w0, uxtw] -;; stur w3, [sp, #8] +;; ╰─╼ exception handler: tag=0, context at [SP+0x10], handler=0xbc +;; b8: .byte 0x1f, 0xc1, 0x00, 0x00 +;; ldur x5, [sp, #0x18] +;; add x4, x5, #0x20 +;; ldr w6, [x4, w0, uxtw] +;; stur w6, [sp, #8] ;; ldur x2, [sp, #0x10] -;; ╰─╼ debug frame state: func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 69, slot at FP-0xb0, locals , stack I32 @ slot+0x8 -;; ldr x5, [x2, #0x30] +;; ╰─╼ debug frame state (before next inst): func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 72, slot at FP-0xb0, locals , stack I32 @ slot+0x8 +;; ldr x8, [x2, #0x30] ;; ldr x2, [x2, #0x40] ;; ldur x3, [sp, #0x10] -;; blr x5 -;; ╰─╼ debug frame state: func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 69, slot at FP-0xb0, locals , stack I32 @ slot+0x8 -;; dc: add sp, sp, #0x20 -;; ╰─╼ debug frame state: func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 71, slot at FP-0xb0, locals , stack I32 @ slot+0x8 +;; blr x8 +;; ╰─╼ debug frame state (after previous inst): func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 
72, slot at FP-0xb0, locals , stack I32 @ slot+0x8 +;; e0: add sp, sp, #0x20 +;; ╰─╼ debug frame state (before next inst): func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 75, slot at FP-0xb0, locals , stack ;; ldp d8, d9, [sp], #0x10 ;; ldp d10, d11, [sp], #0x10 ;; ldp d12, d13, [sp], #0x10 @@ -87,4 +90,4 @@ ;; ldp x27, x28, [sp], #0x10 ;; ldp x29, x30, [sp], #0x10 ;; ret -;; 10c: .byte 0x1f, 0xc1, 0x00, 0x00 +;; 110: .byte 0x1f, 0xc1, 0x00, 0x00 From 6ad5976cb3ed4a86ced948f0ee9c4135c53c06d2 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 2 Oct 2025 21:33:23 -0700 Subject: [PATCH 5/9] Convert to iterator form. --- crates/wasmtime/src/runtime/debug.rs | 186 ++++++++++-------- .../wasmtime/src/runtime/vm/traphandlers.rs | 2 + .../src/runtime/vm/traphandlers/backtrace.rs | 58 +++++- tests/all/debug.rs | 53 +++-- 4 files changed, 201 insertions(+), 98 deletions(-) diff --git a/crates/wasmtime/src/runtime/debug.rs b/crates/wasmtime/src/runtime/debug.rs index 98bb6028f2f2..5da14c71a9e2 100644 --- a/crates/wasmtime/src/runtime/debug.rs +++ b/crates/wasmtime/src/runtime/debug.rs @@ -3,10 +3,10 @@ use crate::{ AnyRef, ExnRef, ExternRef, Func, Instance, Module, Val, ValType, store::{AutoAssertNoGc, StoreOpaque}, - vm::{Backtrace, VMContext}, + vm::{CurrentActivationBacktrace, VMContext}, }; use alloc::vec::Vec; -use core::{ffi::c_void, ops::ControlFlow, ptr::NonNull}; +use core::{ffi::c_void, ptr::NonNull}; use wasmtime_environ::{ DefinedFuncIndex, FrameInstPos, FrameStackShape, FrameStateSlot, FrameStateSlotOffset, FrameTableDescriptorIndex, FrameValType, FuncKey, @@ -33,18 +33,11 @@ impl StoreOpaque { return None; } - let mut frames = vec![]; - Backtrace::trace(self, |frame| { - // `is_trapping_frame == false`: for now, we do not yet - // support capturing stack values after a trap, so the PC - // we use to look up metadata is always a "post-position" - // PC, i.e., a call's return address. 
- frames.extend(VirtualFrame::decode(self, frame, false)); - ControlFlow::Continue(()) - }); + let iter = unsafe { CurrentActivationBacktrace::new(self) }; Some(StackView { - store: self, - frames, + iter, + is_trapping_frame: false, + frames: vec![], }) } } @@ -54,23 +47,53 @@ impl StoreOpaque { /// See the documentation on `Store::stack_value` for more information /// about which frames this view will show. pub struct StackView<'a> { - /// Mutable borrow held to the store. + /// Iterator over frames. /// - /// This both ensures that the stack does not mutate while we're - /// observing it (any borrow would do), and lets us create - /// host-API GC references as values that are references are read - /// off of the stack (a mutable borrow is needed for this). - store: &'a mut StoreOpaque, + /// This iterator owns the store while the view exists (accessible + /// as `iter.store`). + iter: CurrentActivationBacktrace<'a>, - /// Pre-enumerated frames. We precompute this rather than walking - /// a true iterator finger up the stack (e.g., current FP and - /// current `CallThreadState`) because our existing unwinder logic - /// is written in a visit-with-closure style; and users of this - /// API are likely to visit every frame anyway, so - /// sparseness/efficiency is not a main concern here. + /// Is the next frame to be visited by the iterator a trapping + /// frame? + /// + /// This alters how we interpret `pc`: for a trap, we look at the + /// instruction that *starts* at `pc`, while for all frames + /// further up the stack (i.e., at a callsite), we look at the + /// instruction that *ends* at `pc`. + is_trapping_frame: bool, + + /// Virtual frame queue: decoded from `iter`, not yet + /// yielded. Innermost frame on top (last). + /// + /// This is only non-empty when there is more than one virtual + /// frame in a physical frame (i.e., for inlining); thus, its size + /// is bounded by our inlining depth. 
frames: Vec, } +impl<'a> Iterator for StackView<'a> { + type Item = FrameView; + fn next(&mut self) -> Option { + // If there are no virtual frames to yield, take and decode + // the next physical frame. + // + // Note that `if` rather than `while` here, and the assert + // that we get some virtual frames back, enforce the invariant + // that each physical frame decodes to at least one virtual + // frame (i.e., there are no physical frames for interstitial + // functions or other things that we completely ignore). If + // this ever changes, we can remove the assert and convert + // this to a loop that polls until it finds virtual frames. + if self.frames.is_empty() { + let next_frame = self.iter.next()?; + self.frames = VirtualFrame::decode(self.iter.store, next_frame, self.is_trapping_frame); + self.is_trapping_frame = false; + } + + self.frames.pop().map(move |vf| FrameView::new(vf)) + } +} + /// Internal data pre-computed for one stack frame. /// /// This combines physical frame info (pc, fp) with the module this PC @@ -93,32 +116,9 @@ struct VirtualFrame { stack_shape: FrameStackShape, } -/// A view of a frame that can decode values in that frame. -pub struct FrameView<'a> { - frame_state_slot: FrameStateSlot<'a>, - store: &'a mut StoreOpaque, - slot_addr: usize, - wasm_pc: u32, - stack: Vec<(FrameStateSlotOffset, FrameValType)>, -} - -impl<'a> StackView<'a> { - /// Get a handle to a specific frame. - /// - /// # Panics - /// - /// Panics if the index is out of range. - pub fn frame(&mut self, index: usize) -> FrameView<'_> { - FrameView::new(self.store, &self.frames[index]) - } - - /// Get the number of frames viewable on this stack. - pub fn len(&self) -> usize { - self.frames.len() - } -} - impl VirtualFrame { + /// Return virtual frames corresponding to a physical frame, from + /// outermost to innermost. 
fn decode(store: &StoreOpaque, frame: Frame, is_trapping_frame: bool) -> Vec { let module = store .modules() @@ -137,7 +137,7 @@ impl VirtualFrame { return vec![]; }; - let mut frames: Vec<_> = program_points + program_points .map(|(wasm_pc, frame_descriptor, stack_shape)| VirtualFrame { fp: frame.fp(), module: module.clone(), @@ -145,17 +145,35 @@ impl VirtualFrame { frame_descriptor, stack_shape, }) - .collect(); - - // Reverse the frames so we return them inside-out, matching - // the bottom-up stack traversal order. - frames.reverse(); - frames + .collect() } } -impl<'a> FrameView<'a> { - fn new(store: &'a mut StoreOpaque, frame: &'a VirtualFrame) -> Self { +/// A view of a frame that can decode values in that frame. +pub struct FrameView { + slot_addr: usize, + func_key: FuncKey, + wasm_pc: u32, + /// Shape of locals in this frame. + /// + /// We need to store this locally because `FrameView` cannot + /// borrow the store: it needs a mut borrow, and an iterator + /// cannot yield the same mut borrow multiple times because it + /// cannot control the lifetime of the values it yields (the + /// signature of `next()` does not bound the return value to the + /// `&mut self` arg). + locals: Vec<(FrameStateSlotOffset, FrameValType)>, + /// Shape of the stack slots at this program point in this frame. + /// + /// In addition to the borrowing-related reason above, we also + /// materialize this because we want to provide O(1) access to the + /// stack by depth, and the frame slot descriptor stores info in a + /// linked-list (actually DAG, with dedup'ing) way. + stack: Vec<(FrameStateSlotOffset, FrameValType)>, +} + +impl FrameView { + fn new(frame: VirtualFrame) -> Self { let frame_table = frame.module.frame_table(); // Parse the frame descriptor. 
let (data, slot_to_fp_offset) = frame_table @@ -165,42 +183,51 @@ impl<'a> FrameView<'a> { let slot_addr = frame .fp .wrapping_sub(usize::try_from(slot_to_fp_offset).unwrap()); - // Materialize the stack shape so we have O(1) access to its elements. + + // Materialize the stack shape so we have O(1) access to its + // elements, and so we don't need to keep the borrow to the + // module alive. let mut stack = frame_state_slot .stack(frame.stack_shape) .collect::>(); stack.reverse(); // Put top-of-stack last. + + // Materialize the local offsets/types so we don't need to + // keep the borrow to the module alive. + let locals = frame_state_slot.locals().collect::>(); + FrameView { - store, - frame_state_slot, slot_addr, + func_key: frame_state_slot.func_key(), wasm_pc: frame.wasm_pc, stack, + locals, } } - fn raw_instance(&mut self) -> &'a crate::vm::Instance { + fn raw_instance<'a>(&self, _store: &'a mut StoreOpaque) -> &'a crate::vm::Instance { // Read out the vmctx slot. // SAFETY: vmctx is always at offset 0 in the slot. let vmctx: *mut VMContext = unsafe { *(self.slot_addr as *mut _) }; let vmctx = NonNull::new(vmctx).expect("null vmctx in debug state slot"); // SAFETY: the stored vmctx value is a valid instance in this - // store; we only visit frames from this store in teh backtrace. + // store; we only visit frames from this store in the + // backtrace. let instance = unsafe { crate::vm::Instance::from_vmctx(vmctx) }; // SAFETY: the instance pointer read above is valid. unsafe { instance.as_ref() } } /// Get the instance associated with this frame. 
- pub fn instance(&mut self) -> Instance { - let instance = self.raw_instance(); - Instance::from_wasmtime(instance.id(), self.store) + pub fn instance(&self, view: &mut StackView<'_>) -> Instance { + let instance = self.raw_instance(view.iter.store); + Instance::from_wasmtime(instance.id(), view.iter.store) } /// Get the module associated with this frame, if any (i.e., not a /// container instance for a host-created entity). - pub fn module(&mut self) -> Option<&Module> { - let instance = self.raw_instance(); + pub fn module<'a>(&self, view: &'a mut StackView<'_>) -> Option<&'a Module> { + let instance = self.raw_instance(view.iter.store); instance.runtime_module() } @@ -208,13 +235,16 @@ impl<'a> FrameView<'a> { /// PC as an offset within its code section, if it is a Wasm /// function directly from the given `Module` (rather than a /// trampoline). - pub fn wasm_function_index_and_pc(&mut self) -> Option<(DefinedFuncIndex, u32)> { - let FuncKey::DefinedWasmFunction(module, func) = self.frame_state_slot.func_key() else { + pub fn wasm_function_index_and_pc( + &self, + view: &mut StackView<'_>, + ) -> Option<(DefinedFuncIndex, u32)> { + let FuncKey::DefinedWasmFunction(module, func) = self.func_key else { return None; }; debug_assert_eq!( module, - self.module() + self.module(view) .expect("module should be defined if this is a defined function") .env_module() .module_index @@ -224,7 +254,7 @@ impl<'a> FrameView<'a> { /// Get the number of locals in this frame. pub fn num_locals(&self) -> usize { - self.frame_state_slot.num_locals() + self.locals.len() } /// Get the depth of the operand stack in this frame. @@ -238,11 +268,11 @@ impl<'a> FrameView<'a> { /// /// Panics if the index is out-of-range (greater than /// `num_locals()`). 
- pub fn local(&mut self, index: usize) -> (ValType, Val) { - let (offset, ty) = self.frame_state_slot.local(index).unwrap(); + pub fn local(&self, view: &mut StackView<'_>, index: usize) -> (ValType, Val) { + let (offset, ty) = self.locals[index]; // SAFETY: compiler produced metadata to describe this local // slot and stored a value of the correct type into it. - unsafe { read_value(self.store, self.slot_addr, offset, ty) } + unsafe { read_value(view.iter.store, self.slot_addr, offset, ty) } } /// Get the type and value of the given operand-stack value in @@ -252,12 +282,12 @@ impl<'a> FrameView<'a> { /// from there are more recently pushed values. In other words, /// index order reads the Wasm virtual machine's abstract stack /// state left-to-right. - pub fn stack(&mut self, index: usize) -> (ValType, Val) { + pub fn stack(&self, view: &mut StackView<'_>, index: usize) -> (ValType, Val) { let (offset, ty) = self.stack[index]; // SAFETY: compiler produced metadata to describe this // operand-stack slot and stored a value of the correct type // into it. 
- unsafe { read_value(self.store, self.slot_addr, offset, ty) } + unsafe { read_value(view.iter.store, self.slot_addr, offset, ty) } } } diff --git a/crates/wasmtime/src/runtime/vm/traphandlers.rs b/crates/wasmtime/src/runtime/vm/traphandlers.rs index 1f6486bbc64d..939c2784d6f6 100644 --- a/crates/wasmtime/src/runtime/vm/traphandlers.rs +++ b/crates/wasmtime/src/runtime/vm/traphandlers.rs @@ -29,6 +29,8 @@ use core::ptr::{self, NonNull}; use wasmtime_unwinder::Handler; pub use self::backtrace::Backtrace; +#[cfg(feature = "debug")] +pub(crate) use self::backtrace::CurrentActivationBacktrace; #[cfg(feature = "gc")] pub use wasmtime_unwinder::Frame; diff --git a/crates/wasmtime/src/runtime/vm/traphandlers/backtrace.rs b/crates/wasmtime/src/runtime/vm/traphandlers/backtrace.rs index c8f3b314fdc8..79873c4fcaf7 100644 --- a/crates/wasmtime/src/runtime/vm/traphandlers/backtrace.rs +++ b/crates/wasmtime/src/runtime/vm/traphandlers/backtrace.rs @@ -77,7 +77,7 @@ impl Backtrace { } /// Walk the current Wasm stack, calling `f` for each frame we walk. - #[cfg(any(feature = "gc", feature = "debug"))] + #[cfg(feature = "gc")] pub fn trace(store: &StoreOpaque, f: impl FnMut(Frame) -> ControlFlow<()>) { let vm_store_context = store.vm_store_context(); let unwind = store.unwinder(); @@ -325,3 +325,59 @@ impl Backtrace { self.0.iter() } } + +/// An iterator over one Wasm activation. +#[cfg(feature = "debug")] +pub(crate) struct CurrentActivationBacktrace<'a> { + pub(crate) store: &'a mut StoreOpaque, + inner: Box>, +} + +#[cfg(feature = "debug")] +impl<'a> CurrentActivationBacktrace<'a> { + /// Return an iterator over the most recent Wasm activation. + /// + /// The iterator captures the store with a mutable borrow, and + /// then yields it back at each frame. This ensures that the stack + /// remains live while still providing a mutable store that may be + /// needed to access items in the frame (e.g., to create new roots + /// when reading out GC refs). 
+ /// + /// This serves as an alternative to `Backtrace::trace()` and + /// friends: it allows external iteration (and e.g. lazily walking + /// through frames in a stack) rather than visiting via a closure. + /// + /// # Safety + /// + /// Although the iterator yields a mutable store back at each + /// iteration, this *must not* be used to mutate the stack + /// activation itself that this iterator is visiting. While the + /// `store` technically owns the stack in question, the only way + /// to do this with the current API would be to return back into + /// the Wasm activation. As long as this iterator is held and used + /// while within host code called from that activation (which will + /// ordinarily be ensured if the `store`'s lifetime came from the + /// host entry point) then everything will be sound. + pub(crate) unsafe fn new(store: &'a mut StoreOpaque) -> CurrentActivationBacktrace<'a> { + // Get the initial exit FP, exit PC, and entry FP. + let vm_store_context = store.vm_store_context(); + let exit_pc = unsafe { *(*vm_store_context).last_wasm_exit_pc.get() }; + let exit_fp = unsafe { (*vm_store_context).last_wasm_exit_fp() }; + let trampoline_fp = unsafe { *(*vm_store_context).last_wasm_entry_fp.get() }; + let unwind = store.unwinder(); + // Establish the iterator. 
+ let inner = Box::new(unsafe { + wasmtime_unwinder::frame_iterator(unwind, exit_pc, exit_fp, trampoline_fp) + }); + + CurrentActivationBacktrace { store, inner } + } +} + +#[cfg(feature = "debug")] +impl<'a> Iterator for CurrentActivationBacktrace<'a> { + type Item = Frame; + fn next(&mut self) -> Option { + self.inner.next() + } +} diff --git a/tests/all/debug.rs b/tests/all/debug.rs index 274a9b6d1d1e..c8a8e866683d 100644 --- a/tests/all/debug.rs +++ b/tests/all/debug.rs @@ -54,25 +54,40 @@ fn stack_values_two_frames() -> anyhow::Result<()> { }, |mut caller: Caller<'_, ()>| { let mut stack = caller.stack_values().unwrap(); - assert_eq!(stack.len(), 2); - let mut frame = stack.frame(0); - assert_eq!(frame.wasm_function_index_and_pc().unwrap().0.as_u32(), 1); - assert_eq!(frame.wasm_function_index_and_pc().unwrap().1, 65); + let frame = stack.next().unwrap(); + assert_eq!( + frame + .wasm_function_index_and_pc(&mut stack) + .unwrap() + .0 + .as_u32(), + 1 + ); + assert_eq!(frame.wasm_function_index_and_pc(&mut stack).unwrap().1, 65); assert_eq!(frame.num_locals(), 2); assert_eq!(frame.num_stacks(), 2); - assert!(matches!(frame.local(0).0, ValType::I32)); - assert!(matches!(frame.local(1).0, ValType::I32)); - assert_eq!(frame.local(0).1.unwrap_i32(), 1); - assert_eq!(frame.local(1).1.unwrap_i32(), 2); - assert!(matches!(frame.stack(0).0, ValType::I32)); - assert!(matches!(frame.stack(1).0, ValType::I32)); - assert_eq!(frame.stack(0).1.unwrap_i32(), 1); - assert_eq!(frame.stack(1).1.unwrap_i32(), 2); + assert!(matches!(frame.local(&mut stack, 0).0, ValType::I32)); + assert!(matches!(frame.local(&mut stack, 1).0, ValType::I32)); + assert_eq!(frame.local(&mut stack, 0).1.unwrap_i32(), 1); + assert_eq!(frame.local(&mut stack, 1).1.unwrap_i32(), 2); + assert!(matches!(frame.stack(&mut stack, 0).0, ValType::I32)); + assert!(matches!(frame.stack(&mut stack, 1).0, ValType::I32)); + assert_eq!(frame.stack(&mut stack, 0).1.unwrap_i32(), 1); + assert_eq!(frame.stack(&mut 
stack, 1).1.unwrap_i32(), 2); - let mut frame = stack.frame(1); - assert_eq!(frame.wasm_function_index_and_pc().unwrap().0.as_u32(), 0); - assert_eq!(frame.wasm_function_index_and_pc().unwrap().1, 55); + let frame = stack.next().unwrap(); + assert_eq!( + frame + .wasm_function_index_and_pc(&mut stack) + .unwrap() + .0 + .as_u32(), + 0 + ); + assert_eq!(frame.wasm_function_index_and_pc(&mut stack).unwrap().1, 55); + + assert!(stack.next().is_none()); }, )?; } @@ -97,11 +112,11 @@ fn stack_values_exceptions() -> anyhow::Result<()> { |_config| {}, |mut caller: Caller<'_, ()>| { let mut stack = caller.stack_values().unwrap(); - assert_eq!(stack.len(), 1); - let mut frame = stack.frame(0); + let frame = stack.next().unwrap(); assert_eq!(frame.num_stacks(), 1); - assert!(matches!(frame.stack(0).0, ValType::I32)); - assert_eq!(frame.stack(0).1.unwrap_i32(), 42); + assert!(matches!(frame.stack(&mut stack, 0).0, ValType::I32)); + assert_eq!(frame.stack(&mut stack, 0).1.unwrap_i32(), 42); + assert!(stack.next().is_none()); }, ) } From 0dc211a9a218c398814011610a56a021bd3abdda Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 2 Oct 2025 21:36:55 -0700 Subject: [PATCH 6/9] Fix path in native-debug tests (debug -> native_debug rename). 
--- tests/all/native_debug/lldb.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/all/native_debug/lldb.rs b/tests/all/native_debug/lldb.rs index ebce6094a4b9..9968a0c113a3 100644 --- a/tests/all/native_debug/lldb.rs +++ b/tests/all/native_debug/lldb.rs @@ -508,7 +508,7 @@ fn dwarf_simple() -> Result<()> { fn dwarf_imported_memory() -> Result<()> { test_dwarf_simple( DWARF_IMPORTED_MEMORY, - &["--preload=env=./tests/all/debug/satisfy_memory_import.wat"], + &["--preload=env=./tests/all/native_debug/satisfy_memory_import.wat"], ) } From f785341cb4d66f264c892d457945c0a54cea81d2 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 3 Oct 2025 08:58:20 -0700 Subject: [PATCH 7/9] Enforce that `debug_instrumentation` can only be enabled when feature is enabled. --- crates/wasmtime/src/config.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index 0c370a48e25e..cc30a80299f1 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -2339,6 +2339,10 @@ impl Config { None }; + if !cfg!(feature = "debug") && tunables.debug_instrumentation { + bail!("debug instrumentation support was disabled at compile time"); + } + Ok((tunables, features)) } From 41f44cff2085af119a131f755e0aff8098ce86f5 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 3 Oct 2025 09:01:30 -0700 Subject: [PATCH 8/9] Add missing assert. 
--- crates/wasmtime/src/runtime/debug.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/wasmtime/src/runtime/debug.rs b/crates/wasmtime/src/runtime/debug.rs index 5da14c71a9e2..54f8bcd525c7 100644 --- a/crates/wasmtime/src/runtime/debug.rs +++ b/crates/wasmtime/src/runtime/debug.rs @@ -87,6 +87,7 @@ impl<'a> Iterator for StackView<'a> { if self.frames.is_empty() { let next_frame = self.iter.next()?; self.frames = VirtualFrame::decode(self.iter.store, next_frame, self.is_trapping_frame); + debug_assert!(!self.frames.is_empty()); self.is_trapping_frame = false; } From e21aa911a29c7de07d413ca818b436a3f027d4ae Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 6 Oct 2025 14:38:29 -0700 Subject: [PATCH 9/9] Use builtin knob for forcing intra-module inlining instead. --- crates/wasmtime/src/config.rs | 13 +------------ tests/all/debug.rs | 6 +++++- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index cc30a80299f1..96d422aa6222 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -6,7 +6,7 @@ use core::str::FromStr; #[cfg(any(feature = "cache", feature = "cranelift", feature = "winch"))] use std::path::Path; use wasmparser::WasmFeatures; -use wasmtime_environ::{ConfigTunables, IntraModuleInlining, TripleExt, Tunables}; +use wasmtime_environ::{ConfigTunables, TripleExt, Tunables}; #[cfg(feature = "runtime")] use crate::memory::MemoryCreator; @@ -2084,17 +2084,6 @@ impl Config { self } - /// Whether to force all possible inlining. - pub fn compiler_force_inlining(&mut self, inlining: bool) -> &mut Self { - let inlining = if inlining { - IntraModuleInlining::Yes - } else { - IntraModuleInlining::No - }; - self.tunables.inlining_intra_module = Some(inlining); - self - } - /// Returns the set of features that the currently selected compiler backend /// does not support at all and may panic on. 
/// diff --git a/tests/all/debug.rs b/tests/all/debug.rs index c8a8e866683d..0e1065716427 100644 --- a/tests/all/debug.rs +++ b/tests/all/debug.rs @@ -50,7 +50,11 @@ fn stack_values_two_frames() -> anyhow::Result<()> { "#, |config| { config.compiler_inlining(inlining); - config.compiler_force_inlining(inlining); + if inlining { + unsafe { + config.cranelift_flag_set("wasmtime_inlining_intra_module", "true"); + } + } }, |mut caller: Caller<'_, ()>| { let mut stack = caller.stack_values().unwrap();