diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index af3c9e924052..b52b91e7e3a5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -960,7 +960,7 @@ jobs: sudo mkdir -p /usr/lib/local/lib/python3.10/dist-packages/lldb sudo ln -s /usr/lib/llvm-15/lib/python3.10/dist-packages/lldb/* /usr/lib/python3/dist-packages/lldb/ # Only testing release since it is more likely to expose issues with our low-level symbol handling. - cargo test --release --test all -- --ignored --test-threads 1 debug:: + cargo test --release --test all -- --ignored --test-threads 1 native_debug:: env: LLDB: lldb-18 WASI_SDK_PATH: /tmp/wasi-sdk diff --git a/Cargo.toml b/Cargo.toml index 246f78c2e8c2..810e4dc230df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -469,6 +469,7 @@ default = [ "stack-switching", "winch", "pulley", + "debug", # Enable some nice features of clap by default, but they come at a binary size # cost, so allow disabling this through disabling of our own `default` @@ -531,6 +532,7 @@ gc-drc = ["gc", "wasmtime/gc-drc", "wasmtime-cli-flags/gc-drc"] gc-null = ["gc", "wasmtime/gc-null", "wasmtime-cli-flags/gc-null"] pulley = ["wasmtime-cli-flags/pulley"] stack-switching = ["wasmtime/stack-switching", "wasmtime-cli-flags/stack-switching"] +debug = ["wasmtime-cli-flags/debug", "wasmtime/debug"] # CLI subcommands for the `wasmtime` executable. See `wasmtime $cmd --help` # for more information on each subcommand. 
diff --git a/crates/c-api/src/config.rs b/crates/c-api/src/config.rs index 3113ab1627b9..ff0b09de5706 100644 --- a/crates/c-api/src/config.rs +++ b/crates/c-api/src/config.rs @@ -55,7 +55,7 @@ pub extern "C" fn wasm_config_new() -> Box { #[unsafe(no_mangle)] pub extern "C" fn wasmtime_config_debug_info_set(c: &mut wasm_config_t, enable: bool) { - c.config.debug_info(enable); + c.config.native_debug_info(enable); } #[unsafe(no_mangle)] diff --git a/crates/cli-flags/Cargo.toml b/crates/cli-flags/Cargo.toml index 1469c9535b15..b5d7420a9e42 100644 --- a/crates/cli-flags/Cargo.toml +++ b/crates/cli-flags/Cargo.toml @@ -40,3 +40,4 @@ threads = ["wasmtime/threads"] memory-protection-keys = ["wasmtime/memory-protection-keys"] pulley = ["wasmtime/pulley"] stack-switching = ["wasmtime/stack-switching"] +debug = ["wasmtime/debug"] \ No newline at end of file diff --git a/crates/cli-flags/src/lib.rs b/crates/cli-flags/src/lib.rs index f0cea981c550..a191c90f27ca 100644 --- a/crates/cli-flags/src/lib.rs +++ b/crates/cli-flags/src/lib.rs @@ -263,7 +263,9 @@ wasmtime_option_group! { #[serde(rename_all = "kebab-case", deny_unknown_fields)] pub struct DebugOptions { /// Enable generation of DWARF debug information in compiled code. - pub debug_info: Option, + pub native_debug_info: Option, + /// Enable debug instrumentation for perfect value reconstruction. + pub debug_instrumentation: Option, /// Configure whether compiled code can map native addresses to wasm. pub address_map: Option, /// Configure whether logging is enabled. @@ -701,8 +703,13 @@ impl CommonOptions { enable => config.cranelift_debug_verifier(enable), true => err, } - if let Some(enable) = self.debug.debug_info { - config.debug_info(enable); + if let Some(enable) = self.debug.native_debug_info { + config.native_debug_info(enable); + } + match_feature! 
{ + ["debug" : self.debug.debug_instrumentation] + enable => config.debug_instrumentation(enable), + _ => err, } if self.debug.coredump.is_some() { #[cfg(feature = "coredump")] diff --git a/crates/cranelift/src/compiled_function.rs b/crates/cranelift/src/compiled_function.rs index ca1faab58eb0..ac1da8676532 100644 --- a/crates/cranelift/src/compiled_function.rs +++ b/crates/cranelift/src/compiled_function.rs @@ -1,9 +1,11 @@ use crate::{Relocation, mach_reloc_to_reloc, mach_trap_to_trap}; use cranelift_codegen::{ - Final, MachBufferFinalized, MachSrcLoc, ValueLabelsRanges, ir, isa::unwind::CfaUnwindInfo, - isa::unwind::UnwindInfo, + Final, MachBufferFinalized, MachBufferFrameLayout, MachSrcLoc, ValueLabelsRanges, ir, + isa::unwind::CfaUnwindInfo, isa::unwind::UnwindInfo, +}; +use wasmtime_environ::{ + FilePos, FrameStateSlotBuilder, InstructionAddressMap, PrimaryMap, TrapInformation, }; -use wasmtime_environ::{FilePos, InstructionAddressMap, PrimaryMap, TrapInformation}; #[derive(Debug, Clone, PartialEq, Eq, Default)] /// Metadata to translate from binary offsets back to the original @@ -44,8 +46,6 @@ pub struct CompiledFunctionMetadata { pub cfa_unwind_info: Option, /// Mapping of value labels and their locations. pub value_labels_ranges: ValueLabelsRanges, - /// Allocated stack slots. - pub sized_stack_slots: ir::StackSlots, /// Start source location. pub start_srcloc: FilePos, /// End source location. @@ -63,6 +63,8 @@ pub struct CompiledFunction { /// The metadata for the compiled function, including unwind information /// the function address map. metadata: CompiledFunctionMetadata, + /// Debug metadata for the top-level function's state slot. + pub debug_slot_descriptor: Option, } impl CompiledFunction { @@ -79,6 +81,7 @@ impl CompiledFunction { name_map, alignment, metadata: Default::default(), + debug_slot_descriptor: None, } } @@ -155,9 +158,11 @@ impl CompiledFunction { self.metadata.cfa_unwind_info = Some(unwind); } - /// Set the sized stack slots. 
- pub fn set_sized_stack_slots(&mut self, slots: ir::StackSlots) { - self.metadata.sized_stack_slots = slots; + /// Returns the frame-layout metadata for this function. + pub fn frame_layout(&self) -> &MachBufferFrameLayout { + self.buffer + .frame_layout() + .expect("Single-function MachBuffer must have frame layout information") } } diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 15dc427f83c7..112069d78d4b 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -14,7 +14,10 @@ use cranelift_codegen::isa::{ unwind::{UnwindInfo, UnwindInfoKind}, }; use cranelift_codegen::print_errors::pretty_error; -use cranelift_codegen::{CompiledCode, Context, FinalizedMachCallSite}; +use cranelift_codegen::{ + CompiledCode, Context, FinalizedMachCallSite, MachBufferDebugTagList, MachBufferFrameLayout, + MachDebugTagPos, +}; use cranelift_entity::PrimaryMap; use cranelift_frontend::FunctionBuilder; use object::write::{Object, StandardSegment, SymbolId}; @@ -28,13 +31,13 @@ use std::ops::Range; use std::path; use std::sync::{Arc, Mutex}; use wasmparser::{FuncValidatorAllocations, FunctionBody}; -use wasmtime_environ::obj::ELF_WASMTIME_EXCEPTIONS; +use wasmtime_environ::obj::{ELF_WASMTIME_EXCEPTIONS, ELF_WASMTIME_FRAMES}; use wasmtime_environ::{ Abi, AddressMapSection, BuiltinFunctionIndex, CacheStore, CompileError, CompiledFunctionBody, - DefinedFuncIndex, FlagValue, FuncKey, FunctionBodyData, FunctionLoc, HostCall, - InliningCompiler, ModuleTranslation, ModuleTypesBuilder, PtrSize, StackMapSection, - StaticModuleIndex, TrapEncodingBuilder, TrapSentinel, TripleExt, Tunables, VMOffsets, - WasmFuncType, WasmValType, + DefinedFuncIndex, FlagValue, FrameInstPos, FrameStackShape, FrameStateSlotBuilder, + FrameTableBuilder, FuncKey, FunctionBodyData, FunctionLoc, HostCall, InliningCompiler, + ModuleTranslation, ModuleTypesBuilder, PtrSize, StackMapSection, StaticModuleIndex, + TrapEncodingBuilder, TrapSentinel, 
TripleExt, Tunables, VMOffsets, WasmFuncType, WasmValType, }; use wasmtime_unwinder::ExceptionTableBuilder; @@ -53,6 +56,7 @@ struct CompilerContext { codegen_context: Context, incremental_cache_ctx: Option, validator_allocations: FuncValidatorAllocations, + debug_slot_descriptor: Option, abi: Option, } @@ -63,6 +67,7 @@ impl Default for CompilerContext { codegen_context: Context::new(), incremental_cache_ctx: None, validator_allocations: Default::default(), + debug_slot_descriptor: None, abi: None, } } @@ -252,7 +257,7 @@ impl wasmtime_environ::Compiler for Compiler { context.func.collect_debug_info(); } - let mut func_env = FuncEnvironment::new(self, translation, types, wasm_func_ty); + let mut func_env = FuncEnvironment::new(self, translation, types, wasm_func_ty, key); // The `stack_limit` global value below is the implementation of stack // overflow checks in Wasmtime. @@ -326,13 +331,19 @@ impl wasmtime_environ::Compiler for Compiler { .map_err(|e| CompileError::Codegen(e.to_string()))?; } + let needs_gc_heap = func_env.needs_gc_heap(); + + if let Some((_, slot_builder)) = func_env.state_slot { + compiler.cx.debug_slot_descriptor = Some(slot_builder); + } + let timing = cranelift_codegen::timing::take_current(); log::debug!("`{symbol}` translated to CLIF in {:?}", timing.total()); log::trace!("`{symbol}` timing info\n{timing}"); Ok(CompiledFunctionBody { code: box_dyn_any_compiler_context(Some(compiler.cx)), - needs_gc_heap: func_env.needs_gc_heap(), + needs_gc_heap, }) } @@ -558,12 +569,12 @@ impl wasmtime_environ::Compiler for Compiler { fn append_code( &self, obj: &mut Object<'static>, - funcs: &[(String, Box)], + funcs: &[(String, FuncKey, Box)], resolve_reloc: &dyn Fn(usize, FuncKey) -> usize, ) -> Result> { log::trace!( "appending functions to object file: {:#?}", - funcs.iter().map(|(sym, _)| sym).collect::>() + funcs.iter().map(|(sym, _, _)| sym).collect::>() ); let mut builder = @@ -575,9 +586,26 @@ impl wasmtime_environ::Compiler for Compiler { let 
mut traps = TrapEncodingBuilder::default(); let mut stack_maps = StackMapSection::default(); let mut exception_tables = ExceptionTableBuilder::default(); + let mut frame_tables = FrameTableBuilder::default(); + + let mut frame_descriptors = HashMap::new(); + if self.tunables.debug_instrumentation { + for (_, key, func) in funcs { + debug_assert!(!func.is::>()); + debug_assert!(func.is::()); + let func = func.downcast_ref::().unwrap(); + frame_descriptors.insert( + *key, + func.debug_slot_descriptor + .as_ref() + .map(|builder| builder.serialize()) + .unwrap_or_else(|| vec![]), + ); + } + } let mut ret = Vec::with_capacity(funcs.len()); - for (i, (sym, func)) in funcs.iter().enumerate() { + for (i, (sym, _key, func)) in funcs.iter().enumerate() { debug_assert!(!func.is::>()); debug_assert!(func.is::()); let func = func.downcast_ref::().unwrap(); @@ -602,6 +630,17 @@ impl wasmtime_environ::Compiler for Compiler { range.clone(), func.buffer.call_sites(), )?; + if self.tunables.debug_instrumentation + && let Some(frame_layout) = func.buffer.frame_layout() + { + clif_to_env_frame_tables( + &mut frame_tables, + range.clone(), + func.buffer.debug_tags(), + frame_layout, + &frame_descriptors, + )?; + } builder.append_padding(self.linkopts.padding_between_functions); let info = FunctionLoc { @@ -628,6 +667,17 @@ impl wasmtime_environ::Compiler for Compiler { obj.append_section_data(exception_section, bytes, 1); }); + if self.tunables.debug_instrumentation { + let frame_table_section = obj.add_section( + obj.segment_name(StandardSegment::Data).to_vec(), + ELF_WASMTIME_FRAMES.as_bytes().to_vec(), + SectionKind::ReadOnlyData, + ); + frame_tables.serialize(|bytes| { + obj.append_section_data(frame_table_section, bytes, 1); + }); + } + Ok(ret) } @@ -1380,6 +1430,10 @@ impl FunctionCompiler<'_> { } } + if let Some(builder) = self.cx.debug_slot_descriptor.take() { + compiled_function.debug_slot_descriptor = Some(builder); + } + if body_and_tunables .map(|(_, t)| 
t.generate_native_debuginfo) .unwrap_or(false) @@ -1401,8 +1455,6 @@ impl FunctionCompiler<'_> { } } - compiled_function - .set_sized_stack_slots(std::mem::take(&mut context.func.sized_stack_slots)); self.compiler.contexts.lock().unwrap().push(self.cx); Ok(compiled_function) @@ -1447,6 +1499,64 @@ fn clif_to_env_exception_tables<'a>( builder.add_func(CodeOffset::try_from(range.start).unwrap(), call_sites) } +/// Convert from Cranelift's representation of frame state slots and +/// debug tags to Wasmtime's serialized metadata. +fn clif_to_env_frame_tables<'a>( + builder: &mut FrameTableBuilder, + range: Range, + tag_sites: impl Iterator>, + frame_layout: &MachBufferFrameLayout, + frame_descriptors: &HashMap>, +) -> anyhow::Result<()> { + let mut frame_descriptor_indices = HashMap::new(); + for tag_site in tag_sites { + // Split into frames; each has three debug tags. + let mut frames = vec![]; + for frame_tags in tag_site.tags.chunks_exact(3) { + let &[ + ir::DebugTag::StackSlot(slot), + ir::DebugTag::User(wasm_pc), + ir::DebugTag::User(stack_shape), + ] = frame_tags + else { + panic!("Invalid tags"); + }; + + let func_key = frame_layout.stackslots[slot] + .key + .expect("Key must be present on stackslot used as state slot") + .bits(); + let func_key = FuncKey::from_raw_u64(func_key); + let frame_descriptor = *frame_descriptor_indices.entry(slot).or_insert_with(|| { + let slot_to_fp_offset = + frame_layout.frame_to_fp_offset - frame_layout.stackslots[slot].offset; + let descriptor = frame_descriptors + .get(&func_key) + .expect("frame descriptor not present for FuncKey"); + builder.add_frame_descriptor(slot_to_fp_offset, &descriptor) + }); + + frames.push(( + wasm_pc, + frame_descriptor, + FrameStackShape::from_raw(stack_shape), + )); + } + + let native_pc_in_code_section = u32::try_from(range.start) + .unwrap() + .checked_add(tag_site.offset) + .unwrap(); + let pos = match tag_site.pos { + MachDebugTagPos::Post => FrameInstPos::Post, + MachDebugTagPos::Pre => 
FrameInstPos::Pre, + }; + builder.add_program_point(native_pc_in_code_section, pos, &frames); + } + + Ok(()) +} + fn save_last_wasm_entry_context( builder: &mut FunctionBuilder, pointer_type: ir::Type, diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index 1ba0a3c9b407..8231c2a38aea 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -21,13 +21,14 @@ use cranelift_frontend::Variable; use cranelift_frontend::{FuncInstBuilder, FunctionBuilder}; use smallvec::{SmallVec, smallvec}; use std::mem; -use wasmparser::{Operator, WasmFeatures}; +use wasmparser::{FuncValidator, Operator, WasmFeatures, WasmModuleResources}; use wasmtime_environ::{ BuiltinFunctionIndex, DataIndex, DefinedFuncIndex, ElemIndex, EngineOrModuleTypeIndex, - FuncIndex, FuncKey, GlobalIndex, IndexType, Memory, MemoryIndex, Module, - ModuleInternedTypeIndex, ModuleTranslation, ModuleTypesBuilder, PtrSize, Table, TableIndex, - TagIndex, TripleExt, Tunables, TypeConvert, TypeIndex, VMOffsets, WasmCompositeInnerType, - WasmFuncType, WasmHeapTopType, WasmHeapType, WasmRefType, WasmResult, WasmValType, + FrameStateSlotBuilder, FrameValType, FuncIndex, FuncKey, GlobalIndex, IndexType, Memory, + MemoryIndex, Module, ModuleInternedTypeIndex, ModuleTranslation, ModuleTypesBuilder, PtrSize, + Table, TableIndex, TagIndex, TripleExt, Tunables, TypeConvert, TypeIndex, VMOffsets, + WasmCompositeInnerType, WasmFuncType, WasmHeapTopType, WasmHeapType, WasmRefType, WasmResult, + WasmValType, }; use wasmtime_environ::{FUNCREF_INIT_BIT, FUNCREF_MASK}; use wasmtime_math::f64_cvt_to_int_bounds; @@ -96,6 +97,7 @@ wasmtime_environ::foreach_builtin_function!(declare_function_signatures); pub struct FuncEnvironment<'module_environment> { compiler: &'module_environment Compiler, isa: &'module_environment (dyn TargetIsa + 'module_environment), + key: FuncKey, pub(crate) module: &'module_environment Module, types: &'module_environment 
ModuleTypesBuilder, wasm_func_ty: &'module_environment WasmFuncType, @@ -182,6 +184,10 @@ pub struct FuncEnvironment<'module_environment> { /// slot on this function's stack to be used for the /// current continuation's `values` field. stack_switching_values_buffer: Option, + + /// The stack-slot used for exposing Wasm state via debug + /// instrumentation, if any, and the builder containing its metadata. + pub(crate) state_slot: Option<(ir::StackSlot, FrameStateSlotBuilder)>, } impl<'module_environment> FuncEnvironment<'module_environment> { @@ -190,6 +196,7 @@ impl<'module_environment> FuncEnvironment<'module_environment> { translation: &'module_environment ModuleTranslation<'module_environment>, types: &'module_environment ModuleTypesBuilder, wasm_func_ty: &'module_environment WasmFuncType, + key: FuncKey, ) -> Self { let tunables = compiler.tunables(); let builtin_functions = BuiltinFunctions::new(compiler); @@ -199,6 +206,7 @@ impl<'module_environment> FuncEnvironment<'module_environment> { let _ = BuiltinFunctions::raise; Self { + key, isa: compiler.isa(), module: &translation.module, compiler, @@ -238,6 +246,8 @@ impl<'module_environment> FuncEnvironment<'module_environment> { stack_switching_handler_list_buffer: None, stack_switching_values_buffer: None, + + state_slot: None, } } @@ -1179,6 +1189,178 @@ impl<'module_environment> FuncEnvironment<'module_environment> { let ty = self.module.types[type_index].unwrap_module_type_index(); self.types[ty].unwrap_func().params().len() } + + /// Initialize the state slot with an empty layout. + pub(crate) fn create_state_slot(&mut self, builder: &mut FunctionBuilder) { + if self.tunables.debug_instrumentation { + let frame_builder = FrameStateSlotBuilder::new(self.key, self.pointer_type().bytes()); + + // Initially zero-size and with no descriptor; we will fill in + // this info once we're done with the function body. 
+ let slot = builder + .func + .create_sized_stack_slot(ir::StackSlotData::new_with_key( + ir::StackSlotKind::ExplicitSlot, + 0, + 0, + ir::StackSlotKey::new(self.key.into_raw_u64()), + )); + + self.state_slot = Some((slot, frame_builder)); + } + } + + /// Update the state slot layout with a new layout given a local. + pub(crate) fn add_state_slot_local( + &mut self, + builder: &mut FunctionBuilder, + ty: WasmValType, + init: Option, + ) { + if let Some((slot, b)) = &mut self.state_slot { + let offset = b.add_local(FrameValType::from(ty)); + if let Some(init) = init { + builder.ins().stack_store(init, *slot, offset.offset()); + } + } + } + + fn update_state_slot_stack( + &mut self, + validator: &FuncValidator, + builder: &mut FunctionBuilder, + stack: &mut FuncTranslationStacks, + ) -> WasmResult<()> { + // Take ownership of the state-slot builder temporarily rather + // than mutably borrowing so we can invoke a method below. + if let Some((slot, mut b)) = self.state_slot.take() { + // If the stack-shape stack is shorter than the value + // stack, that means that values were popped and then new + // values were pushed; hence, these operand-stack values + // are "dirty" and need to be flushed to the stackslot. + for i in stack.stack_shape.len()..stack.stack.len() { + let parent_shape = i + .checked_sub(1) + .map(|parent_idx| stack.stack_shape[parent_idx]); + if let Some(this_ty) = validator + .get_operand_type(stack.stack.len() - i - 1) + .expect("Index should not be out of range") + { + let wasm_ty = self.convert_valtype(this_ty)?; + let (this_shape, offset) = + b.push_stack(parent_shape, FrameValType::from(wasm_ty)); + stack.stack_shape.push(this_shape); + + let value = stack.stack[i]; + builder.ins().stack_store(value, slot, offset.offset()); + } else { + // Unreachable code with unknown type -- no + // flushes for this or later-pushed values. 
+ break; + } + } + + self.state_slot = Some((slot, b)); + } + + Ok(()) + } + + pub(crate) fn debug_tags( + &self, + stack: &FuncTranslationStacks, + srcloc: ir::SourceLoc, + ) -> Vec { + if let Some((slot, _b)) = &self.state_slot { + stack.assert_debug_stack_is_synced(); + let stack_shape = stack + .stack_shape + .last() + .map(|s| s.raw()) + .unwrap_or(u32::MAX); + let pc = srcloc.bits(); + vec![ + ir::DebugTag::StackSlot(*slot), + ir::DebugTag::User(pc), + ir::DebugTag::User(stack_shape), + ] + } else { + vec![] + } + } + + fn finish_debug_metadata(&self, builder: &mut FunctionBuilder) { + if let Some((slot, b)) = &self.state_slot { + builder.func.sized_stack_slots[*slot].size = b.size(); + } + } + + /// Store a new value for a local in the state slot, if present. + pub(crate) fn state_slot_local_set( + &self, + builder: &mut FunctionBuilder, + local: u32, + value: ir::Value, + ) { + if let Some((slot, b)) = &self.state_slot { + let offset = b.local_offset(local); + builder.ins().stack_store(value, *slot, offset.offset()); + } + } + + fn update_state_slot_vmctx(&mut self, builder: &mut FunctionBuilder) { + if let &Some((slot, _)) = &self.state_slot { + let vmctx = self.vmctx_val(&mut builder.cursor()); + builder.ins().stack_store(vmctx, slot, 0); + } + } + + /// Perform debug instrumentation prior to translating an + /// operator. + pub(crate) fn debug_instrumentation_before_op( + &mut self, + builder: &mut FunctionBuilder, + stack: &mut FuncTranslationStacks, + srcloc: ir::SourceLoc, + ) -> WasmResult<()> { + if stack.reachable() && self.state_slot.is_some() { + let inst = builder.ins().sequence_point(); + let tags = self.debug_tags(stack, srcloc); + builder.func.debug_tags.set(inst, tags); + } + Ok(()) + } + + /// Perform debug instrumentation after translating an operator. 
+ pub(crate) fn debug_instrumentation_after_op( + &mut self, + validator: &FuncValidator, + builder: &mut FunctionBuilder, + stack: &mut FuncTranslationStacks, + ) -> WasmResult<()> { + if stack.reachable() { + self.update_state_slot_stack(validator, builder, stack)?; + } + Ok(()) + } + + /// Perform debug instrumentation before translating the whole function. + pub(crate) fn debug_instrumentation_at_start( + &mut self, + builder: &mut FunctionBuilder, + ) -> WasmResult<()> { + self.update_state_slot_vmctx(builder); + Ok(()) + } + + /// Perform debug instrumentation after translating the whole function. + pub(crate) fn debug_instrumentation_at_end( + &mut self, + builder: &mut FunctionBuilder, + ) -> WasmResult<()> { + self.finish_debug_metadata(builder); + Ok(()) + } } #[derive(Default)] @@ -1691,6 +1873,8 @@ impl FuncEnvironment<'_> { struct Call<'a, 'func, 'module_env> { builder: &'a mut FunctionBuilder<'func>, env: &'a mut FuncEnvironment<'module_env>, + stack: &'a FuncTranslationStacks, + srcloc: ir::SourceLoc, handlers: Vec<(Option, Block)>, tail: bool, } @@ -1712,12 +1896,16 @@ impl<'a, 'func, 'module_env> Call<'a, 'func, 'module_env> { pub fn new( builder: &'a mut FunctionBuilder<'func>, env: &'a mut FuncEnvironment<'module_env>, + stack: &'a FuncTranslationStacks, + srcloc: ir::SourceLoc, handlers: impl IntoIterator, Block)>, ) -> Self { let handlers = handlers.into_iter().collect(); Call { builder, env, + stack, + srcloc, handlers, tail: false, } @@ -1727,10 +1915,14 @@ impl<'a, 'func, 'module_env> Call<'a, 'func, 'module_env> { pub fn new_tail( builder: &'a mut FunctionBuilder<'func>, env: &'a mut FuncEnvironment<'module_env>, + stack: &'a FuncTranslationStacks, + srcloc: ir::SourceLoc, ) -> Self { Call { builder, env, + stack, + srcloc, handlers: vec![], tail: true, } @@ -2213,15 +2405,17 @@ impl<'a, 'func, 'module_env> Call<'a, 'func, 'module_env> { } else if let Some((exception_table, continuation_block, results)) = self.exception_table(sig_ref) { - 
self.builder.ins().try_call(callee, args, exception_table); + let inst = self.builder.ins().try_call(callee, args, exception_table); self.handle_call_result_stackmap(&results, sig_ref); self.builder.switch_to_block(continuation_block); self.builder.seal_block(continuation_block); + self.attach_tags(inst); results } else { let inst = self.builder.ins().call(callee, args); let results = self.results_from_call_inst(inst); self.handle_call_result_stackmap(&results, sig_ref); + self.attach_tags(inst); results } } @@ -2240,20 +2434,30 @@ impl<'a, 'func, 'module_env> Call<'a, 'func, 'module_env> { } else if let Some((exception_table, continuation_block, results)) = self.exception_table(sig_ref) { - self.builder + let inst = self + .builder .ins() .try_call_indirect(func_addr, args, exception_table); self.handle_call_result_stackmap(&results, sig_ref); self.builder.switch_to_block(continuation_block); self.builder.seal_block(continuation_block); + self.attach_tags(inst); results } else { let inst = self.builder.ins().call_indirect(sig_ref, func_addr, args); let results = self.results_from_call_inst(inst); self.handle_call_result_stackmap(&results, sig_ref); + self.attach_tags(inst); results } } + + fn attach_tags(&mut self, inst: ir::Inst) { + let tags = self.env.debug_tags(self.stack, self.srcloc); + if !tags.is_empty() { + self.builder.func.debug_tags.set(inst, tags); + } + } } impl TypeConvert for FuncEnvironment<'_> { @@ -2307,12 +2511,20 @@ impl FuncEnvironment<'_> { &self.heaps } - pub fn is_wasm_parameter(&self, _signature: &ir::Signature, index: usize) -> bool { + pub fn is_wasm_parameter(&self, index: usize) -> bool { // The first two parameters are the vmctx and caller vmctx. The rest are // the wasm parameters. 
index >= 2 } + pub fn clif_param_as_wasm_param(&self, index: usize) -> Option { + if index >= 2 { + Some(self.wasm_func_ty.params()[index - 2]) + } else { + None + } + } + pub fn param_needs_stack_map(&self, _signature: &ir::Signature, index: usize) -> bool { // Skip the caller and callee vmctx. if index < 2 { @@ -2990,6 +3202,8 @@ impl FuncEnvironment<'_> { pub fn translate_call_indirect<'a>( &mut self, builder: &'a mut FunctionBuilder, + stack: &'a FuncTranslationStacks, + srcloc: ir::SourceLoc, features: &WasmFeatures, table_index: TableIndex, ty_index: TypeIndex, @@ -2998,7 +3212,7 @@ impl FuncEnvironment<'_> { call_args: &[ir::Value], handlers: impl IntoIterator, Block)>, ) -> WasmResult> { - Call::new(builder, self, handlers).indirect_call( + Call::new(builder, self, stack, srcloc, handlers).indirect_call( features, table_index, ty_index, @@ -3011,39 +3225,55 @@ impl FuncEnvironment<'_> { pub fn translate_call<'a>( &mut self, builder: &'a mut FunctionBuilder, + stack: &'a FuncTranslationStacks, + srcloc: ir::SourceLoc, callee_index: FuncIndex, sig_ref: ir::SigRef, call_args: &[ir::Value], handlers: impl IntoIterator, Block)>, ) -> WasmResult { - Call::new(builder, self, handlers).direct_call(callee_index, sig_ref, call_args) + Call::new(builder, self, stack, srcloc, handlers).direct_call( + callee_index, + sig_ref, + call_args, + ) } pub fn translate_call_ref<'a>( &mut self, builder: &'a mut FunctionBuilder, + stack: &'a FuncTranslationStacks, + srcloc: ir::SourceLoc, sig_ref: ir::SigRef, callee: ir::Value, call_args: &[ir::Value], handlers: impl IntoIterator, Block)>, ) -> WasmResult { - Call::new(builder, self, handlers).call_ref(sig_ref, callee, call_args) + Call::new(builder, self, stack, srcloc, handlers).call_ref(sig_ref, callee, call_args) } pub fn translate_return_call( &mut self, builder: &mut FunctionBuilder, + stack: &FuncTranslationStacks, + srcloc: ir::SourceLoc, callee_index: FuncIndex, sig_ref: ir::SigRef, call_args: &[ir::Value], ) -> 
WasmResult<()> { - Call::new_tail(builder, self).direct_call(callee_index, sig_ref, call_args)?; + Call::new_tail(builder, self, stack, srcloc).direct_call( + callee_index, + sig_ref, + call_args, + )?; Ok(()) } pub fn translate_return_call_indirect( &mut self, builder: &mut FunctionBuilder, + stack: &FuncTranslationStacks, + srcloc: ir::SourceLoc, features: &WasmFeatures, table_index: TableIndex, ty_index: TypeIndex, @@ -3051,7 +3281,7 @@ impl FuncEnvironment<'_> { callee: ir::Value, call_args: &[ir::Value], ) -> WasmResult<()> { - Call::new_tail(builder, self).indirect_call( + Call::new_tail(builder, self, stack, srcloc).indirect_call( features, table_index, ty_index, @@ -3065,11 +3295,13 @@ impl FuncEnvironment<'_> { pub fn translate_return_call_ref( &mut self, builder: &mut FunctionBuilder, + stack: &FuncTranslationStacks, + srcloc: ir::SourceLoc, sig_ref: ir::SigRef, callee: ir::Value, call_args: &[ir::Value], ) -> WasmResult<()> { - Call::new_tail(builder, self).call_ref(sig_ref, callee, call_args)?; + Call::new_tail(builder, self, stack, srcloc).call_ref(sig_ref, callee, call_args)?; Ok(()) } diff --git a/crates/cranelift/src/translate/code_translator.rs b/crates/cranelift/src/translate/code_translator.rs index 1e15bec0c3af..151910e7e976 100644 --- a/crates/cranelift/src/translate/code_translator.rs +++ b/crates/cranelift/src/translate/code_translator.rs @@ -124,6 +124,7 @@ pub fn translate_operator( builder: &mut FunctionBuilder, stack: &mut FuncTranslationStacks, environ: &mut FuncEnvironment<'_>, + srcloc: ir::SourceLoc, ) -> WasmResult<()> { log::trace!("Translating Wasm opcode: {op:?}"); @@ -163,6 +164,7 @@ pub fn translate_operator( builder.def_var(Variable::from_u32(*local_index), val); let label = ValueLabel::from_u32(*local_index); builder.set_val_label(val, label); + environ.state_slot_local_set(builder, *local_index, val); } Operator::LocalTee { local_index } => { let mut val = stack.peek1(); @@ -176,6 +178,7 @@ pub fn translate_operator( 
builder.def_var(Variable::from_u32(*local_index), val); let label = ValueLabel::from_u32(*local_index); builder.set_val_label(val, label); + environ.state_slot_local_set(builder, *local_index, val); } /********************************** Globals **************************************** * `get_global` and `set_global` are handled by the environment. @@ -427,7 +430,7 @@ pub fn translate_operator( frame.restore_catch_handlers(&mut stack.handlers, builder); - frame.truncate_value_stack_to_original_size(&mut stack.stack); + frame.truncate_value_stack_to_original_size(&mut stack.stack, &mut stack.stack_shape); stack .stack .extend_from_slice(builder.block_params(next_block)); @@ -650,6 +653,8 @@ pub fn translate_operator( let inst_results = environ.translate_call( builder, + stack, + srcloc, function_index, sig_ref, args, @@ -682,6 +687,8 @@ pub fn translate_operator( let inst_results = environ.translate_call_indirect( builder, + stack, + srcloc, validator.features(), TableIndex::from_u32(*table_index), type_index, @@ -724,8 +731,9 @@ pub fn translate_operator( // Bitcast any vector arguments to their default type, I8X16, before calling. let args = stack.peekn_mut(num_args); bitcast_wasm_params(environ, sig_ref, args, builder); + let args = stack.peekn(num_args); // Reborrow immutably. 
- environ.translate_return_call(builder, function_index, sig_ref, args)?; + environ.translate_return_call(builder, stack, srcloc, function_index, sig_ref, args)?; stack.popn(num_args); stack.reachable = false; @@ -748,6 +756,8 @@ pub fn translate_operator( environ.translate_return_call_indirect( builder, + stack, + srcloc, validator.features(), TableIndex::from_u32(*table_index), type_index, @@ -772,7 +782,14 @@ pub fn translate_operator( let args = stack.peekn_mut(num_args); bitcast_wasm_params(environ, sigref, args, builder); - environ.translate_return_call_ref(builder, sigref, callee, stack.peekn(num_args))?; + environ.translate_return_call_ref( + builder, + stack, + srcloc, + sigref, + callee, + stack.peekn(num_args), + )?; stack.popn(num_args); stack.reachable = false; @@ -2516,6 +2533,8 @@ pub fn translate_operator( let inst_results = environ.translate_call_ref( builder, + stack, + srcloc, sigref, callee, stack.peekn(num_args), @@ -3233,7 +3252,10 @@ fn translate_unreachable_operator( blocktype_params_results(validator, blocktype)?; let else_block = block_with_params(builder, params, environ)?; let frame = stack.control_stack.last().unwrap(); - frame.truncate_value_stack_to_else_params(&mut stack.stack); + frame.truncate_value_stack_to_else_params( + &mut stack.stack, + &mut stack.stack_shape, + ); // We change the target of the branch instruction. 
builder.change_jump_destination( @@ -3246,7 +3268,10 @@ fn translate_unreachable_operator( } ElseData::WithElse { else_block } => { let frame = stack.control_stack.last().unwrap(); - frame.truncate_value_stack_to_else_params(&mut stack.stack); + frame.truncate_value_stack_to_else_params( + &mut stack.stack, + &mut stack.stack_shape, + ); else_block } }; @@ -3264,13 +3289,14 @@ fn translate_unreachable_operator( } Operator::End => { let value_stack = &mut stack.stack; + let stack_shape = &mut stack.stack_shape; let control_stack = &mut stack.control_stack; let frame = control_stack.pop().unwrap(); frame.restore_catch_handlers(&mut stack.handlers, builder); // Pop unused parameters from stack. - frame.truncate_value_stack_to_original_size(value_stack); + frame.truncate_value_stack_to_original_size(value_stack, stack_shape); let reachable_anyway = match frame { // If it is a loop we also have to seal the body loop block @@ -4287,7 +4313,7 @@ fn bitcast_wasm_params( ) { let callee_signature = &builder.func.dfg.signatures[callee_signature]; let changes = bitcast_arguments(builder, arguments, &callee_signature.params, |i| { - environ.is_wasm_parameter(&callee_signature, i) + environ.is_wasm_parameter(i) }); for (t, arg) in changes { let mut flags = MemFlags::new(); diff --git a/crates/cranelift/src/translate/func_translator.rs b/crates/cranelift/src/translate/func_translator.rs index fb205e067ffd..72c95bddcdff 100644 --- a/crates/cranelift/src/translate/func_translator.rs +++ b/crates/cranelift/src/translate/func_translator.rs @@ -76,6 +76,8 @@ impl FuncTranslator { builder.switch_to_block(entry_block); builder.seal_block(entry_block); // Declare all predecessors known. + environ.create_state_slot(&mut builder); + // Make sure the entry block is inserted in the layout before we make any callbacks to // `environ`. The callback functions may need to insert things in the entry block. 
builder.ensure_inserted_block(); @@ -103,7 +105,7 @@ impl FuncTranslator { fn declare_wasm_parameters( builder: &mut FunctionBuilder, entry_block: Block, - environ: &FuncEnvironment<'_>, + environ: &mut FuncEnvironment<'_>, ) -> usize { let sig_len = builder.func.signature.params.len(); let mut next_local = 0; @@ -111,7 +113,7 @@ fn declare_wasm_parameters( let param_type = builder.func.signature.params[i]; // There may be additional special-purpose parameters in addition to the normal WebAssembly // signature parameters. For example, a `vmctx` pointer. - if environ.is_wasm_parameter(&builder.func.signature, i) { + if let Some(wasm_type) = environ.clif_param_as_wasm_param(i) { // This is a normal WebAssembly signature parameter, so create a local for it. let local = builder.declare_var(param_type.value_type); debug_assert_eq!(local.index(), next_local); @@ -123,6 +125,8 @@ fn declare_wasm_parameters( let param_value = builder.block_params(entry_block)[i]; builder.def_var(local, param_value); + + environ.add_state_slot_local(builder, wasm_type, Some(param_value)); } if param_type.purpose == ir::ArgumentPurpose::VMContext { let param_value = builder.block_params(entry_block)[i]; @@ -221,6 +225,7 @@ fn declare_locals( builder.def_var(local, init); builder.set_val_label(init, ValueLabel::new(*next_local)); } + environ.add_state_slot_local(builder, environ.convert_valtype(wasm_type)?, init); *next_local += 1; } Ok(()) @@ -245,18 +250,32 @@ fn parse_function_body( let mut reader = OperatorsReader::new(reader); let mut operand_types = vec![]; + environ.debug_instrumentation_at_start(builder)?; + while !reader.eof() { let pos = reader.original_position(); - builder.set_srcloc(cur_srcloc(&reader.get_binary_reader())); + let srcloc = cur_srcloc(&reader.get_binary_reader()); + builder.set_srcloc(srcloc); let op = reader.read()?; let operand_types = validate_op_and_get_operand_types(validator, environ, &mut operand_types, &op, pos)?; + 
environ.debug_instrumentation_before_op(builder, stack, srcloc)?; environ.before_translate_operator(&op, operand_types, builder, stack)?; - translate_operator(validator, &op, operand_types, builder, stack, environ)?; + translate_operator( + validator, + &op, + operand_types, + builder, + stack, + environ, + srcloc, + )?; environ.after_translate_operator(&op, operand_types, builder, stack)?; + environ.debug_instrumentation_after_op(validator, builder, stack)?; } + environ.debug_instrumentation_at_end(builder)?; environ.after_translate_function(builder, stack)?; reader.finish()?; @@ -276,6 +295,7 @@ fn parse_function_body( // Discard any remaining values on the stack. Either we just returned them, // or the end of the function is unreachable. stack.stack.clear(); + stack.stack_shape.clear(); Ok(()) } diff --git a/crates/cranelift/src/translate/stack.rs b/crates/cranelift/src/translate/stack.rs index 5c79231d7dcf..f338fe3f0abd 100644 --- a/crates/cranelift/src/translate/stack.rs +++ b/crates/cranelift/src/translate/stack.rs @@ -7,6 +7,7 @@ use cranelift_codegen::ir::{self, Block, ExceptionTag, Inst, Value}; use cranelift_frontend::FunctionBuilder; use std::vec::Vec; +use wasmtime_environ::FrameStackShape; /// Information about the presence of an associated `else` for an `if`, or the /// lack thereof. @@ -190,14 +191,23 @@ impl ControlStackFrame { /// Pop values from the value stack so that it is left at the /// input-parameters to an else-block. - pub fn truncate_value_stack_to_else_params(&self, stack: &mut Vec) { + pub fn truncate_value_stack_to_else_params( + &self, + stack: &mut Vec, + stack_shape: &mut Vec, + ) { debug_assert!(matches!(self, &ControlStackFrame::If { .. })); stack.truncate(self.original_stack_size()); + stack_shape.truncate(self.original_stack_size()); } /// Pop values from the value stack so that it is left at the state it was /// before this control-flow frame. 
- pub fn truncate_value_stack_to_original_size(&self, stack: &mut Vec) { + pub fn truncate_value_stack_to_original_size( + &self, + stack: &mut Vec, + stack_shape: &mut Vec, + ) { // The "If" frame pushes its parameters twice, so they're available to the else block // (see also `FuncTranslationStacks::push_if`). // Yet, the original_stack_size member accounts for them only once, so that the else @@ -212,7 +222,10 @@ impl ControlStackFrame { } _ => 0, }; - stack.truncate(self.original_stack_size() - num_duplicated_params); + + let new_len = self.original_stack_size() - num_duplicated_params; + stack.truncate(new_len); + stack_shape.truncate(new_len); } /// Restore the catch-handlers as they were outside of this block. @@ -242,6 +255,13 @@ pub struct FuncTranslationStacks { /// A stack of values corresponding to the active values in the input wasm function at this /// point. pub(crate) stack: Vec, + /// "Shape" of stack at each index, if emitting debug instrumentation. + /// + /// When we pop `stack`, we automatically pop `stack_shape` as + /// well, but we never push automatically; this enables us to + /// determine which values are new and need to be flushed to + /// memory after translating an operator. + pub(crate) stack_shape: Vec, /// A stack of active control flow operations at this point in the input wasm function. pub(crate) control_stack: Vec, /// Exception handler state, updated as we enter and exit @@ -266,6 +286,7 @@ impl FuncTranslationStacks { pub(crate) fn new() -> Self { Self { stack: Vec::new(), + stack_shape: Vec::new(), control_stack: Vec::new(), handlers: HandlerState::default(), reachable: true, @@ -274,6 +295,7 @@ impl FuncTranslationStacks { fn clear(&mut self) { debug_assert!(self.stack.is_empty()); + debug_assert!(self.stack_shape.is_empty()); debug_assert!(self.control_stack.is_empty()); debug_assert!(self.handlers.is_empty()); self.reachable = true; @@ -313,6 +335,7 @@ impl FuncTranslationStacks { /// Pop one value. 
pub(crate) fn pop1(&mut self) -> Value { + self.pop_stack_shape(1); self.stack .pop() .expect("attempted to pop a value from an empty stack") @@ -328,6 +351,7 @@ impl FuncTranslationStacks { /// Pop two values. Return them in the order they were pushed. pub(crate) fn pop2(&mut self) -> (Value, Value) { + self.pop_stack_shape(2); let v2 = self.stack.pop().unwrap(); let v1 = self.stack.pop().unwrap(); (v1, v2) @@ -335,6 +359,7 @@ impl FuncTranslationStacks { /// Pop three values. Return them in the order they were pushed. pub(crate) fn pop3(&mut self) -> (Value, Value, Value) { + self.pop_stack_shape(3); let v3 = self.stack.pop().unwrap(); let v2 = self.stack.pop().unwrap(); let v1 = self.stack.pop().unwrap(); @@ -343,6 +368,7 @@ impl FuncTranslationStacks { /// Pop four values. Return them in the order they were pushed. pub(crate) fn pop4(&mut self) -> (Value, Value, Value, Value) { + self.pop_stack_shape(4); let v4 = self.stack.pop().unwrap(); let v3 = self.stack.pop().unwrap(); let v2 = self.stack.pop().unwrap(); @@ -352,6 +378,7 @@ impl FuncTranslationStacks { /// Pop five values. Return them in the order they were pushed. pub(crate) fn pop5(&mut self) -> (Value, Value, Value, Value, Value) { + self.pop_stack_shape(5); let v5 = self.stack.pop().unwrap(); let v4 = self.stack.pop().unwrap(); let v3 = self.stack.pop().unwrap(); @@ -379,6 +406,21 @@ impl FuncTranslationStacks { self.ensure_length_is_at_least(n); let new_len = self.stack.len() - n; self.stack.truncate(new_len); + self.stack_shape.truncate(new_len); + } + + fn pop_stack_shape(&mut self, n: usize) { + // The `stack_shape` vec represents the *clean* slots (already + // flushed to memory); its length is always less than or equal + // to `stack`, but indices always correspond between the + // two. 
Thus a pop on `stack` may or may not pop something on + // `stack_shape`; but if `stack` is truncated down to a length + // L by some number of pops, truncating `stack_shape` to that + // same length L will pop exactly the right shapes and will + // ensure that any new pushes that are "dirty" will be + // correctly represented as such. + let new_len = self.stack.len() - n; + self.stack_shape.truncate(new_len); } /// Peek at the top `n` values on the stack in the order they were pushed. @@ -467,6 +509,7 @@ impl FuncTranslationStacks { blocktype: wasmparser::BlockType, ) { debug_assert!(num_param_types <= self.stack.len()); + self.assert_debug_stack_is_synced(); // Push a second copy of our `if`'s parameters on the stack. This lets // us avoid saving them on the side in the `ControlStackFrame` for our @@ -477,6 +520,15 @@ impl FuncTranslationStacks { for i in (self.stack.len() - num_param_types)..self.stack.len() { let val = self.stack[i]; self.stack.push(val); + // Duplicate the stack-shape as well, if we're doing debug + // instrumentation. Note that we must have flushed + // everything before processing an `if`, so (as per the + // assert above) we can rely on either no shapes (if no + // instrumentation) or all shapes being present. + if !self.stack_shape.is_empty() { + let shape = self.stack_shape[i]; + self.stack_shape.push(shape); + } } self.control_stack.push(ControlStackFrame::If { @@ -491,6 +543,10 @@ impl FuncTranslationStacks { blocktype, }); } + + pub(crate) fn assert_debug_stack_is_synced(&self) { + debug_assert!(self.stack_shape.is_empty() || self.stack_shape.len() == self.stack.len()); + } } /// Exception handler state. diff --git a/crates/environ/src/compile/frame_table.rs b/crates/environ/src/compile/frame_table.rs new file mode 100644 index 000000000000..a2f8d30a8575 --- /dev/null +++ b/crates/environ/src/compile/frame_table.rs @@ -0,0 +1,320 @@ +//! Builder for the `ELF_WASMTIME_FRAME_TABLE` ("frame table") section +//! in compiled executables. 
+//! +//! This section is present only if debug instrumentation is +//! enabled. It describes functions, stackslots that carry Wasm state, +//! and allows looking up active Wasm frames (including multiple +//! frames in one function due to inlining), Wasm local types and Wasm +//! operand stack depth in each frame by PC, with offsets to read +//! those values off of the state in the stack frame. + +use crate::{ + FrameInstPos, FrameStackShape, FrameStateSlotOffset, FrameTableDescriptorIndex, FrameValType, + FuncKey, WasmHeapTopType, WasmValType, prelude::*, +}; +use object::{LittleEndian, U32Bytes}; +use std::collections::{HashMap, hash_map::Entry}; + +/// Builder for a stackslot descriptor. +pub struct FrameStateSlotBuilder { + /// Function identifier for this state slot. + func_key: FuncKey, + + /// Pointer size for target. + pointer_size: u32, + + /// Local types and offsets. + locals: Vec<(FrameValType, FrameStateSlotOffset)>, + + /// Stack nodes: (parent, type, offset) tuples. + stacks: Vec<(Option, FrameValType, FrameStateSlotOffset)>, + + /// Hashconsing for stack-type nodes. + stacks_dedup: + HashMap<(Option, FrameValType, FrameStateSlotOffset), FrameStackShape>, + + /// Size of vmctx (one pointer). + vmctx_size: u32, + + /// Size of all locals. + locals_size: u32, + + /// Maximum size of whole state slot. 
+ slot_size: u32, +} + +impl From for FrameValType { + fn from(ty: WasmValType) -> FrameValType { + match ty { + WasmValType::I32 => FrameValType::I32, + WasmValType::I64 => FrameValType::I64, + WasmValType::F32 => FrameValType::F32, + WasmValType::F64 => FrameValType::F64, + WasmValType::V128 => FrameValType::V128, + WasmValType::Ref(r) => match r.heap_type.top() { + WasmHeapTopType::Any => FrameValType::AnyRef, + WasmHeapTopType::Extern => FrameValType::ExternRef, + WasmHeapTopType::Func => FrameValType::FuncRef, + WasmHeapTopType::Exn => FrameValType::ExnRef, + WasmHeapTopType::Cont => FrameValType::ContRef, + }, + } + } +} + +impl FrameStateSlotBuilder { + /// Create a new state-slot builder. + pub fn new(func_key: FuncKey, pointer_size: u32) -> FrameStateSlotBuilder { + FrameStateSlotBuilder { + func_key, + pointer_size, + locals: vec![], + stacks: vec![], + stacks_dedup: HashMap::new(), + vmctx_size: pointer_size, + locals_size: 0, + slot_size: pointer_size, + } + } + + /// Add a local to the state-slot. + /// + /// Locals must be added in local index order, and must be added + /// before any stack shapes are defined. The offset in the state + /// slot is returned. + pub fn add_local(&mut self, ty: FrameValType) -> FrameStateSlotOffset { + let offset = FrameStateSlotOffset(self.vmctx_size + self.locals_size); + let size = ty.storage_size(self.pointer_size); + self.locals_size += size; + self.slot_size += size; + self.locals.push((ty, offset)); + offset + } + + /// Get a local's offset in the state-slot. + pub fn local_offset(&self, local: u32) -> FrameStateSlotOffset { + let index = usize::try_from(local).unwrap(); + self.locals[index].1 + } + + /// Push a stack entry. Returns the stack-shape descriptor and the + /// offset at which to write the pushed value. 
+ pub fn push_stack( + &mut self, + parent: Option, + ty: FrameValType, + ) -> (FrameStackShape, FrameStateSlotOffset) { + let offset = parent + .map(|parent| { + let (_, ty, offset) = self.stacks[parent.index()]; + offset.add(ty.storage_size(self.pointer_size)) + }) + .unwrap_or(FrameStateSlotOffset(self.vmctx_size + self.locals_size)); + + self.slot_size = core::cmp::max( + self.slot_size, + offset.0 + ty.storage_size(self.pointer_size), + ); + + let shape = match self.stacks_dedup.entry((parent, ty, offset)) { + Entry::Occupied(o) => *o.get(), + Entry::Vacant(v) => { + let shape = FrameStackShape(u32::try_from(self.stacks.len()).unwrap()); + self.stacks.push((parent, ty, offset)); + *v.insert(shape) + } + }; + + (shape, offset) + } + + /// Get the offset for the top slot in a given stack shape. + pub fn stack_last_offset(&self, shape: FrameStackShape) -> FrameStateSlotOffset { + self.stacks[shape.index()].2 + } + + /// Serialize the frame-slot descriptor so it can be included as + /// metadata. 
+ pub fn serialize(&self) -> Vec { + // Format (all little-endian): + // - func_key: (u32, u32) + // - num_locals: u32 + // - num_stack_shapes: u32 + // - local_offsets: num_locals times: + // - offset: u32 (offset from start of state slot) + // - stack_shape_parents: num_stack_shapes times: + // - parent_shape: u32 (or u32::MAX for none) + // - stack_shape_offsets: num_stack_shapes times: + // - offset: u32 (offset from start of state slot for top-of-stack value) + // - local_types: num_locals times: + // - type: u8 + // - stack_shape_types: num_stack_shapes times: + // - type: u8 (type of top-of-stack value) + + let mut buffer = vec![]; + let (func_key_namespace, func_key_index) = self.func_key.into_parts(); + buffer.extend_from_slice(&u32::to_le_bytes(func_key_namespace.into_raw())); + buffer.extend_from_slice(&u32::to_le_bytes(func_key_index.into_raw())); + + buffer.extend_from_slice(&u32::to_le_bytes(u32::try_from(self.locals.len()).unwrap())); + buffer.extend_from_slice(&u32::to_le_bytes(u32::try_from(self.stacks.len()).unwrap())); + + for (_, offset) in &self.locals { + buffer.extend_from_slice(&u32::to_le_bytes(offset.0)); + } + for (parent, _, _) in &self.stacks { + let parent = parent.map(|p| p.0).unwrap_or(u32::MAX); + buffer.extend_from_slice(&u32::to_le_bytes(parent)); + } + for (_, _, offset) in &self.stacks { + buffer.extend_from_slice(&u32::to_le_bytes(offset.0)); + } + for (ty, _) in &self.locals { + buffer.push(*ty as u8); + } + for (_, ty, _) in &self.stacks { + buffer.push(*ty as u8); + } + + buffer + } + + /// The total size required for all locals/stack storage. + pub fn size(&self) -> u32 { + self.slot_size + } +} + +/// Builder for the Frame Table. 
+/// +/// Format: +/// +/// - `num_slot_descriptors`: u32 +/// - `num_progpoints`: u32 +/// - `frame_descriptor_pool_length`: u32 +/// - `progpoint_descriptor_pool_length`; U32 +/// - `num_slot_descriptors` times: +/// - frame descriptor offset: u32 +/// - length: u32 +/// - `num_slot_descriptors` times: +/// - offset from frame up to FP: u32 +/// - `num_progpoints` times: +/// - PC, from start of text section, position (post/pre): u32 +/// - encoded as (pc << 1) | post_pre_bit +/// - `num_progpoints` times: +/// - progpoint descriptor offset: u32 +/// - frame descriptors (format described above; `frame_descriptor_pool_length` bytes) +/// - progpoint descriptors (`progpoint_descriptor_pool_length` bytes) +/// - each descriptor: sequence of frames +/// - Wasm PC: u32 (high bit set to indicate a parent frame) +/// - slot descriptor index: u32 +/// - stack shape index: u32 (or u32::MAX for none) +#[derive(Default)] +pub struct FrameTableBuilder { + /// (offset, length) pairs into `frame_descriptor_data`, indexed + /// by frame descriptor number. + frame_descriptor_ranges: Vec>, + frame_descriptor_data: Vec, + + /// Offset from frame slot up to FP for each frame descriptor. + frame_descriptor_fp_offsets: Vec>, + + progpoint_pcs: Vec>, + progpoint_descriptor_offsets: Vec>, + progpoint_descriptor_data: Vec>, +} + +impl FrameTableBuilder { + /// Add one frame descriptor. + /// + /// Returns the frame descriptor index. 
+ pub fn add_frame_descriptor( + &mut self, + slot_to_fp_offset: u32, + data: &[u8], + ) -> FrameTableDescriptorIndex { + let start = u32::try_from(self.frame_descriptor_data.len()).unwrap(); + self.frame_descriptor_data.extend(data.iter().cloned()); + let end = u32::try_from(self.frame_descriptor_data.len()).unwrap(); + + let index = FrameTableDescriptorIndex( + u32::try_from(self.frame_descriptor_fp_offsets.len()).unwrap(), + ); + self.frame_descriptor_fp_offsets + .push(U32Bytes::new(LittleEndian, slot_to_fp_offset)); + self.frame_descriptor_ranges + .push(U32Bytes::new(LittleEndian, start)); + self.frame_descriptor_ranges + .push(U32Bytes::new(LittleEndian, end)); + + index + } + + /// Add one program point. + pub fn add_program_point( + &mut self, + native_pc: u32, + pos: FrameInstPos, + // For each frame: Wasm PC, frame descriptor, stack shape + // within the frame descriptor. + frames: &[(u32, FrameTableDescriptorIndex, FrameStackShape)], + ) { + let pc_and_pos = FrameInstPos::encode(native_pc, pos); + // If we already have a program point record at this PC, + // overwrite it. 
+ while let Some(last) = self.progpoint_pcs.last() + && last.get(LittleEndian) == pc_and_pos + { + self.progpoint_pcs.pop(); + self.progpoint_descriptor_offsets.pop(); + self.progpoint_descriptor_data + .truncate(self.progpoint_descriptor_data.len() - 3); + } + + let start = u32::try_from(self.progpoint_descriptor_data.len()).unwrap(); + self.progpoint_pcs + .push(U32Bytes::new(LittleEndian, pc_and_pos)); + self.progpoint_descriptor_offsets + .push(U32Bytes::new(LittleEndian, start)); + + for (i, &(wasm_pc, frame_descriptor, stack_shape)) in frames.iter().enumerate() { + debug_assert!(wasm_pc < 0x8000_0000); + let not_last = i < (frames.len() - 1); + let wasm_pc = wasm_pc | if not_last { 0x8000_0000 } else { 0 }; + self.progpoint_descriptor_data + .push(U32Bytes::new(LittleEndian, wasm_pc)); + self.progpoint_descriptor_data + .push(U32Bytes::new(LittleEndian, frame_descriptor.0)); + self.progpoint_descriptor_data + .push(U32Bytes::new(LittleEndian, stack_shape.0)); + } + } + + /// Serialize the exception-handler data section, taking a closure + /// to consume slices. + pub fn serialize(&mut self, mut f: F) { + // Pad `frame_descriptor_data` to a multiple of 4 bytes so + // `progpoint_descriptor_data` is aligned as well. 
+ while self.frame_descriptor_data.len() & 3 != 0 { + self.frame_descriptor_data.push(0); + } + + let num_frame_descriptors = u32::try_from(self.frame_descriptor_fp_offsets.len()).unwrap(); + f(&num_frame_descriptors.to_le_bytes()); + let num_prog_points = u32::try_from(self.progpoint_pcs.len()).unwrap(); + f(&num_prog_points.to_le_bytes()); + + let frame_descriptor_pool_length = u32::try_from(self.frame_descriptor_data.len()).unwrap(); + f(&frame_descriptor_pool_length.to_le_bytes()); + let progpoint_descriptor_pool_length = + u32::try_from(self.progpoint_descriptor_data.len()).unwrap(); + f(&progpoint_descriptor_pool_length.to_le_bytes()); + + f(object::bytes_of_slice(&self.frame_descriptor_ranges)); + f(object::bytes_of_slice(&self.frame_descriptor_fp_offsets)); + f(object::bytes_of_slice(&self.progpoint_pcs)); + f(object::bytes_of_slice(&self.progpoint_descriptor_offsets)); + f(&self.frame_descriptor_data); + f(object::bytes_of_slice(&self.progpoint_descriptor_data)); + } +} diff --git a/crates/environ/src/compile/mod.rs b/crates/environ/src/compile/mod.rs index 5bff028d4d69..034923b769e9 100644 --- a/crates/environ/src/compile/mod.rs +++ b/crates/environ/src/compile/mod.rs @@ -16,6 +16,7 @@ use std::path; use std::sync::Arc; mod address_map; +mod frame_table; mod module_artifacts; mod module_environ; mod module_types; @@ -23,6 +24,7 @@ mod stack_maps; mod trap_encoding; pub use self::address_map::*; +pub use self::frame_table::*; pub use self::module_artifacts::*; pub use self::module_environ::*; pub use self::module_types::*; @@ -331,7 +333,7 @@ pub trait Compiler: Send + Sync { fn append_code( &self, obj: &mut Object<'static>, - funcs: &[(String, Box)], + funcs: &[(String, FuncKey, Box)], resolve_reloc: &dyn Fn(usize, FuncKey) -> usize, ) -> Result>; diff --git a/crates/environ/src/frame_table.rs b/crates/environ/src/frame_table.rs new file mode 100644 index 000000000000..2fdf32c72048 --- /dev/null +++ b/crates/environ/src/frame_table.rs @@ -0,0 +1,481 @@ 
+//! Frame-table parser and lookup logic. +//! +//! This module contains utilities to interpret the `.wasmtime.frame` +//! section in a compiled artifact as produced by +//! [`crate::compile::frame_table::FrameTableBuilder`]. + +use crate::FuncKey; +use alloc::vec::Vec; +use object::{Bytes, LittleEndian, U32Bytes}; + +/// An index into the table of stack shapes. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct FrameStackShape(pub(crate) u32); +impl FrameStackShape { + pub(crate) fn index(self) -> usize { + usize::try_from(self.0).unwrap() + } + + /// Get the raw stack-shape index suitable for serializing into + /// metadata. + pub fn raw(self) -> u32 { + self.0 + } + + /// Wrap a raw stack shape index (e.g. from debug tags) into a FrameStackShape. + pub fn from_raw(index: u32) -> FrameStackShape { + FrameStackShape(index) + } +} + +/// An index to a frame descriptor that can be referenced from a +/// program point descriptor. +#[derive(Clone, Copy, Debug)] +pub struct FrameTableDescriptorIndex(pub(crate) u32); +impl FrameTableDescriptorIndex { + fn index(self) -> usize { + usize::try_from(self.0).unwrap() + } +} + +/// A parser for a frame-table section. +/// +/// This parser holds slices to the in-memory section data, and is +/// cheap to construct: it reads some header fields but does not +/// interpret or validate content data until queried. +pub struct FrameTable<'a> { + frame_descriptor_ranges: &'a [U32Bytes], + frame_descriptor_data: &'a [u8], + + frame_descriptor_fp_offsets: &'a [U32Bytes], + + progpoint_pcs: &'a [U32Bytes], + progpoint_descriptor_offsets: &'a [U32Bytes], + progpoint_descriptor_data: &'a [U32Bytes], +} + +impl<'a> FrameTable<'a> { + /// Parse a frame table section from a byte-slice as produced by + /// [`crate::compile::frame_table::FrameTableBuilder`]. 
+ pub fn parse(data: &'a [u8]) -> anyhow::Result> { + let mut data = Bytes(data); + let num_frame_descriptors = data + .read::>() + .map_err(|_| anyhow::anyhow!("Unable to read frame descriptor count prefix"))?; + let num_frame_descriptors = usize::try_from(num_frame_descriptors.get(LittleEndian))?; + let num_progpoint_descriptors = data + .read::>() + .map_err(|_| anyhow::anyhow!("Unable to read progpoint descriptor count prefix"))?; + let num_progpoint_descriptors = + usize::try_from(num_progpoint_descriptors.get(LittleEndian))?; + let frame_descriptor_pool_length = data + .read::>() + .map_err(|_| anyhow::anyhow!("Unable to read frame descriptor pool length"))?; + let frame_descriptor_pool_length = + usize::try_from(frame_descriptor_pool_length.get(LittleEndian))?; + let progpoint_descriptor_pool_length = data + .read::>() + .map_err(|_| anyhow::anyhow!("Unable to read progpoint descriptor pool length"))?; + let progpoint_descriptor_pool_length = + usize::try_from(progpoint_descriptor_pool_length.get(LittleEndian))?; + + let (frame_descriptor_ranges, data) = + object::slice_from_bytes::>(data.0, 2 * num_frame_descriptors) + .map_err(|_| anyhow::anyhow!("Unable to read frame descriptor ranges slice"))?; + let (frame_descriptor_fp_offsets, data) = + object::slice_from_bytes::>(data, num_frame_descriptors) + .map_err(|_| anyhow::anyhow!("Unable to read frame descriptor FP offset slice"))?; + + let (progpoint_pcs, data) = + object::slice_from_bytes::>(data, num_progpoint_descriptors) + .map_err(|_| anyhow::anyhow!("Unable to read progpoint PC slice"))?; + let (progpoint_descriptor_offsets, data) = + object::slice_from_bytes::>(data, num_progpoint_descriptors) + .map_err(|_| anyhow::anyhow!("Unable to read progpoint descriptor offset slice"))?; + + let (frame_descriptor_data, data) = data + .split_at_checked(frame_descriptor_pool_length) + .ok_or_else(|| anyhow::anyhow!("Unable to read frame descriptor pool"))?; + + let (progpoint_descriptor_data, _) = 
object::slice_from_bytes::>( + data, + progpoint_descriptor_pool_length, + ) + .map_err(|_| anyhow::anyhow!("Unable to read progpoint descriptor pool"))?; + + Ok(FrameTable { + frame_descriptor_ranges, + frame_descriptor_data, + frame_descriptor_fp_offsets, + progpoint_pcs, + progpoint_descriptor_offsets, + progpoint_descriptor_data, + }) + } + + /// Get raw frame descriptor data and slot-to-FP-offset for a + /// given frame descriptor. + pub fn frame_descriptor( + &self, + frame_descriptor: FrameTableDescriptorIndex, + ) -> Option<(&'a [u8], u32)> { + let range_start = self + .frame_descriptor_ranges + .get(frame_descriptor.index() * 2)? + .get(LittleEndian); + let range_end = self + .frame_descriptor_ranges + .get(frame_descriptor.index() * 2 + 1)? + .get(LittleEndian); + let range_start = usize::try_from(range_start).unwrap(); + let range_end = usize::try_from(range_end).unwrap(); + if range_end < range_start || range_end > self.frame_descriptor_data.len() { + return None; + } + let descriptor = &self.frame_descriptor_data[range_start..range_end]; + let slot_to_fp_offset = self + .frame_descriptor_fp_offsets + .get(frame_descriptor.index())? + .get(LittleEndian); + Some((descriptor, slot_to_fp_offset)) + } + + /// Get frames for the program point at the PC upper-bounded by a + /// given search PC (offset in text section). + pub fn find_program_point( + &self, + search_pc: u32, + search_pos: FrameInstPos, + ) -> Option> { + let key = FrameInstPos::encode(search_pc, search_pos); + let index = match self + .progpoint_pcs + .binary_search_by_key(&key, |entry| entry.get(LittleEndian)) + { + Ok(idx) => idx, + Err(idx) if idx > 0 => idx - 1, + Err(_) => return None, + }; + + Some(self.program_point_frame_iter(index)) + } + + /// Get all program point records with iterators over + /// corresponding frames for each. 
+ pub fn into_program_points( + self, + ) -> impl Iterator< + Item = ( + u32, + FrameInstPos, + Vec<(u32, FrameTableDescriptorIndex, FrameStackShape)>, + ), + > + 'a { + self.progpoint_pcs.iter().enumerate().map(move |(i, pc)| { + let pc_and_pos = pc.get(LittleEndian); + let (pc, pos) = FrameInstPos::decode(pc_and_pos); + ( + pc, + pos, + self.program_point_frame_iter(i).collect::>(), + ) + }) + } + + fn program_point_frame_iter( + &self, + index: usize, + ) -> impl Iterator { + let offset = + usize::try_from(self.progpoint_descriptor_offsets[index].get(LittleEndian)).unwrap(); + let mut data = &self.progpoint_descriptor_data[offset..]; + + core::iter::from_fn(move || { + if data.len() < 3 { + return None; + } + let wasm_pc = data[0].get(LittleEndian); + let frame_descriptor = FrameTableDescriptorIndex(data[1].get(LittleEndian)); + let stack_shape = FrameStackShape(data[2].get(LittleEndian)); + data = &data[3..]; + let not_last = wasm_pc & 0x8000_0000 != 0; + let wasm_pc = wasm_pc & 0x7fff_ffff; + if !not_last { + data = &[]; + } + Some((wasm_pc, frame_descriptor, stack_shape)) + }) + } +} + +/// An instruction position for a program point. +/// +/// We attach debug metadata to a *position* on an offset in the text +/// (code) section, either "post" or "pre". The "post" position +/// logically comes first, and is associated with the instruction that +/// ends at this offset (i.e., the previous instruction). The "pre" +/// position comes next, and is associated with the instruction that +/// begins at this offset (i.e., the next instruction). +/// +/// We make this distinction because metadata lookups sometimes occur +/// with a PC that is after the instruction (e.g., the return address +/// after a call instruction), and sometimes at the instruction (e.g., +/// a trapping PC address). 
The lookup context will know which one to +/// use -- e.g., when walking the stack, "pre" for a trapping PC and +/// "post" for every frame after that -- so we simply encode it as +/// part of the position and allow searching on it. +/// +/// The need for this distinction can be understood by way of an +/// example; say we have: +/// +/// ```plain +/// call ... +/// trapping_store ... +/// ``` +/// +/// where both instructions have debug metadata. We might look up the +/// PC of `trapping_store` once as we walk the stack from within the +/// call (we will get this PC because it is the return address) and +/// once when `trapping_store` itself traps; and we want different +/// metadata in each case. +/// +/// An alternative is to universally attach tags to the end offset of +/// an instruction, which allows us to handle return addresses +/// naturally but requires traps to adjust their PC. However, this +/// requires trap handlers to know the length of the trapping +/// instruction, which is not always easy -- in the most general case, +/// on variable-length instruction sets, it requires a full +/// instruction decoder. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum FrameInstPos { + /// The "post" position at an offset attaches to the instruction + /// that ends at this offset, i.e., came previously. + Post, + /// The "pre" position at an offset attaches to the instruction + /// that begins at this offset, i.e., comes next. + Pre, +} + +impl FrameInstPos { + pub(crate) fn encode(pc: u32, pos: FrameInstPos) -> u32 { + let lsb = match pos { + Self::Post => 0, + Self::Pre => 1, + }; + debug_assert!(pc < 0x8000_0000); + (pc << 1) | lsb + } + pub(crate) fn decode(bits: u32) -> (u32, FrameInstPos) { + let pos = match bits & 1 { + 0 => Self::Post, + 1 => Self::Pre, + _ => unreachable!(), + }; + let pc = bits >> 1; + (pc, pos) + } +} + +/// An offset into the state slot. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct FrameStateSlotOffset(pub(crate) u32); +impl FrameStateSlotOffset { + #[cfg(feature = "compile")] + pub(crate) fn add(self, offset: u32) -> FrameStateSlotOffset { + FrameStateSlotOffset(self.0 + offset) + } + + /// Get the offset into the state stackslot, suitable for use in a + /// `stack_store`/`stack_load` instruction. + pub fn offset(self) -> i32 { + i32::try_from(self.0).unwrap() + } +} + +/// A type stored in a frame. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[allow(missing_docs, reason = "self-describing variants")] +pub enum FrameValType { + I32, + I64, + F32, + F64, + V128, + AnyRef, + FuncRef, + ExternRef, + ExnRef, + ContRef, +} + +impl FrameValType { + #[cfg(feature = "compile")] + pub(crate) fn storage_size(&self, pointer_size: u32) -> u32 { + match self { + FrameValType::I32 => 4, + FrameValType::I64 => 8, + FrameValType::F32 => 4, + FrameValType::F64 => 8, + FrameValType::V128 => 16, + FrameValType::AnyRef | FrameValType::ExternRef | FrameValType::ExnRef => 4, + FrameValType::FuncRef => pointer_size, + FrameValType::ContRef => 2 * pointer_size, + } + } +} + +impl From for u8 { + fn from(value: FrameValType) -> u8 { + match value { + FrameValType::I32 => 0, + FrameValType::I64 => 1, + FrameValType::F32 => 2, + FrameValType::F64 => 3, + FrameValType::V128 => 4, + FrameValType::AnyRef => 5, + FrameValType::FuncRef => 6, + FrameValType::ExternRef => 7, + FrameValType::ExnRef => 8, + FrameValType::ContRef => 9, + } + } +} + +impl TryFrom for FrameValType { + type Error = anyhow::Error; + fn try_from(value: u8) -> anyhow::Result { + match value { + 0 => Ok(Self::I32), + 1 => Ok(Self::I64), + 2 => Ok(Self::F32), + 3 => Ok(Self::F64), + 4 => Ok(Self::V128), + 5 => Ok(Self::AnyRef), + 6 => Ok(Self::FuncRef), + 7 => Ok(Self::ExternRef), + 8 => Ok(Self::ExnRef), + 9 => Ok(Self::ContRef), + _ => Err(anyhow::anyhow!("Invalid type")), + } + } +} + +/// Parser for a frame 
state slot descriptor. +/// +/// This provides the ability to extract offsets and types for locals +/// and for the stack given a stack shape. +pub struct FrameStateSlot<'a> { + func_key: FuncKey, + local_offsets: &'a [U32Bytes], + stack_shape_parents: &'a [U32Bytes], + stack_shape_offsets: &'a [U32Bytes], + local_types: &'a [u8], + stack_shape_types: &'a [u8], +} + +impl<'a> FrameStateSlot<'a> { + /// Parse a slot descriptor. + /// + /// This parses the descriptor bytes as provided by + /// [`FrameTable::frame_descriptor`]. + pub fn parse(descriptor: &'a [u8]) -> anyhow::Result> { + let mut data = Bytes(descriptor); + let func_key_namespace = data + .read::>() + .map_err(|_| anyhow::anyhow!("Unable to read func key namespace"))? + .get(LittleEndian); + let func_key_index = data + .read::>() + .map_err(|_| anyhow::anyhow!("Unable to read func key index"))? + .get(LittleEndian); + let func_key = FuncKey::from_raw_parts(func_key_namespace, func_key_index); + + let num_locals = data + .read::>() + .map_err(|_| anyhow::anyhow!("Unable to read num_locals"))? + .get(LittleEndian); + let num_locals = usize::try_from(num_locals)?; + let num_stack_shapes = data + .read::>() + .map_err(|_| anyhow::anyhow!("Unable to read num_stack_shapes"))? 
+ .get(LittleEndian); + let num_stack_shapes = usize::try_from(num_stack_shapes)?; + + let (local_offsets, data) = + object::slice_from_bytes::>(data.0, num_locals) + .map_err(|_| anyhow::anyhow!("Unable to read local_offsets slice"))?; + let (stack_shape_parents, data) = + object::slice_from_bytes::>(data, num_stack_shapes) + .map_err(|_| anyhow::anyhow!("Unable to read stack_shape_parents slice"))?; + let (stack_shape_offsets, data) = + object::slice_from_bytes::>(data, num_stack_shapes) + .map_err(|_| anyhow::anyhow!("Unable to read stack_shape_offsets slice"))?; + let (local_types, data) = data + .split_at_checked(num_locals) + .ok_or_else(|| anyhow::anyhow!("Unable to read local_types slice"))?; + let (stack_shape_types, _) = data + .split_at_checked(num_stack_shapes) + .ok_or_else(|| anyhow::anyhow!("Unable to read stack_shape_types slice"))?; + + Ok(FrameStateSlot { + func_key, + local_offsets, + stack_shape_parents, + stack_shape_offsets, + local_types, + stack_shape_types, + }) + } + + /// Get the FuncKey for the function that produced this frame + /// slot. + pub fn func_key(&self) -> FuncKey { + self.func_key + } + + /// Get the local offsets and types. + pub fn locals(&self) -> impl Iterator { + (0..self.num_locals()).map(|i| self.local(i).unwrap()) + } + + /// Get the type and offset for a given local. + pub fn local(&self, index: usize) -> Option<(FrameStateSlotOffset, FrameValType)> { + let offset = FrameStateSlotOffset(self.local_offsets.get(index)?.get(LittleEndian)); + let ty = FrameValType::try_from(*self.local_types.get(index)?).expect("Invalid type"); + Some((offset, ty)) + } + + /// Get the number of locals in the frame. + pub fn num_locals(&self) -> usize { + self.local_offsets.len() + } + + /// Get the offsets and types for operand stack values, from top + /// of stack (most recently pushed) down. 
+ pub fn stack( + &self, + shape: FrameStackShape, + ) -> impl Iterator { + fn unpack_option_shape(shape: FrameStackShape) -> Option { + if shape.0 == u32::MAX { + None + } else { + Some(shape) + } + } + + let mut shape = unpack_option_shape(shape); + core::iter::from_fn(move || { + shape.map(|s| { + let parent = FrameStackShape(self.stack_shape_parents[s.index()].get(LittleEndian)); + let parent = unpack_option_shape(parent); + let offset = + FrameStateSlotOffset(self.stack_shape_offsets[s.index()].get(LittleEndian)); + let ty = FrameValType::try_from(self.stack_shape_types[s.index()]) + .expect("Invalid type"); + shape = parent; + (offset, ty) + }) + }) + } +} diff --git a/crates/environ/src/key.rs b/crates/environ/src/key.rs index b05f6b3fed92..f09d108d6e7d 100644 --- a/crates/environ/src/key.rs +++ b/crates/environ/src/key.rs @@ -392,6 +392,25 @@ impl FuncKey { } } + /// Create a key from a raw packed `u64` representation. + /// + /// Should only be given a value produced by `into_raw_u64()`. + /// + /// Panics when given an invalid value. + pub fn from_raw_u64(value: u64) -> Self { + let hi = u32::try_from(value >> 32).unwrap(); + let lo = u32::try_from(value & 0xffff_ffff).unwrap(); + FuncKey::from_raw_parts(hi, lo) + } + + /// Produce a packed `u64` representation of this key. + /// + /// May be used with `from_raw_u64()` to reconstruct this key. + pub fn into_raw_u64(&self) -> u64 { + let (hi, lo) = self.into_raw_parts(); + (u64::from(hi) << 32) | u64::from(lo) + } + /// Unwrap a `FuncKey::DefinedWasmFunction` or else panic. 
pub fn unwrap_defined_wasm_function(self) -> (StaticModuleIndex, DefinedFuncIndex) { match self { diff --git a/crates/environ/src/lib.rs b/crates/environ/src/lib.rs index 19bff1c498dc..d78640d13710 100644 --- a/crates/environ/src/lib.rs +++ b/crates/environ/src/lib.rs @@ -19,6 +19,7 @@ extern crate alloc; pub mod prelude; mod address_map; +mod frame_table; #[macro_use] mod builtin; mod demangling; @@ -45,6 +46,7 @@ pub use crate::address_map::*; pub use crate::builtin::*; pub use crate::demangling::*; pub use crate::error::*; +pub use crate::frame_table::*; pub use crate::gc::*; pub use crate::hostcall::*; pub use crate::key::*; diff --git a/crates/environ/src/obj.rs b/crates/environ/src/obj.rs index a846d667c863..9a3eb6e06a15 100644 --- a/crates/environ/src/obj.rs +++ b/crates/environ/src/obj.rs @@ -109,6 +109,17 @@ pub const ELF_WASMTIME_TRAPS: &str = ".wasmtime.traps"; /// code offsets are relative to the start of the text segment. pub const ELF_WASMTIME_EXCEPTIONS: &str = ".wasmtime.exceptions"; +/// A custom binary-encoded section of the wasmtime compilation +/// artifacts which encodes frame tables. +/// +/// This section is used at runtime to allow debug APIs to decode Wasm +/// VM-level state from state stack slots. +/// +/// This section's format is defined by the +/// [`wasmtime_environ::FrameTableBuilder`] data structure. Its code +/// offsets are relative to the start of the text segment. +pub const ELF_WASMTIME_FRAMES: &str = ".wasmtime.frames"; + /// A custom section which consists of just 1 byte which is either 0 or 1 as to /// whether BTI is enabled. pub const ELF_WASM_BTI: &str = ".wasmtime.bti"; diff --git a/crates/environ/src/tunables.rs b/crates/environ/src/tunables.rs index 55b2bdad8f7c..f4b4d4e5c328 100644 --- a/crates/environ/src/tunables.rs +++ b/crates/environ/src/tunables.rs @@ -136,6 +136,10 @@ define_tunables! 
{ /// The general size threshold for the sum of the caller's and callee's /// sizes, past which we will generally not inline calls anymore. pub inlining_sum_size_threshold: u32, + + /// Whether we are emitting debug instrumentation for precise + /// Wasm state. + pub debug_instrumentation: bool, } pub struct ConfigTunables { @@ -210,6 +214,7 @@ impl Tunables { inlining_intra_module: IntraModuleInlining::WhenUsingGc, inlining_small_callee_size: 50, inlining_sum_size_threshold: 2000, + debug_instrumentation: false, } } diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml index 81ccc0e5083a..cb180e96e1b6 100644 --- a/crates/wasmtime/Cargo.toml +++ b/crates/wasmtime/Cargo.toml @@ -408,3 +408,6 @@ component-model-async-bytes = [ "component-model-async", "dep:bytes", ] + +# Enables support for guest debugging. +debug = ['runtime'] diff --git a/crates/wasmtime/src/compile.rs b/crates/wasmtime/src/compile.rs index 5d210bd58c4d..1a2796394b08 100644 --- a/crates/wasmtime/src/compile.rs +++ b/crates/wasmtime/src/compile.rs @@ -890,7 +890,7 @@ impl UnlinkedCompileOutputs<'_> { needs_gc_heap |= output.function.needs_gc_heap; let index = compiled_funcs.len(); - compiled_funcs.push((output.symbol, output.function.code)); + compiled_funcs.push((output.symbol, output.key, output.function.code)); if output.start_srcloc != FilePos::none() { indices @@ -913,9 +913,9 @@ impl UnlinkedCompileOutputs<'_> { struct PreLinkOutput { /// Whether or not any of these functions require a GC heap needs_gc_heap: bool, - /// The flattened list of (symbol name, compiled function) pairs, as they - /// will be laid out in the object file. - compiled_funcs: Vec<(String, Box)>, + /// The flattened list of (symbol name, FuncKey, compiled + /// function) pairs, as they will be laid out in the object file. + compiled_funcs: Vec<(String, FuncKey, Box)>, /// The `FunctionIndices` mapping our function keys to indices in that flat /// list. 
indices: FunctionIndices, @@ -937,7 +937,7 @@ impl FunctionIndices { self, mut obj: object::write::Object<'static>, engine: &'a Engine, - compiled_funcs: Vec<(String, Box)>, + compiled_funcs: Vec<(String, FuncKey, Box)>, translations: PrimaryMap>, dwarf_package_bytes: Option<&[u8]>, ) -> Result<(wasmtime_environ::ObjectBuilder<'a>, Artifacts)> { @@ -966,7 +966,7 @@ impl FunctionIndices { &|module, func| { let i = self.indices[&FuncKey::DefinedWasmFunction(module, func)]; let (symbol, _) = symbol_ids_and_locs[i]; - let (_, compiled_func) = &compiled_funcs[i]; + let (_, _, compiled_func) = &compiled_funcs[i]; (symbol, &**compiled_func) }, dwarf_package_bytes, diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index 4b313dfa5000..96d422aa6222 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -431,8 +431,9 @@ impl Config { self } - /// Configures whether DWARF debug information will be emitted during - /// compilation. + /// Configures whether DWARF debug information will be emitted + /// during compilation for a native debugger on the Wasmtime + /// process to consume. /// /// Note that the `debug-builtins` compile-time Cargo feature must also be /// enabled for native debuggers such as GDB or LLDB to be able to debug @@ -440,11 +441,32 @@ impl Config { /// /// By default this option is `false`. /// **Note** Enabling this option is not compatible with the Winch compiler. - pub fn debug_info(&mut self, enable: bool) -> &mut Self { + pub fn native_debug_info(&mut self, enable: bool) -> &mut Self { self.tunables.generate_native_debuginfo = Some(enable); self } + /// Configures whether compiled code will be instrumented to + /// provide precise debug state at the Wasm VM level. + /// + /// Without this enabled, debugger-visible state is "best-effort": + /// we may be able to recover some Wasm locals or operand stack + /// values, but it is not guaranteed, even when optimizations are + /// disabled. 
+ /// + /// When this is enabled, additional instrumentation is inserted + /// that directly tracks the Wasm VM state at every step. This has + /// some performance impact, but allows perfect debugging + /// fidelity. + /// + /// ***Note*** Enabling this option is not compatible with the + /// Winch compiler. + #[cfg(feature = "debug")] + pub fn debug_instrumentation(&mut self, enable: bool) -> &mut Self { + self.tunables.debug_instrumentation = Some(enable); + self + } + /// Configures whether [`WasmBacktrace`] will be present in the context of /// errors returned from Wasmtime. /// @@ -2306,6 +2328,10 @@ impl Config { None }; + if !cfg!(feature = "debug") && tunables.debug_instrumentation { + bail!("debug instrumentation support was disabled at compile time"); + } + Ok((tunables, features)) } diff --git a/crates/wasmtime/src/engine/serialization.rs b/crates/wasmtime/src/engine/serialization.rs index c38d26671b8d..3e9506b017ef 100644 --- a/crates/wasmtime/src/engine/serialization.rs +++ b/crates/wasmtime/src/engine/serialization.rs @@ -278,6 +278,7 @@ impl Metadata<'_> { memory_reservation, memory_guard_size, generate_native_debuginfo, + debug_instrumentation, parse_wasm_debuginfo, consume_fuel, epoch_interruption, @@ -322,6 +323,11 @@ impl Metadata<'_> { other.generate_native_debuginfo, "debug information support", )?; + Self::check_bool( + debug_instrumentation, + other.debug_instrumentation, + "debug instrumentation", + )?; Self::check_bool( parse_wasm_debuginfo, other.parse_wasm_debuginfo, @@ -702,7 +708,7 @@ Caused by: assert_eq!(cache_config.cache_misses(), 1); let mut cfg = Config::new(); - cfg.debug_info(true) + cfg.native_debug_info(true) .cache(Some(Cache::from_file(Some(&config_path))?)); let engine = Engine::new(&cfg)?; let cache_config = engine diff --git a/crates/wasmtime/src/runtime.rs b/crates/wasmtime/src/runtime.rs index 8ff64707e810..a0ff5e0ec65e 100644 --- a/crates/wasmtime/src/runtime.rs +++ b/crates/wasmtime/src/runtime.rs @@ -31,7 +31,7 @@ 
pub(crate) mod func; pub(crate) mod code; pub(crate) mod code_memory; -#[cfg(feature = "debug-builtins")] +#[cfg(feature = "debug")] pub(crate) mod debug; #[cfg(feature = "gc")] pub(crate) mod exception; @@ -45,6 +45,8 @@ pub(crate) mod limits; pub(crate) mod linker; pub(crate) mod memory; pub(crate) mod module; +#[cfg(feature = "debug-builtins")] +pub(crate) mod native_debug; pub(crate) mod resources; pub(crate) mod store; pub(crate) mod trampoline; @@ -74,6 +76,8 @@ cfg_if::cfg_if! { } pub use code_memory::CodeMemory; +#[cfg(feature = "debug")] +pub use debug::*; #[cfg(feature = "gc")] pub use exception::*; pub use externals::*; diff --git a/crates/wasmtime/src/runtime/code_memory.rs b/crates/wasmtime/src/runtime/code_memory.rs index acb05e81e630..6777790b183f 100644 --- a/crates/wasmtime/src/runtime/code_memory.rs +++ b/crates/wasmtime/src/runtime/code_memory.rs @@ -36,6 +36,7 @@ pub struct CodeMemory { address_map_data: Range, stack_map_data: Range, exception_data: Range, + frame_tables_data: Range, func_name_data: Range, info_data: Range, wasm_dwarf: Range, @@ -122,6 +123,7 @@ impl CodeMemory { let mut has_native_debug_info = false; let mut trap_data = 0..0; let mut exception_data = 0..0; + let mut frame_tables_data = 0..0; let mut wasm_data = 0..0; let mut address_map_data = 0..0; let mut stack_map_data = 0..0; @@ -172,6 +174,7 @@ impl CodeMemory { obj::ELF_WASMTIME_STACK_MAP => stack_map_data = range, obj::ELF_WASMTIME_TRAPS => trap_data = range, obj::ELF_WASMTIME_EXCEPTIONS => exception_data = range, + obj::ELF_WASMTIME_FRAMES => frame_tables_data = range, obj::ELF_NAME_DATA => func_name_data = range, obj::ELF_WASMTIME_INFO => info_data = range, obj::ELF_WASMTIME_DWARF => wasm_dwarf = range, @@ -216,6 +219,7 @@ impl CodeMemory { address_map_data, stack_map_data, exception_data, + frame_tables_data, func_name_data, wasm_dwarf, info_data, @@ -277,6 +281,12 @@ impl CodeMemory { &self.mmap[self.exception_data.clone()] } + /// Returns the encoded frame-tables 
section to pass to + /// `wasmtime_environ::FrameTable::parse`. + pub fn frame_tables(&self) -> &[u8] { + &self.mmap[self.frame_tables_data.clone()] + } + /// Returns the contents of the `ELF_WASMTIME_INFO` section, or an empty /// slice if it wasn't found. #[inline] @@ -420,7 +430,7 @@ impl CodeMemory { // and anything else necessary that is done in "create_gdbjit_image" right now. let image = self.mmap().to_vec(); let text: &[u8] = self.text(); - let bytes = crate::debug::create_gdbjit_image(image, (text.as_ptr(), text.len()))?; + let bytes = crate::native_debug::create_gdbjit_image(image, (text.as_ptr(), text.len()))?; let reg = crate::runtime::vm::GdbJitImageRegistration::register(bytes); self.debug_registration = Some(reg); Ok(()) diff --git a/crates/wasmtime/src/runtime/debug.rs b/crates/wasmtime/src/runtime/debug.rs index 1fa4d430b88d..54f8bcd525c7 100644 --- a/crates/wasmtime/src/runtime/debug.rs +++ b/crates/wasmtime/src/runtime/debug.rs @@ -1,172 +1,363 @@ -use crate::prelude::*; -use core::mem::size_of; -use object::elf::*; -use object::endian::{BigEndian, Endian, Endianness, LittleEndian}; -use object::read::elf::{FileHeader, SectionHeader}; -use object::{ - File, NativeEndian as NE, Object, ObjectSection, ObjectSymbol, RelocationEncoding, - RelocationKind, RelocationTarget, U64Bytes, +//! Debugging API. 
+ +use crate::{ + AnyRef, ExnRef, ExternRef, Func, Instance, Module, Val, ValType, + store::{AutoAssertNoGc, StoreOpaque}, + vm::{CurrentActivationBacktrace, VMContext}, +}; +use alloc::vec::Vec; +use core::{ffi::c_void, ptr::NonNull}; +use wasmtime_environ::{ + DefinedFuncIndex, FrameInstPos, FrameStackShape, FrameStateSlot, FrameStateSlotOffset, + FrameTableDescriptorIndex, FrameValType, FuncKey, }; -use wasmtime_environ::obj; +use wasmtime_unwinder::Frame; -pub(crate) fn create_gdbjit_image( - mut bytes: Vec, - code_region: (*const u8, usize), -) -> Result, Error> { - let e = ensure_supported_elf_format(&bytes)?; +impl StoreOpaque { + /// Provide an object that captures Wasm stack state, including + /// Wasm VM-level values (locals and operand stack). + /// + /// This object views all activations for the current store that + /// are on the stack. An activation is a contiguous sequence of + /// Wasm frames (called functions) that were called from host code + /// and called back out to host code. If there are activations + /// from multiple stores on the stack, for example if Wasm code in + /// one store calls out to host code which invokes another Wasm + /// function in another store, then the other stores are "opaque" + /// to our view here in the same way that host code is. + /// + /// Returns `None` if debug instrumentation is not enabled for + /// the engine containing this store. + pub fn stack_values(&mut self) -> Option> { + if !self.engine().tunables().debug_instrumentation { + return None; + } - // patch relocs - relocate_dwarf_sections(&mut bytes, code_region)?; + let iter = unsafe { CurrentActivationBacktrace::new(self) }; + Some(StackView { + iter, + is_trapping_frame: false, + frames: vec![], + }) + } +} - // elf is still missing details... 
- match e { - Endianness::Little => { - convert_object_elf_to_loadable_file::(&mut bytes, code_region) - } - Endianness::Big => { - convert_object_elf_to_loadable_file::(&mut bytes, code_region) +/// A view of values in active Wasm stack frames. +/// +/// See the documentation on `Store::stack_values` for more information +/// about which frames this view will show. +pub struct StackView<'a> { + /// Iterator over frames. + /// + /// This iterator owns the store while the view exists (accessible + /// as `iter.store`). + iter: CurrentActivationBacktrace<'a>, + + /// Is the next frame to be visited by the iterator a trapping + /// frame? + /// + /// This alters how we interpret `pc`: for a trap, we look at the + /// instruction that *starts* at `pc`, while for all frames + /// further up the stack (i.e., at a callsite), we look at the + /// instruction that *ends* at `pc`. + is_trapping_frame: bool, + + /// Virtual frame queue: decoded from `iter`, not yet + /// yielded. Innermost frame on top (last). + /// + /// This is only non-empty when there is more than one virtual + /// frame in a physical frame (i.e., for inlining); thus, its size + /// is bounded by our inlining depth. + frames: Vec, +} + +impl<'a> Iterator for StackView<'a> { + type Item = FrameView; + fn next(&mut self) -> Option { + // If there are no virtual frames to yield, take and decode + // the next physical frame. + // + // Note that `if` rather than `while` here, and the assert + // that we get some virtual frames back, enforce the invariant + // that each physical frame decodes to at least one virtual + // frame (i.e., there are no physical frames for interstitial + // functions or other things that we completely ignore). If + // this ever changes, we can remove the assert and convert + // this to a loop that polls until it finds virtual frames. 
+ if self.frames.is_empty() { + let next_frame = self.iter.next()?; + self.frames = VirtualFrame::decode(self.iter.store, next_frame, self.is_trapping_frame); + debug_assert!(!self.frames.is_empty()); + self.is_trapping_frame = false; } + + self.frames.pop().map(move |vf| FrameView::new(vf)) } +} - Ok(bytes) +/// Internal data pre-computed for one stack frame. +/// +/// This combines physical frame info (pc, fp) with the module this PC +/// maps to (yielding a frame table) and one frame as produced by the +/// progpoint lookup (Wasm PC, frame descriptor index, stack shape). +struct VirtualFrame { + /// The frame pointer. + fp: usize, + /// The resolved module handle for the physical PC. + /// + /// The module for each inlined frame within the physical frame is + /// resolved from the vmctx reachable for each such frame; this + /// module is used only for looking up the frame table. + module: Module, + /// The Wasm PC for this frame. + wasm_pc: u32, + /// The frame descriptor for this frame. + frame_descriptor: FrameTableDescriptorIndex, + /// The stack shape for this frame. + stack_shape: FrameStackShape, } -fn relocate_dwarf_sections(bytes: &mut [u8], code_region: (*const u8, usize)) -> Result<(), Error> { - let mut relocations = Vec::new(); - let obj = File::parse(&bytes[..]).map_err(obj::ObjectCrateErrorWrapper)?; - for section in obj.sections() { - let section_start = match section.file_range() { - Some((start, _)) => start, - None => continue, +impl VirtualFrame { + /// Return virtual frames corresponding to a physical frame, from + /// outermost to innermost. 
+ fn decode(store: &StoreOpaque, frame: Frame, is_trapping_frame: bool) -> Vec { + let module = store + .modules() + .lookup_module_by_pc(frame.pc()) + .expect("Wasm frame PC does not correspond to a module"); + let base = module.code_object().code_memory().text().as_ptr() as usize; + let pc = frame.pc().wrapping_sub(base); + let table = module.frame_table(); + let pc = u32::try_from(pc).expect("PC offset too large"); + let pos = if is_trapping_frame { + FrameInstPos::Pre + } else { + FrameInstPos::Post }; - for (off, r) in section.relocations() { - if r.kind() != RelocationKind::Absolute - || r.encoding() != RelocationEncoding::Generic - || r.size() != 64 - { - continue; - } - - let sym = match r.target() { - RelocationTarget::Symbol(index) => match obj.symbol_by_index(index) { - Ok(sym) => sym, - Err(_) => continue, - }, - _ => continue, - }; - relocations.push(( - section_start + off, - (code_region.0 as u64) - .wrapping_add(sym.address()) - .wrapping_add(r.addend() as u64), - )); + let Some(program_points) = table.find_program_point(pc, pos) else { + return vec![]; + }; + + program_points + .map(|(wasm_pc, frame_descriptor, stack_shape)| VirtualFrame { + fp: frame.fp(), + module: module.clone(), + wasm_pc, + frame_descriptor, + stack_shape, + }) + .collect() + } +} + +/// A view of a frame that can decode values in that frame. +pub struct FrameView { + slot_addr: usize, + func_key: FuncKey, + wasm_pc: u32, + /// Shape of locals in this frame. + /// + /// We need to store this locally because `FrameView` cannot + /// borrow the store: it needs a mut borrow, and an iterator + /// cannot yield the same mut borrow multiple times because it + /// cannot control the lifetime of the values it yields (the + /// signature of `next()` does not bound the return value to the + /// `&mut self` arg). + locals: Vec<(FrameStateSlotOffset, FrameValType)>, + /// Shape of the stack slots at this program point in this frame. 
+ /// + /// In addition to the borrowing-related reason above, we also + /// materialize this because we want to provide O(1) access to the + /// stack by depth, and the frame slot descriptor stores info in a + /// linked-list (actually DAG, with dedup'ing) way. + stack: Vec<(FrameStateSlotOffset, FrameValType)>, +} + +impl FrameView { + fn new(frame: VirtualFrame) -> Self { + let frame_table = frame.module.frame_table(); + // Parse the frame descriptor. + let (data, slot_to_fp_offset) = frame_table + .frame_descriptor(frame.frame_descriptor) + .unwrap(); + let frame_state_slot = FrameStateSlot::parse(data).unwrap(); + let slot_addr = frame + .fp + .wrapping_sub(usize::try_from(slot_to_fp_offset).unwrap()); + + // Materialize the stack shape so we have O(1) access to its + // elements, and so we don't need to keep the borrow to the + // module alive. + let mut stack = frame_state_slot + .stack(frame.stack_shape) + .collect::>(); + stack.reverse(); // Put top-of-stack last. + + // Materialize the local offsets/types so we don't need to + // keep the borrow to the module alive. + let locals = frame_state_slot.locals().collect::>(); + + FrameView { + slot_addr, + func_key: frame_state_slot.func_key(), + wasm_pc: frame.wasm_pc, + stack, + locals, } } - for (offset, value) in relocations { - let (loc, _) = offset - .try_into() - .ok() - .and_then(|offset| object::from_bytes_mut::>(&mut bytes[offset..]).ok()) - .ok_or_else(|| anyhow!("invalid dwarf relocations"))?; - loc.set(NE, value); + fn raw_instance<'a>(&self, _store: &'a mut StoreOpaque) -> &'a crate::vm::Instance { + // Read out the vmctx slot. + // SAFETY: vmctx is always at offset 0 in the slot. + let vmctx: *mut VMContext = unsafe { *(self.slot_addr as *mut _) }; + let vmctx = NonNull::new(vmctx).expect("null vmctx in debug state slot"); + // SAFETY: the stored vmctx value is a valid instance in this + // store; we only visit frames from this store in the + // backtrace. 
+ let instance = unsafe { crate::vm::Instance::from_vmctx(vmctx) }; + // SAFETY: the instance pointer read above is valid. + unsafe { instance.as_ref() } + } + + /// Get the instance associated with this frame. + pub fn instance(&self, view: &mut StackView<'_>) -> Instance { + let instance = self.raw_instance(view.iter.store); + Instance::from_wasmtime(instance.id(), view.iter.store) + } + + /// Get the module associated with this frame, if any (i.e., not a + /// container instance for a host-created entity). + pub fn module<'a>(&self, view: &'a mut StackView<'_>) -> Option<&'a Module> { + let instance = self.raw_instance(view.iter.store); + instance.runtime_module() + } + + /// Get the raw function index associated with this frame, and the + /// PC as an offset within its code section, if it is a Wasm + /// function directly from the given `Module` (rather than a + /// trampoline). + pub fn wasm_function_index_and_pc( + &self, + view: &mut StackView<'_>, + ) -> Option<(DefinedFuncIndex, u32)> { + let FuncKey::DefinedWasmFunction(module, func) = self.func_key else { + return None; + }; + debug_assert_eq!( + module, + self.module(view) + .expect("module should be defined if this is a defined function") + .env_module() + .module_index + ); + Some((func, self.wasm_pc)) + } + + /// Get the number of locals in this frame. + pub fn num_locals(&self) -> usize { + self.locals.len() + } + + /// Get the depth of the operand stack in this frame. + pub fn num_stacks(&self) -> usize { + self.stack.len() + } + + /// Get the type and value of the given local in this frame. + /// + /// # Panics + /// + /// Panics if the index is out-of-range (greater than or + /// equal to `num_locals()`). + pub fn local(&self, view: &mut StackView<'_>, index: usize) -> (ValType, Val) { + let (offset, ty) = self.locals[index]; + // SAFETY: compiler produced metadata to describe this local + // slot and stored a value of the correct type into it. 
+ unsafe { read_value(view.iter.store, self.slot_addr, offset, ty) } + } + + /// Get the type and value of the given operand-stack value in + /// this frame. + /// + /// Index 0 corresponds to the bottom-of-stack, and higher indices + /// from there are more recently pushed values. In other words, + /// index order reads the Wasm virtual machine's abstract stack + /// state left-to-right. + pub fn stack(&self, view: &mut StackView<'_>, index: usize) -> (ValType, Val) { + let (offset, ty) = self.stack[index]; + // SAFETY: compiler produced metadata to describe this + // operand-stack slot and stored a value of the correct type + // into it. + unsafe { read_value(view.iter.store, self.slot_addr, offset, ty) } } - Ok(()) } -fn ensure_supported_elf_format(bytes: &[u8]) -> Result { - use object::elf::*; - use object::read::elf::*; +/// Read the value at the given offset. +/// +/// # Safety +/// +/// The `offset` and `ty` must correspond to a valid value written +/// to the frame by generated code of the correct type. This will +/// be the case if this information comes from the frame tables +/// (as long as the frontend that generates the tables and +/// instrumentation is correct, and as long as the tables are +/// preserved through serialization). +unsafe fn read_value( + store: &mut StoreOpaque, + slot_base: usize, + offset: FrameStateSlotOffset, + ty: FrameValType, +) -> (ValType, Val) { + let address = slot_base.wrapping_add(usize::try_from(offset.offset()).unwrap()); - let kind = match object::FileKind::parse(bytes) { - Ok(file) => file, - Err(err) => { - bail!("Failed to parse file: {}", err); + // SAFETY: each case reads a value from memory that should be + // valid according to our safety condition. 
+ match ty { + FrameValType::I32 => { + let value = unsafe { *(address as *const i32) }; + (ValType::I32, Val::I32(value)) } - }; - let header = match kind { - object::FileKind::Elf64 => match object::elf::FileHeader64::::parse(bytes) { - Ok(header) => header, - Err(err) => { - bail!("Unsupported ELF file: {}", err); - } - }, - _ => { - bail!("only 64-bit ELF files currently supported") + FrameValType::I64 => { + let value = unsafe { *(address as *const i64) }; + (ValType::I64, Val::I64(value)) } - }; - let e = header.endian().unwrap(); - - match header.e_machine.get(e) { - EM_AARCH64 => (), - EM_X86_64 => (), - EM_S390 => (), - EM_RISCV => (), - machine => { - bail!("Unsupported ELF target machine: {:x}", machine); + FrameValType::F32 => { + let value = unsafe { *(address as *const u32) }; + (ValType::F32, Val::F32(value)) } - } - ensure!( - header.e_phoff.get(e) == 0 && header.e_phnum.get(e) == 0, - "program header table is empty" - ); - let e_shentsize = header.e_shentsize.get(e); - let req_shentsize = match e { - Endianness::Little => size_of::>(), - Endianness::Big => size_of::>(), - }; - ensure!(e_shentsize as usize == req_shentsize, "size of sh"); - Ok(e) -} - -fn convert_object_elf_to_loadable_file( - bytes: &mut Vec, - code_region: (*const u8, usize), -) { - let e = E::default(); - - let header = FileHeader64::::parse(&bytes[..]).unwrap(); - let sections = header.sections(e, &bytes[..]).unwrap(); - let text_range = match sections.section_by_name(e, b".text") { - Some((i, text)) => { - let range = text.file_range(e); - let e_shoff = usize::try_from(header.e_shoff.get(e)).unwrap(); - let off = e_shoff + i.0 * header.e_shentsize.get(e) as usize; - - let section: &mut SectionHeader64 = - object::from_bytes_mut(&mut bytes[off..]).unwrap().0; - // Patch vaddr, and save file location and its size. 
- section.sh_addr.set(e, code_region.0 as u64); - range + FrameValType::F64 => { + let value = unsafe { *(address as *const u64) }; + (ValType::F64, Val::F64(value)) + } + FrameValType::V128 => { + let value = unsafe { *(address as *const u128) }; + (ValType::V128, Val::V128(value.into())) + } + FrameValType::AnyRef => { + let mut nogc = AutoAssertNoGc::new(store); + let value = unsafe { *(address as *const u32) }; + let value = AnyRef::_from_raw(&mut nogc, value); + (ValType::ANYREF, Val::AnyRef(value)) + } + FrameValType::ExnRef => { + let mut nogc = AutoAssertNoGc::new(store); + let value = unsafe { *(address as *const u32) }; + let value = ExnRef::_from_raw(&mut nogc, value); + (ValType::EXNREF, Val::ExnRef(value)) + } + FrameValType::ExternRef => { + let mut nogc = AutoAssertNoGc::new(store); + let value = unsafe { *(address as *const u32) }; + let value = ExternRef::_from_raw(&mut nogc, value); + (ValType::EXTERNREF, Val::ExternRef(value)) + } + FrameValType::FuncRef => { + let value = unsafe { *(address as *const *mut c_void) }; + let value = unsafe { Func::_from_raw(store, value) }; + (ValType::FUNCREF, Val::FuncRef(value)) + } + FrameValType::ContRef => { + unimplemented!("contref values are not implemented in the host API yet") } - None => None, - }; - - // LLDB wants segment with virtual address set, placing them at the end of ELF. 
- let ph_off = bytes.len(); - let e_phentsize = size_of::>(); - let e_phnum = 1; - bytes.resize(ph_off + e_phentsize * e_phnum, 0); - if let Some((sh_offset, sh_size)) = text_range { - let (v_offset, size) = code_region; - let program: &mut ProgramHeader64 = - object::from_bytes_mut(&mut bytes[ph_off..]).unwrap().0; - program.p_type.set(e, PT_LOAD); - program.p_offset.set(e, sh_offset); - program.p_vaddr.set(e, v_offset as u64); - program.p_paddr.set(e, v_offset as u64); - program.p_filesz.set(e, sh_size); - program.p_memsz.set(e, size as u64); - } else { - unreachable!(); } - - // It is somewhat loadable ELF file at this moment. - let header: &mut FileHeader64 = object::from_bytes_mut(bytes).unwrap().0; - header.e_type.set(e, ET_DYN); - header.e_phoff.set(e, ph_off as u64); - header - .e_phentsize - .set(e, u16::try_from(e_phentsize).unwrap()); - header.e_phnum.set(e, u16::try_from(e_phnum).unwrap()); } diff --git a/crates/wasmtime/src/runtime/func.rs b/crates/wasmtime/src/runtime/func.rs index 4f801593ea6b..a3a476423a45 100644 --- a/crates/wasmtime/src/runtime/func.rs +++ b/crates/wasmtime/src/runtime/func.rs @@ -2228,6 +2228,15 @@ impl Caller<'_, T> { pub fn fuel_async_yield_interval(&mut self, interval: Option) -> Result<()> { self.store.fuel_async_yield_interval(interval) } + + /// Provide an object that captures Wasm stack state, including + /// Wasm VM-level values (locals and operand stack). + /// + /// See [`Store::stack_values`] for more details. 
+ #[cfg(feature = "debug")] + pub fn stack_values(&mut self) -> Option> { + self.store.stack_values() + } } impl AsContext for Caller<'_, T> { diff --git a/crates/wasmtime/src/runtime/module.rs b/crates/wasmtime/src/runtime/module.rs index 35b721405892..d044a5577d77 100644 --- a/crates/wasmtime/src/runtime/module.rs +++ b/crates/wasmtime/src/runtime/module.rs @@ -18,6 +18,8 @@ use core::ptr::NonNull; #[cfg(feature = "std")] use std::{fs::File, path::Path}; use wasmparser::{Parser, ValidPayload, Validator}; +#[cfg(feature = "debug")] +use wasmtime_environ::FrameTable; use wasmtime_environ::{ CompiledFunctionsTable, CompiledModuleInfo, EntityIndex, HostPtr, ModuleTypes, ObjectKind, TypeTrace, VMOffsets, VMSharedTypeIndex, @@ -1142,6 +1144,14 @@ impl Module { ExceptionTable::parse(self.inner.code.code_memory().exception_tables()) .expect("Exception tables were validated on module load") } + + /// Obtain a frame-table parser on this module's frame state slot + /// (debug instrumentation) metadata. + #[cfg(feature = "debug")] + pub(crate) fn frame_table<'a>(&'a self) -> FrameTable<'a> { + FrameTable::parse(self.inner.code.code_memory().frame_tables()) + .expect("Frame tables were validated on module load") + } } /// Describes a function for a given module. diff --git a/crates/wasmtime/src/runtime/module/registry.rs b/crates/wasmtime/src/runtime/module/registry.rs index 2167b2244699..a7836b4e3b8b 100644 --- a/crates/wasmtime/src/runtime/module/registry.rs +++ b/crates/wasmtime/src/runtime/module/registry.rs @@ -70,7 +70,7 @@ impl ModuleRegistry { } /// Fetches a registered module given a program counter value. 
- #[cfg(feature = "gc")] + #[cfg(any(feature = "gc", feature = "debug"))] pub fn lookup_module_by_pc(&self, pc: usize) -> Option<&Module> { let (module, _) = self.module_and_offset(pc)?; Some(module) diff --git a/crates/wasmtime/src/runtime/native_debug.rs b/crates/wasmtime/src/runtime/native_debug.rs new file mode 100644 index 000000000000..1fa4d430b88d --- /dev/null +++ b/crates/wasmtime/src/runtime/native_debug.rs @@ -0,0 +1,172 @@ +use crate::prelude::*; +use core::mem::size_of; +use object::elf::*; +use object::endian::{BigEndian, Endian, Endianness, LittleEndian}; +use object::read::elf::{FileHeader, SectionHeader}; +use object::{ + File, NativeEndian as NE, Object, ObjectSection, ObjectSymbol, RelocationEncoding, + RelocationKind, RelocationTarget, U64Bytes, +}; +use wasmtime_environ::obj; + +pub(crate) fn create_gdbjit_image( + mut bytes: Vec, + code_region: (*const u8, usize), +) -> Result, Error> { + let e = ensure_supported_elf_format(&bytes)?; + + // patch relocs + relocate_dwarf_sections(&mut bytes, code_region)?; + + // elf is still missing details... 
+ match e { + Endianness::Little => { + convert_object_elf_to_loadable_file::(&mut bytes, code_region) + } + Endianness::Big => { + convert_object_elf_to_loadable_file::(&mut bytes, code_region) + } + } + + Ok(bytes) +} + +fn relocate_dwarf_sections(bytes: &mut [u8], code_region: (*const u8, usize)) -> Result<(), Error> { + let mut relocations = Vec::new(); + let obj = File::parse(&bytes[..]).map_err(obj::ObjectCrateErrorWrapper)?; + for section in obj.sections() { + let section_start = match section.file_range() { + Some((start, _)) => start, + None => continue, + }; + for (off, r) in section.relocations() { + if r.kind() != RelocationKind::Absolute + || r.encoding() != RelocationEncoding::Generic + || r.size() != 64 + { + continue; + } + + let sym = match r.target() { + RelocationTarget::Symbol(index) => match obj.symbol_by_index(index) { + Ok(sym) => sym, + Err(_) => continue, + }, + _ => continue, + }; + relocations.push(( + section_start + off, + (code_region.0 as u64) + .wrapping_add(sym.address()) + .wrapping_add(r.addend() as u64), + )); + } + } + + for (offset, value) in relocations { + let (loc, _) = offset + .try_into() + .ok() + .and_then(|offset| object::from_bytes_mut::>(&mut bytes[offset..]).ok()) + .ok_or_else(|| anyhow!("invalid dwarf relocations"))?; + loc.set(NE, value); + } + Ok(()) +} + +fn ensure_supported_elf_format(bytes: &[u8]) -> Result { + use object::elf::*; + use object::read::elf::*; + + let kind = match object::FileKind::parse(bytes) { + Ok(file) => file, + Err(err) => { + bail!("Failed to parse file: {}", err); + } + }; + let header = match kind { + object::FileKind::Elf64 => match object::elf::FileHeader64::::parse(bytes) { + Ok(header) => header, + Err(err) => { + bail!("Unsupported ELF file: {}", err); + } + }, + _ => { + bail!("only 64-bit ELF files currently supported") + } + }; + let e = header.endian().unwrap(); + + match header.e_machine.get(e) { + EM_AARCH64 => (), + EM_X86_64 => (), + EM_S390 => (), + EM_RISCV => (), + 
machine => { + bail!("Unsupported ELF target machine: {:x}", machine); + } + } + ensure!( + header.e_phoff.get(e) == 0 && header.e_phnum.get(e) == 0, + "program header table is empty" + ); + let e_shentsize = header.e_shentsize.get(e); + let req_shentsize = match e { + Endianness::Little => size_of::>(), + Endianness::Big => size_of::>(), + }; + ensure!(e_shentsize as usize == req_shentsize, "size of sh"); + Ok(e) +} + +fn convert_object_elf_to_loadable_file( + bytes: &mut Vec, + code_region: (*const u8, usize), +) { + let e = E::default(); + + let header = FileHeader64::::parse(&bytes[..]).unwrap(); + let sections = header.sections(e, &bytes[..]).unwrap(); + let text_range = match sections.section_by_name(e, b".text") { + Some((i, text)) => { + let range = text.file_range(e); + let e_shoff = usize::try_from(header.e_shoff.get(e)).unwrap(); + let off = e_shoff + i.0 * header.e_shentsize.get(e) as usize; + + let section: &mut SectionHeader64 = + object::from_bytes_mut(&mut bytes[off..]).unwrap().0; + // Patch vaddr, and save file location and its size. + section.sh_addr.set(e, code_region.0 as u64); + range + } + None => None, + }; + + // LLDB wants segment with virtual address set, placing them at the end of ELF. + let ph_off = bytes.len(); + let e_phentsize = size_of::>(); + let e_phnum = 1; + bytes.resize(ph_off + e_phentsize * e_phnum, 0); + if let Some((sh_offset, sh_size)) = text_range { + let (v_offset, size) = code_region; + let program: &mut ProgramHeader64 = + object::from_bytes_mut(&mut bytes[ph_off..]).unwrap().0; + program.p_type.set(e, PT_LOAD); + program.p_offset.set(e, sh_offset); + program.p_vaddr.set(e, v_offset as u64); + program.p_paddr.set(e, v_offset as u64); + program.p_filesz.set(e, sh_size); + program.p_memsz.set(e, size as u64); + } else { + unreachable!(); + } + + // It is somewhat loadable ELF file at this moment. 
+ let header: &mut FileHeader64 = object::from_bytes_mut(bytes).unwrap().0; + header.e_type.set(e, ET_DYN); + header.e_phoff.set(e, ph_off as u64); + header + .e_phentsize + .set(e, u16::try_from(e_phentsize).unwrap()); + header.e_phnum.set(e, u16::try_from(e_phnum).unwrap()); +} diff --git a/crates/wasmtime/src/runtime/store.rs b/crates/wasmtime/src/runtime/store.rs index ca54b0f084d7..4bad060699a1 100644 --- a/crates/wasmtime/src/runtime/store.rs +++ b/crates/wasmtime/src/runtime/store.rs @@ -1167,6 +1167,25 @@ impl Store { pub fn has_pending_exception(&self) -> bool { self.inner.pending_exception.is_some() } + + /// Provide an object that captures Wasm stack state, including + /// Wasm VM-level values (locals and operand stack). + /// + /// This object views all activations for the current store that + /// are on the stack. An activation is a contiguous sequence of + /// Wasm frames (called functions) that were called from host code + /// and called back out to host code. If there are activations + /// from multiple stores on the stack, for example if Wasm code in + /// one store calls out to host code which invokes another Wasm + /// function in another store, then the other stores are "opaque" + /// to our view here in the same way that host code is. + /// + /// Returns `None` if debug instrumentation is not enabled for + /// the engine containing this store. + #[cfg(feature = "debug")] + pub fn stack_values(&mut self) -> Option> { + self.inner.stack_values() + } } impl<'a, T> StoreContext<'a, T> { @@ -1290,6 +1309,15 @@ impl<'a, T> StoreContextMut<'a, T> { pub fn has_pending_exception(&self) -> bool { self.0.inner.pending_exception.is_some() } + + /// Provide an object that captures Wasm stack state, including + /// Wasm VM-level values (locals and operand stack). + /// + /// See [`Store::stack_values`] for more details. 
+ #[cfg(feature = "debug")] + pub fn stack_values(&mut self) -> Option> { + self.0.inner.stack_values() + } } impl StoreInner { diff --git a/crates/wasmtime/src/runtime/vm/instance.rs b/crates/wasmtime/src/runtime/vm/instance.rs index 41965ac15c77..f59be04881e4 100644 --- a/crates/wasmtime/src/runtime/vm/instance.rs +++ b/crates/wasmtime/src/runtime/vm/instance.rs @@ -346,7 +346,7 @@ impl Instance { self.runtime_info.env_module() } - #[cfg(feature = "gc")] + #[cfg(any(feature = "gc", feature = "debug"))] pub(crate) fn runtime_module(&self) -> Option<&crate::Module> { match &self.runtime_info { ModuleRuntimeInfo::Module(m) => Some(m), diff --git a/crates/wasmtime/src/runtime/vm/traphandlers.rs b/crates/wasmtime/src/runtime/vm/traphandlers.rs index 1f6486bbc64d..939c2784d6f6 100644 --- a/crates/wasmtime/src/runtime/vm/traphandlers.rs +++ b/crates/wasmtime/src/runtime/vm/traphandlers.rs @@ -29,6 +29,8 @@ use core::ptr::{self, NonNull}; use wasmtime_unwinder::Handler; pub use self::backtrace::Backtrace; +#[cfg(feature = "debug")] +pub(crate) use self::backtrace::CurrentActivationBacktrace; #[cfg(feature = "gc")] pub use wasmtime_unwinder::Frame; diff --git a/crates/wasmtime/src/runtime/vm/traphandlers/backtrace.rs b/crates/wasmtime/src/runtime/vm/traphandlers/backtrace.rs index 055c4f2aff69..79873c4fcaf7 100644 --- a/crates/wasmtime/src/runtime/vm/traphandlers/backtrace.rs +++ b/crates/wasmtime/src/runtime/vm/traphandlers/backtrace.rs @@ -136,6 +136,19 @@ impl Backtrace { /// If Wasm hit a trap, and we calling this from the trap handler, then the /// Wasm exit trampoline didn't run, and we use the provided PC and FP /// instead of looking them up in `VMStoreContext`. + /// + /// We define "current Wasm stack" here as "all activations + /// associated with the given store". 
That is: if we have a stack like + /// + /// ```plain + /// host --> (Wasm functions in store A) --> host --> (Wasm functions in store B) --> host + /// --> (Wasm functions in store A) --> host --> call `trace_with_trap_state` with store A + /// ``` + /// + /// then we will see the first and third Wasm activations (those + /// associated with store A), but not that with store B. In + /// essence, activations from another store might as well be some + /// other opaque host code; we don't know anything about it. pub(crate) unsafe fn trace_with_trap_state( vm_store_context: *const VMStoreContext, unwind: &dyn Unwind, @@ -312,3 +325,59 @@ impl Backtrace { self.0.iter() } } + +/// An iterator over one Wasm activation. +#[cfg(feature = "debug")] +pub(crate) struct CurrentActivationBacktrace<'a> { + pub(crate) store: &'a mut StoreOpaque, + inner: Box>, +} + +#[cfg(feature = "debug")] +impl<'a> CurrentActivationBacktrace<'a> { + /// Return an iterator over the most recent Wasm activation. + /// + /// The iterator captures the store with a mutable borrow, and + /// then yields it back at each frame. This ensures that the stack + /// remains live while still providing a mutable store that may be + /// needed to access items in the frame (e.g., to create new roots + /// when reading out GC refs). + /// + /// This serves as an alternative to `Backtrace::trace()` and + /// friends: it allows external iteration (and e.g. lazily walking + /// through frames in a stack) rather than visiting via a closure. + /// + /// # Safety + /// + /// Although the iterator yields a mutable store back at each + /// iteration, this *must not* be used to mutate the stack + /// activation itself that this iterator is visiting. While the + /// `store` technically owns the stack in question, the only way + /// to do this with the current API would be to return back into + /// the Wasm activation. 
As long as this iterator is held and used + /// while within host code called from that activation (which will + /// ordinarily be ensured if the `store`'s lifetime came from the + /// host entry point) then everything will be sound. + pub(crate) unsafe fn new(store: &'a mut StoreOpaque) -> CurrentActivationBacktrace<'a> { + // Get the initial exit FP, exit PC, and entry FP. + let vm_store_context = store.vm_store_context(); + let exit_pc = unsafe { *(*vm_store_context).last_wasm_exit_pc.get() }; + let exit_fp = unsafe { (*vm_store_context).last_wasm_exit_fp() }; + let trampoline_fp = unsafe { *(*vm_store_context).last_wasm_entry_fp.get() }; + let unwind = store.unwinder(); + // Establish the iterator. + let inner = Box::new(unsafe { + wasmtime_unwinder::frame_iterator(unwind, exit_pc, exit_fp, trampoline_fp) + }); + + CurrentActivationBacktrace { store, inner } + } +} + +#[cfg(feature = "debug")] +impl<'a> Iterator for CurrentActivationBacktrace<'a> { + type Item = Frame; + fn next(&mut self) -> Option { + self.inner.next() + } +} diff --git a/crates/winch/src/builder.rs b/crates/winch/src/builder.rs index a7ddfbb3bab2..0c6b2b5e7132 100644 --- a/crates/winch/src/builder.rs +++ b/crates/winch/src/builder.rs @@ -67,6 +67,10 @@ impl CompilerBuilder for Builder { bail!("Winch does not currently support generating native debug information"); } + if tunables.debug_instrumentation { + bail!("Winch does not currently support debug instrumentation"); + } + self.tunables = Some(tunables.clone()); self.cranelift.set_tunables(tunables)?; Ok(()) diff --git a/crates/winch/src/compiler.rs b/crates/winch/src/compiler.rs index 66279e0f2bd7..13a5e80d988f 100644 --- a/crates/winch/src/compiler.rs +++ b/crates/winch/src/compiler.rs @@ -189,7 +189,7 @@ impl wasmtime_environ::Compiler for Compiler { fn append_code( &self, obj: &mut Object<'static>, - funcs: &[(String, Box)], + funcs: &[(String, FuncKey, Box)], resolve_reloc: &dyn Fn(usize, wasmtime_environ::FuncKey) -> usize, ) -> 
Result> { self.trampolines.append_code(obj, funcs, resolve_reloc) @@ -334,7 +334,7 @@ impl wasmtime_environ::Compiler for NoInlineCompiler { fn append_code( &self, obj: &mut Object<'static>, - funcs: &[(String, Box)], + funcs: &[(String, FuncKey, Box)], resolve_reloc: &dyn Fn(usize, FuncKey) -> usize, ) -> Result> { self.0.append_code(obj, funcs, resolve_reloc) diff --git a/examples/fib-debug/main.rs b/examples/fib-debug/main.rs index 55a6e84cafe2..b1025c0bc97d 100644 --- a/examples/fib-debug/main.rs +++ b/examples/fib-debug/main.rs @@ -16,7 +16,7 @@ fn main() -> Result<()> { // debugged in GDB. let engine = Engine::new( Config::new() - .debug_info(true) + .native_debug_info(true) .cranelift_opt_level(OptLevel::None), )?; let mut store = Store::new(&engine, ()); diff --git a/src/commands/objdump.rs b/src/commands/objdump.rs index bf0469331c2f..60e595b395ab 100644 --- a/src/commands/objdump.rs +++ b/src/commands/objdump.rs @@ -14,7 +14,10 @@ use std::iter::{self, Peekable}; use std::path::{Path, PathBuf}; use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; use wasmtime::Engine; -use wasmtime_environ::{FilePos, StackMap, Trap, obj}; +use wasmtime_environ::{ + FilePos, FrameInstPos, FrameStackShape, FrameStateSlot, FrameTable, FrameTableDescriptorIndex, + StackMap, Trap, obj, +}; use wasmtime_unwinder::{ExceptionHandler, ExceptionTable}; /// A helper utility in wasmtime to explore the compiled object file format of @@ -70,6 +73,10 @@ pub struct ObjdumpCommand { /// Whether or not to show information about exception tables. #[arg(long, require_equals = true, value_name = "true|false")] exception_tables: Option>, + + /// Whether or not to show information about frame tables. 
+ #[arg(long, require_equals = true, value_name = "true|false")] + frame_tables: Option>, } fn optional_flag_with_default(flag: Option>, default: bool) -> bool { @@ -97,6 +104,10 @@ impl ObjdumpCommand { optional_flag_with_default(self.exception_tables, true) } + fn frame_tables(&self) -> bool { + optional_flag_with_default(self.frame_tables, true) + } + /// Executes the command. pub fn execute(self) -> Result<()> { // Setup stdout handling color options. Also build some variables used @@ -150,6 +161,18 @@ impl ObjdumpCommand { .and_then(|bytes| ExceptionTable::parse(bytes).ok()) .map(|table| table.into_iter()) .map(|i| (Box::new(i) as Box>).peekable()), + frame_tables: elf + .section_by_name(obj::ELF_WASMTIME_FRAMES) + .and_then(|section| section.data().ok()) + .and_then(|bytes| FrameTable::parse(bytes).ok()) + .map(|table| table.into_program_points()) + .map(|i| (Box::new(i) as Box>).peekable()), + + frame_table_descriptors: elf + .section_by_name(obj::ELF_WASMTIME_FRAMES) + .and_then(|section| section.data().ok()) + .and_then(|bytes| FrameTable::parse(bytes).ok()), + objdump: &self, }; @@ -528,6 +551,21 @@ struct Decorator<'a> { stack_maps: Option)> + 'a>>>, exception_tables: Option, Vec)> + 'a>>>, + frame_tables: Option< + Peekable< + Box< + dyn Iterator< + Item = ( + u32, + FrameInstPos, + Vec<(u32, FrameTableDescriptorIndex, FrameStackShape)>, + ), + > + 'a, + >, + >, + >, + + frame_table_descriptors: Option>, } impl Decorator<'_> { @@ -536,6 +574,7 @@ impl Decorator<'_> { self.traps(address, post_list); self.stack_maps(address, post_list); self.exception_table(address, pre_list); + self.frame_table(address, pre_list, post_list); } fn addrmap(&mut self, address: u64, list: &mut Vec) { @@ -625,4 +664,69 @@ impl Decorator<'_> { } } } + + fn frame_table( + &mut self, + address: u64, + pre_list: &mut Vec, + post_list: &mut Vec, + ) { + if !self.objdump.frame_tables() { + return; + } + let (Some(frame_table_iter), Some(frame_tables)) = + (&mut self.frame_tables, 
&self.frame_table_descriptors) + else { + return; + }; + + while let Some((addr, pos, frames)) = + frame_table_iter.next_if(|(addr, _, _)| u64::from(*addr) <= address) + { + if u64::from(addr) != address { + continue; + } + let list = match pos { + // N.B.: the "post" position means that we are + // attached to the end of the previous instruction + // (its "post"); which means that from this + // instruction's PoV, we print before the instruction + // (the "pre list"). And vice versa for the "pre" + // position. Hence the reversal here. + FrameInstPos::Post => &mut *pre_list, + FrameInstPos::Pre => &mut *post_list, + }; + let pos = match pos { + FrameInstPos::Post => "after previous inst", + FrameInstPos::Pre => "before next inst", + }; + for (wasm_pc, frame_descriptor, stack_shape) in frames { + let (frame_descriptor_data, offset) = + frame_tables.frame_descriptor(frame_descriptor).unwrap(); + let frame_descriptor = FrameStateSlot::parse(frame_descriptor_data).unwrap(); + + let local_shape = Self::describe_local_shape(&frame_descriptor); + let stack_shape = Self::describe_stack_shape(&frame_descriptor, stack_shape); + let func_key = frame_descriptor.func_key(); + list.push(format!("debug frame state ({pos}): func key {func_key:?}, wasm PC {wasm_pc}, slot at FP-0x{offset:x}, locals {local_shape}, stack {stack_shape}")); + } + } + } + + fn describe_local_shape(desc: &FrameStateSlot<'_>) -> String { + let mut parts = vec![]; + for (offset, ty) in desc.locals() { + parts.push(format!("{ty:?} @ slot+0x{:x}", offset.offset())); + } + parts.join(", ") + } + + fn describe_stack_shape(desc: &FrameStateSlot<'_>, shape: FrameStackShape) -> String { + let mut parts = vec![]; + for (offset, ty) in desc.stack(shape) { + parts.push(format!("{ty:?} @ slot+0x{:x}", offset.offset())); + } + parts.reverse(); + parts.join(", ") + } } diff --git a/tests/all/debug.rs b/tests/all/debug.rs new file mode 100644 index 000000000000..0e1065716427 --- /dev/null +++ b/tests/all/debug.rs @@ 
-0,0 +1,126 @@ +//! Tests for instrumentation-based debugging. + +use wasmtime::{Caller, Config, Engine, Extern, Func, Instance, Module, Store, ValType}; + +fn test_stack_values) + Send + Sync + 'static>( + wat: &str, + c: C, + f: F, +) -> anyhow::Result<()> { + let mut config = Config::default(); + config.debug_instrumentation(true); + config.wasm_exceptions(true); + c(&mut config); + let engine = Engine::new(&config)?; + let module = Module::new(&engine, wat)?; + + let mut store = Store::new(&engine, ()); + let func = Func::wrap(&mut store, move |caller: Caller<'_, ()>| { + f(caller); + }); + let instance = Instance::new(&mut store, &module, &[Extern::Func(func)])?; + let mut results = []; + instance + .get_func(&mut store, "main") + .unwrap() + .call(&mut store, &[], &mut results)?; + + Ok(()) +} + +#[test] +fn stack_values_two_frames() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + + for inlining in [false, true] { + test_stack_values( + r#" + (module + (import "" "host" (func)) + (func (export "main") + i32.const 1 + i32.const 2 + call 2 + drop) + (func (param i32 i32) (result i32) + local.get 0 + local.get 1 + call 0 + i32.add)) + "#, + |config| { + config.compiler_inlining(inlining); + if inlining { + unsafe { + config.cranelift_flag_set("wasmtime_inlining_intra_module", "true"); + } + } + }, + |mut caller: Caller<'_, ()>| { + let mut stack = caller.stack_values().unwrap(); + let frame = stack.next().unwrap(); + assert_eq!( + frame + .wasm_function_index_and_pc(&mut stack) + .unwrap() + .0 + .as_u32(), + 1 + ); + assert_eq!(frame.wasm_function_index_and_pc(&mut stack).unwrap().1, 65); + + assert_eq!(frame.num_locals(), 2); + assert_eq!(frame.num_stacks(), 2); + assert!(matches!(frame.local(&mut stack, 0).0, ValType::I32)); + assert!(matches!(frame.local(&mut stack, 1).0, ValType::I32)); + assert_eq!(frame.local(&mut stack, 0).1.unwrap_i32(), 1); + assert_eq!(frame.local(&mut stack, 1).1.unwrap_i32(), 2); + assert!(matches!(frame.stack(&mut 
stack, 0).0, ValType::I32)); + assert!(matches!(frame.stack(&mut stack, 1).0, ValType::I32)); + assert_eq!(frame.stack(&mut stack, 0).1.unwrap_i32(), 1); + assert_eq!(frame.stack(&mut stack, 1).1.unwrap_i32(), 2); + + let frame = stack.next().unwrap(); + assert_eq!( + frame + .wasm_function_index_and_pc(&mut stack) + .unwrap() + .0 + .as_u32(), + 0 + ); + assert_eq!(frame.wasm_function_index_and_pc(&mut stack).unwrap().1, 55); + + assert!(stack.next().is_none()); + }, + )?; + } + Ok(()) +} + +#[test] +fn stack_values_exceptions() -> anyhow::Result<()> { + test_stack_values( + r#" + (module + (tag $t (param i32)) + (import "" "host" (func)) + (func (export "main") + (block $b (result i32) + (try_table (catch $t $b) + (throw $t (i32.const 42))) + i32.const 0) + (call 0) + (drop))) + "#, + |_config| {}, + |mut caller: Caller<'_, ()>| { + let mut stack = caller.stack_values().unwrap(); + let frame = stack.next().unwrap(); + assert_eq!(frame.num_stacks(), 1); + assert!(matches!(frame.stack(&mut stack, 0).0, ValType::I32)); + assert_eq!(frame.stack(&mut stack, 0).1.unwrap_i32(), 42); + assert!(stack.next().is_none()); + }, + ) +} diff --git a/tests/all/main.rs b/tests/all/main.rs index c91f7bb78ca8..5e3568fda87f 100644 --- a/tests/all/main.rs +++ b/tests/all/main.rs @@ -35,6 +35,7 @@ mod memory_creator; mod module; mod module_serialize; mod name; +mod native_debug; mod noextern; mod piped_tests; mod pooling_allocator; diff --git a/tests/all/debug/dump.rs b/tests/all/native_debug/dump.rs similarity index 100% rename from tests/all/debug/dump.rs rename to tests/all/native_debug/dump.rs diff --git a/tests/all/debug/gdb.rs b/tests/all/native_debug/gdb.rs similarity index 98% rename from tests/all/debug/gdb.rs rename to tests/all/native_debug/gdb.rs index 9a124de51cea..c5e104ee11c6 100644 --- a/tests/all/debug/gdb.rs +++ b/tests/all/native_debug/gdb.rs @@ -55,7 +55,7 @@ fn test_debug_dwarf_gdb() -> Result<()> { let output = gdb_with_script( &[ "-Ccache=n", - "-Ddebug-info", + 
"-Dnative-debug-info", "-Oopt-level=0", "--invoke", "fib", diff --git a/tests/all/debug/lldb.rs b/tests/all/native_debug/lldb.rs similarity index 93% rename from tests/all/debug/lldb.rs rename to tests/all/native_debug/lldb.rs index d64387fe2688..9968a0c113a3 100644 --- a/tests/all/debug/lldb.rs +++ b/tests/all/native_debug/lldb.rs @@ -85,7 +85,7 @@ pub fn dwarf_fib_wasm() -> Result<()> { let output = lldb_with_script( &[ "-Ccache=n", - "-Ddebug-info", + "-Dnative-debug-info", "-Oopt-level=0", "--invoke", "fib", @@ -123,7 +123,7 @@ pub fn dwarf_fib_wasm_dwarf5() -> Result<()> { let output = lldb_with_script( &[ "-Ccache=n", - "-Ddebug-info", + "-Dnative-debug-info", "-Oopt-level=0", "--invoke", "fib", @@ -161,7 +161,7 @@ pub fn dwarf_fib_wasm_split4() -> Result<()> { let output = lldb_with_script( &[ "-Ccache=n", - "-Ddebug-info", + "-Dnative-debug-info", "-Oopt-level=0", "--invoke", "fib", @@ -197,7 +197,12 @@ check: exited with status #[ignore] pub fn dwarf_generic() -> Result<()> { let output = lldb_with_script( - &["-Ccache=n", "-Ddebug-info", "-Oopt-level=0", DWARF_GENERIC], + &[ + "-Ccache=n", + "-Dnative-debug-info", + "-Oopt-level=0", + DWARF_GENERIC, + ], r#"br set -n debug_break -C up r p __vmctx->set() @@ -256,7 +261,7 @@ pub fn dwarf_codegen_optimized() -> Result<()> { let output = lldb_with_script( &[ "-Ccache=n", - "-Ddebug-info", + "-Dnative-debug-info", "-Oopt-level=2", DWARF_CODEGEN_OPTIMIZED, ], @@ -291,7 +296,7 @@ pub fn dwarf_codegen_optimized_wasm_optimized() -> Result<()> { let output = lldb_with_script( &[ "-Ccache=n", - "-Ddebug-info", + "-Dnative-debug-info", "-Oopt-level=2", DWARF_CODEGEN_OPTIMIZED_WASM_OPTIMIZED, ], @@ -327,7 +332,7 @@ pub fn dwarf_fraction_norm() -> Result<()> { &[ "-Ccache=n", "-Oopt-level=0", - "-Ddebug-info", + "-Dnative-debug-info", DWARF_FRACTION_NORM, ], r#"b dwarf_fraction_norm.cc:26 @@ -357,7 +362,7 @@ pub fn dwarf_two_removed_branches() -> Result<()> { &[ "-Ccache=n", "-Oopt-level=0", - "-Ddebug-info", + 
"-Dnative-debug-info", DWARF_TWO_REMOVED_BRANCHES, ], r#"r"#, @@ -380,7 +385,7 @@ pub fn dwarf_spilled_frame_base() -> Result<()> { &[ "-Ccache=n", "-Oopt-level=0", - "-Ddebug-info", + "-Dnative-debug-info", DWARF_SPILLED_FRAME_BASE, ], r#"b dwarf_spilled_frame_base.c:13 @@ -421,7 +426,12 @@ check: exited with status #[ignore] pub fn dwarf_fission() -> Result<()> { let output = lldb_with_script( - &["-Ccache=n", "-Ddebug-info", "-Oopt-level=0", DWARF_FISSION], + &[ + "-Ccache=n", + "-Dnative-debug-info", + "-Oopt-level=0", + DWARF_FISSION, + ], r#"breakpoint set --file dwarf_fission.c --line 8 r fr v @@ -449,7 +459,7 @@ check: exited with status = 0 fn test_dwarf_simple(wasm: &str, extra_args: &[&str]) -> Result<()> { println!("testing {wasm:?}"); - let mut args = vec!["-Ccache=n", "-Oopt-level=0", "-Ddebug-info"]; + let mut args = vec!["-Ccache=n", "-Oopt-level=0", "-Dnative-debug-info"]; args.extend(extra_args); args.push(wasm); let output = lldb_with_script( @@ -498,7 +508,7 @@ fn dwarf_simple() -> Result<()> { fn dwarf_imported_memory() -> Result<()> { test_dwarf_simple( DWARF_IMPORTED_MEMORY, - &["--preload=env=./tests/all/debug/satisfy_memory_import.wat"], + &["--preload=env=./tests/all/native_debug/satisfy_memory_import.wat"], ) } @@ -517,7 +527,7 @@ fn dwarf_multiple_codegen_units() -> Result<()> { ] { println!("testing {wasm:?}"); let output = lldb_with_script( - &["-Ccache=n", "-Oopt-level=0", "-Ddebug-info", wasm], + &["-Ccache=n", "-Oopt-level=0", "-Dnative-debug-info", wasm], r#" breakpoint set --file dwarf_multiple_codegen_units.rs --line 3 breakpoint set --file dwarf_multiple_codegen_units.rs --line 10 diff --git a/tests/all/debug/mod.rs b/tests/all/native_debug/mod.rs similarity index 100% rename from tests/all/debug/mod.rs rename to tests/all/native_debug/mod.rs diff --git a/tests/all/debug/obj.rs b/tests/all/native_debug/obj.rs similarity index 95% rename from tests/all/debug/obj.rs rename to tests/all/native_debug/obj.rs index 
4395da771b9d..feb993b99f72 100644 --- a/tests/all/debug/obj.rs +++ b/tests/all/native_debug/obj.rs @@ -12,7 +12,7 @@ pub fn compile_cranelift( output: impl AsRef, ) -> Result<()> { let mut config = Config::new(); - config.debug_info(true); + config.native_debug_info(true); if let Some(target) = target { config.target(&target.to_string())?; } diff --git a/tests/all/debug/satisfy_memory_import.wat b/tests/all/native_debug/satisfy_memory_import.wat similarity index 100% rename from tests/all/debug/satisfy_memory_import.wat rename to tests/all/native_debug/satisfy_memory_import.wat diff --git a/tests/all/debug/simulate.rs b/tests/all/native_debug/simulate.rs similarity index 100% rename from tests/all/debug/simulate.rs rename to tests/all/native_debug/simulate.rs diff --git a/tests/all/debug/translate.rs b/tests/all/native_debug/translate.rs similarity index 100% rename from tests/all/debug/translate.rs rename to tests/all/native_debug/translate.rs diff --git a/tests/all/pulley.rs b/tests/all/pulley.rs index 870d621c34b0..427adf77d06c 100644 --- a/tests/all/pulley.rs +++ b/tests/all/pulley.rs @@ -487,7 +487,7 @@ async fn pulley_provenance_test_async_components() -> Result<()> { #[cfg(not(miri))] fn enabling_debug_info_doesnt_break_anything() -> Result<()> { let mut config = pulley_config(); - config.debug_info(true); + config.native_debug_info(true); let engine = Engine::new(&config)?; assert!(Module::from_file(&engine, "./tests/all/cli_tests/greeter_command.wat").is_err()); Ok(()) diff --git a/tests/all/winch_engine_features.rs b/tests/all/winch_engine_features.rs index 27f3508cae29..954ce213a277 100644 --- a/tests/all/winch_engine_features.rs +++ b/tests/all/winch_engine_features.rs @@ -52,7 +52,7 @@ fn ensure_compatibility_between_winch_and_signals_based_traps(config: &mut Confi fn ensure_compatibility_between_winch_and_generate_native_debuginfo( config: &mut Config, ) -> Result<()> { - config.debug_info(true); + config.native_debug_info(true); let result = 
Engine::new(&config); match result { Ok(_) => { diff --git a/tests/disas/debug-exceptions.wat b/tests/disas/debug-exceptions.wat new file mode 100644 index 000000000000..2a90b5d81553 --- /dev/null +++ b/tests/disas/debug-exceptions.wat @@ -0,0 +1,93 @@ +;;! target = "aarch64" +;;! test = "compile" +;;! flags = ["-Wexceptions=yes", "-Wgc=yes", "-Ddebug-instrumentation=yes"] + +(module + (tag $t (param i32)) + (import "" "host" (func)) + (func (export "main") + (block $b (result i32) + (try_table (catch $t $b) + (drop (i32.const 42)) + (throw $t (i32.const 42))) + i32.const 0) + (call 0) + (drop))) +;; wasm[0]::function[1]: +;; stp x29, x30, [sp, #-0x10]! +;; mov x29, sp +;; ldur x16, [x2, #8] +;; ldur x16, [x16, #0x10] +;; add x16, x16, #0xc0 +;; cmp sp, x16 +;; b.lo #0x110 +;; 1c: stp x27, x28, [sp, #-0x10]! +;; stp x25, x26, [sp, #-0x10]! +;; stp x23, x24, [sp, #-0x10]! +;; stp x21, x22, [sp, #-0x10]! +;; stp x19, x20, [sp, #-0x10]! +;; stp d14, d15, [sp, #-0x10]! +;; stp d12, d13, [sp, #-0x10]! +;; stp d10, d11, [sp, #-0x10]! +;; stp d8, d9, [sp, #-0x10]! 
+;; sub sp, sp, #0x20 +;; stur x2, [sp] +;; stur x2, [sp, #0x10] +;; mov w27, #0x2a +;; ╰─╼ debug frame state (before next inst): func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 61, slot at FP-0xb0, locals , stack +;; stur w27, [sp, #8] +;; stur w27, [sp, #8] +;; ╰─╼ debug frame state (before next inst): func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 64, slot at FP-0xb0, locals , stack +;; ldur x2, [sp, #0x10] +;; ╰─╼ debug frame state (before next inst): func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 66, slot at FP-0xb0, locals , stack I32 @ slot+0x8 +;; bl #0x31c +;; 60: mov x21, x2 +;; mov w3, #0x4000000 +;; mov w4, #2 +;; mov w5, #0x28 +;; mov w6, #8 +;; ldur x2, [sp, #0x10] +;; bl #0x2a8 +;; 7c: ldur x11, [sp, #0x10] +;; ldr x0, [x11, #8] +;; ldr x5, [x0, #0x18] +;; add x0, x5, #0x20 +;; str w27, [x0, w2, uxtw] +;; add x3, x5, #0x18 +;; mov x4, x21 +;; str w4, [x3, w2, uxtw] +;; mov w3, #0 +;; add x4, x5, #0x1c +;; stur x5, [sp, #0x18] +;; str w3, [x4, w2, uxtw] +;; mov x3, x2 +;; ldur x2, [sp, #0x10] +;; bl #0x354 +;; ├─╼ exception frame offset: SP = FP - 0xb0 +;; ╰─╼ exception handler: tag=0, context at [SP+0x10], handler=0xbc +;; b8: .byte 0x1f, 0xc1, 0x00, 0x00 +;; ldur x5, [sp, #0x18] +;; add x4, x5, #0x20 +;; ldr w6, [x4, w0, uxtw] +;; stur w6, [sp, #8] +;; ldur x2, [sp, #0x10] +;; ╰─╼ debug frame state (before next inst): func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 72, slot at FP-0xb0, locals , stack I32 @ slot+0x8 +;; ldr x8, [x2, #0x30] +;; ldr x2, [x2, #0x40] +;; ldur x3, [sp, #0x10] +;; blr x8 +;; ╰─╼ debug frame state (after previous inst): func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), wasm PC 72, slot at FP-0xb0, locals , stack I32 @ slot+0x8 +;; e0: add sp, sp, #0x20 +;; ╰─╼ debug frame state (before next inst): func key DefinedWasmFunction(StaticModuleIndex(0), DefinedFuncIndex(0)), 
wasm PC 75, slot at FP-0xb0, locals , stack +;; ldp d8, d9, [sp], #0x10 +;; ldp d10, d11, [sp], #0x10 +;; ldp d12, d13, [sp], #0x10 +;; ldp d14, d15, [sp], #0x10 +;; ldp x19, x20, [sp], #0x10 +;; ldp x21, x22, [sp], #0x10 +;; ldp x23, x24, [sp], #0x10 +;; ldp x25, x26, [sp], #0x10 +;; ldp x27, x28, [sp], #0x10 +;; ldp x29, x30, [sp], #0x10 +;; ret +;; 110: .byte 0x1f, 0xc1, 0x00, 0x00