//! Synthetic 64-bit Wasm address space expected by the LLDB Wasm
//! extensions.
//!
//! WebAssembly is natively *multi-memory* and *multi-address-space*:
//!
//! - It supports zero or more "linear memories", and they have no
//!   canonical mapping into a single global address space for
//!   pointers; rather, each load and store instruction names which
//!   memory it accesses statically.
//! - It supports one or more "modules" containing first-class
//!   functions, and they have no canonical mapping into a single
//!   global code space; rather, control flow is structured, and calls
//!   between functions in different modules only occur via explicit
//!   strongly-typed function imports and exports.
//!
//! Wasm implementations typically represent these concepts directly
//! rather than attempt to map to a more conventional ISA model of a
//! single flat address space with machine code and data. However, the
//! gdbstub protocol assumes the latter: all of its commands, such as
//! memory reads/writes, breakpoint updates, and the like, use
//! integers as pointers in a single address space.
//!
//! The LLDB Wasm extensions thus define a canonical mapping between
//! the multi-address-space world and a flat 64-bit address space for
//! the purposes of the protocol only. Note that this is 64-bit even
//! when Wasm natively has 32-bit memory offsets (the "wasm32"
//! architecture), because the definition adds additional information
//! above the 32-bit offset.
//!
//! The [ProcessWasm.h] header file in the LLDB source contains
//! definitions that are as close to documentation as we can find: see
//! the `WasmAddressType` and `wasm_addr_t` definitions.
//!
//! An address consists of three parts:
//!
//! - The type: code or data. Wasm has separate "address spaces" for
//!   these, so they are mapped to different regions of the 64-bit
//!   synthetic space.
//!
//!   Note that this implies that the original bytecode (the full
//!   image of the Wasm module, starting with its magic number) is
//!   present in this synthetic address space. An engine that
//!   implements debugging for Wasm should keep around the original
//!   bytecode, even if it does ahead-of-time compilation or other
//!   processing, so that the debugger can use it: LLDB will read the
//!   module bytecode from the synthetic address space, including its
//!   debug sections, rather than find the image elsewhere.
//!
//! - The module/memory index. The engine decides an arbitrary index
//!   ordering for all of the Wasm modules and Wasm linear memories
//!   present in a given execution.
//!
//! - The offset within that Wasm module bytecode or linear memory.
//!
//! [ProcessWasm.h]: https://github.com/llvm/llvm-project/blob/main/lldb/source/Plugins/Process/wasm/ProcessWasm.h

/// Discriminates the two kinds of location that an address in the
/// Wasm target's synthetic address space can refer to.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum WasmAddrType {
    /// The address points into a 32-bit linear memory.
    Memory,
    /// The address points into the image of a `.wasm` module.
    ///
    /// Addresses of this kind appear in memory-read commands (to
    /// fetch the Wasm binary from the gdbstub host) and in
    /// software-breakpoint commands.
    Object,
}
| 69 | + |
/// A location in the Wasm target's synthetic address space, held in
/// its raw 64-bit encoded form.
///
/// Equality, ordering and hashing are derived, so they all operate on
/// the raw encoded value.
#[derive(Clone, Copy, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct WasmAddr(u64);
| 73 | + |
| 74 | +impl WasmAddr { |
| 75 | + const TYPE_BITS: u32 = 2; |
| 76 | + const MODULE_BITS: u32 = 30; |
| 77 | + const OFFSET_BITS: u32 = 32; |
| 78 | + |
| 79 | + const MODULE_SHIFT: u32 = Self::OFFSET_BITS; |
| 80 | + const TYPE_SHIFT: u32 = Self::OFFSET_BITS + Self::MODULE_BITS; |
| 81 | + |
| 82 | + const TYPE_MASK: u64 = (1u64 << Self::TYPE_BITS) - 1; |
| 83 | + const MODULE_MASK: u64 = (1u64 << Self::MODULE_BITS) - 1; |
| 84 | + const OFFSET_MASK: u64 = (1u64 << Self::OFFSET_BITS) - 1; |
| 85 | + |
| 86 | + /// Construct a `WasmAddr` from a raw 64-bit encoded address. |
| 87 | + /// |
| 88 | + /// Returns `None` if the encoding is invalid. |
| 89 | + pub fn from_raw(raw: u64) -> Option<Self> { |
| 90 | + let type_bits = (raw >> Self::TYPE_SHIFT) & Self::TYPE_MASK; |
| 91 | + if type_bits > 1 { |
| 92 | + return None; |
| 93 | + } |
| 94 | + Some(WasmAddr(raw)) |
| 95 | + } |
| 96 | + |
| 97 | + /// Provide the raw 64-bit encoding of this `WasmAddr`. |
| 98 | + pub fn as_raw(self) -> u64 { |
| 99 | + self.0 |
| 100 | + } |
| 101 | + |
| 102 | + /// Construct a `WasmAddr` from its constituent parts. |
| 103 | + pub fn new(addr_type: WasmAddrType, module_index: u32, offset: u32) -> Self { |
| 104 | + // There are fewer than 32 bits in the encoding for the module |
| 105 | + // index. |
| 106 | + assert_eq!( |
| 107 | + module_index >> Self::MODULE_BITS, |
| 108 | + 0, |
| 109 | + "Out-of-bounds module index" |
| 110 | + ); |
| 111 | + let type_bits: u64 = match addr_type { |
| 112 | + WasmAddrType::Memory => 0, |
| 113 | + WasmAddrType::Object => 1, |
| 114 | + }; |
| 115 | + WasmAddr( |
| 116 | + (type_bits << Self::TYPE_SHIFT) |
| 117 | + | ((u64::from(module_index)) << Self::MODULE_SHIFT) |
| 118 | + | (u64::from(offset)), |
| 119 | + ) |
| 120 | + } |
| 121 | + |
| 122 | + /// Get the type of this address. |
| 123 | + pub fn addr_type(self) -> WasmAddrType { |
| 124 | + match (self.0 >> Self::TYPE_SHIFT) & Self::TYPE_MASK { |
| 125 | + 0 => WasmAddrType::Memory, |
| 126 | + 1 => WasmAddrType::Object, |
| 127 | + // We never set other type-bits and the raw bits are fully |
| 128 | + // encapsulated and checked in `from_raw`, so this is |
| 129 | + // unreachable. |
| 130 | + _ => unreachable!("WasmAddr: invalid type bits"), |
| 131 | + } |
| 132 | + } |
| 133 | + |
| 134 | + /// Get the index of the module or memory referenced by this |
| 135 | + /// address. |
| 136 | + pub fn module_index(self) -> u32 { |
| 137 | + ((self.0 >> Self::MODULE_SHIFT) & Self::MODULE_MASK) as u32 |
| 138 | + } |
| 139 | + |
| 140 | + /// Get the offset within the module or memory referenced by this |
| 141 | + /// address. |
| 142 | + pub fn offset(self) -> u32 { |
| 143 | + (self.0 & Self::OFFSET_MASK) as u32 |
| 144 | + } |
| 145 | +} |
| 146 | + |
| 147 | +impl core::fmt::Display for WasmAddr { |
| 148 | + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
| 149 | + let type_str = match self.addr_type() { |
| 150 | + WasmAddrType::Memory => "Memory", |
| 151 | + WasmAddrType::Object => "Object", |
| 152 | + }; |
| 153 | + write!( |
| 154 | + f, |
| 155 | + "{}(module={}, offset={:#x})", |
| 156 | + type_str, |
| 157 | + self.module_index(), |
| 158 | + self.offset() |
| 159 | + ) |
| 160 | + } |
| 161 | +} |
| 162 | + |
| 163 | +impl core::fmt::Debug for WasmAddr { |
| 164 | + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
| 165 | + write!(f, "WasmAddr({self})") |
| 166 | + } |
| 167 | +} |
0 commit comments