Skip to content

Commit 56f1981

Browse files
committed
gdbstub_arch: add a WebAssembly architecture.
Builds on #188, #189, #190: add definitions for the WebAssembly architecture to `gdbstub_arch`. This includes all definitions needed for the `Arch` trait, as well as utility code for encoding/decoding the synthetic address space used by the gdbstub definitions for this architecture. Tested with Wasmtime using a (soon to be upstreamed) guest-debugging facility.
1 parent 9c644ea commit 56f1981

7 files changed

Lines changed: 286 additions & 0 deletions

File tree

gdbstub_arch/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ pub mod mips;
5959
pub mod msp430;
6060
pub mod ppc;
6161
pub mod riscv;
62+
pub mod wasm;
6263
pub mod x86;
6364

6465
// used as part of intra-doc link

gdbstub_arch/src/wasm/addr.rs

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
//! Synthetic 64-bit Wasm address space expected by the LLDB Wasm
2+
//! extensions.
3+
//!
4+
//! WebAssembly is natively *multi-memory* and *multi-address-space*:
5+
//!
6+
//! - It supports zero or more "linear memories", and they have no
7+
//! canonical mapping into a single global address space for
8+
//! pointers; rather, each load and store instruction names which
9+
//! memory it accesses statically.
10+
//! - It supports one or more "modules" containing first-class
11+
//! functions, and they have no canonical mapping into a single
12+
//! global code space; rather, control flow is structured, and calls
13+
//! between functions in different modules only occur via explicit
14+
//! strongly-typed function imports and exports.
15+
//!
16+
//! Wasm implementations typically represent these concepts directly
17+
//! rather than attempt to map to a more conventional ISA model of a
18+
//! single flat address space with machine code and data. However, the
19+
//! gdbstub protocol assumes the latter: all of its commands, such as
20+
//! memory reads/writes, breakpoint updates, and the like, use
21+
//! integers as pointers in a single address space.
22+
//!
23+
//! The LLDB Wasm extensions thus define a canonical mapping between
24+
//! the multi-address-space world and a flat 64-bit address space for
25+
//! the purposes of the protocol only. Note that this is 64-bit even
26+
//! when Wasm natively has 32-bit memory offsets (the "wasm32"
27+
//! architecture), because the definition adds additional information
28+
//! above the 32-bit offset.
29+
//!
30+
//! The [ProcessWasm.h] header file in the LLDB source contains
31+
//! definitions that are as close to documentation as we can find: see
32+
//! the `WasmAddressType` and `wasm_addr_t` definitions.
33+
//!
34+
//! An address consists of three parts:
35+
//!
36+
//! - The type: code or data. Wasm has separate "address spaces" for
37+
//! these, so they are mapped to different regions of the 64-bit
38+
//! synthetic space.
39+
//!
40+
//! Note that this implies that the original bytecode (the full
41+
//! image of the Wasm module, starting with its magic number) is
42+
//! present in this synthetic address space. An engine that
43+
//! implements debugging for Wasm should keep around the original
44+
//! bytecode, even if it does ahead-of-time compilation or other
45+
//! processing, so that the debugger can use it: LLDB will read the
46+
//! module bytecode from the synthetic address space, including its
47+
//! debug sections, rather than find the image elsewhere.
48+
//!
49+
//! - The module/memory index. The engine decides an arbitrary index
50+
//! ordering for all of the Wasm modules and Wasm linear memories
51+
//! present in a given execution.
52+
//!
53+
//! - The offset within that Wasm module bytecode or linear memory.
54+
//!
55+
//! [ProcessWasm.h]: https://github.com/llvm/llvm-project/blob/main/lldb/source/Plugins/Process/wasm/ProcessWasm.h
56+
57+
/// The type of an address in the synthetic address space used by the
/// Wasm target.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum WasmAddrType {
    /// Address in a 32-bit linear memory.
    Memory,
    /// Address in a `.wasm` module image.
    ///
    /// Used both for memory-read commands to fetch the Wasm binary
    /// from the gdbstub host, and software-breakpoint commands.
    Object,
}

/// An address in the synthetic address space used by the Wasm target.
///
/// Layout of the 64-bit encoding, most significant bits first:
///
/// | bits   | field                                      |
/// |--------|--------------------------------------------|
/// | 63..62 | address type (`0` = memory, `1` = object)  |
/// | 61..32 | module / memory index (30 bits)            |
/// | 31..0  | offset within the module or memory         |
///
/// The wrapped value always carries valid type bits: it can only be
/// produced by [`WasmAddr::from_raw`], which rejects any other type
/// value, or by [`WasmAddr::new`] / [`WasmAddr::try_new`], which only
/// emit the two known types.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct WasmAddr(u64);

impl WasmAddr {
    const TYPE_BITS: u32 = 2;
    const MODULE_BITS: u32 = 30;
    const OFFSET_BITS: u32 = 32;

    const MODULE_SHIFT: u32 = Self::OFFSET_BITS;
    const TYPE_SHIFT: u32 = Self::OFFSET_BITS + Self::MODULE_BITS;

    const TYPE_MASK: u64 = (1u64 << Self::TYPE_BITS) - 1;
    const MODULE_MASK: u64 = (1u64 << Self::MODULE_BITS) - 1;
    const OFFSET_MASK: u64 = (1u64 << Self::OFFSET_BITS) - 1;

    /// Encoded value of the type field for [`WasmAddrType::Memory`].
    const TYPE_MEMORY: u64 = 0;
    /// Encoded value of the type field for [`WasmAddrType::Object`].
    const TYPE_OBJECT: u64 = 1;

    /// Construct a `WasmAddr` from a raw 64-bit encoded address.
    ///
    /// Returns `None` if the encoding is invalid, i.e. if the type field
    /// holds anything other than the memory or object type.
    pub fn from_raw(raw: u64) -> Option<Self> {
        match (raw >> Self::TYPE_SHIFT) & Self::TYPE_MASK {
            Self::TYPE_MEMORY | Self::TYPE_OBJECT => Some(WasmAddr(raw)),
            _ => None,
        }
    }

    /// Provide the raw 64-bit encoding of this `WasmAddr`.
    pub fn as_raw(self) -> u64 {
        self.0
    }

    /// Construct a `WasmAddr` from its constituent parts.
    ///
    /// # Panics
    ///
    /// Panics if `module_index` does not fit in the 30 bits reserved for
    /// it in the encoding (i.e. if it is `>= 1 << 30`). Use
    /// [`WasmAddr::try_new`] to handle that case without panicking.
    pub fn new(addr_type: WasmAddrType, module_index: u32, offset: u32) -> Self {
        // There are fewer than 32 bits in the encoding for the module
        // index.
        assert_eq!(
            module_index >> Self::MODULE_BITS,
            0,
            "Out-of-bounds module index"
        );
        Self::encode(addr_type, module_index, offset)
    }

    /// Construct a `WasmAddr` from its constituent parts, without
    /// panicking.
    ///
    /// Returns `None` if `module_index` does not fit in the 30 bits
    /// reserved for it in the encoding.
    pub fn try_new(addr_type: WasmAddrType, module_index: u32, offset: u32) -> Option<Self> {
        if module_index >> Self::MODULE_BITS != 0 {
            return None;
        }
        Some(Self::encode(addr_type, module_index, offset))
    }

    /// Pack the three fields into the 64-bit encoding.
    ///
    /// Callers must already have checked that `module_index` fits in
    /// `MODULE_BITS` bits; otherwise its high bits would spill into the
    /// type field.
    fn encode(addr_type: WasmAddrType, module_index: u32, offset: u32) -> Self {
        let type_bits = match addr_type {
            WasmAddrType::Memory => Self::TYPE_MEMORY,
            WasmAddrType::Object => Self::TYPE_OBJECT,
        };
        WasmAddr(
            (type_bits << Self::TYPE_SHIFT)
                | (u64::from(module_index) << Self::MODULE_SHIFT)
                | u64::from(offset),
        )
    }

    /// Get the type of this address.
    pub fn addr_type(self) -> WasmAddrType {
        match (self.0 >> Self::TYPE_SHIFT) & Self::TYPE_MASK {
            Self::TYPE_MEMORY => WasmAddrType::Memory,
            Self::TYPE_OBJECT => WasmAddrType::Object,
            // We never set other type-bits and the raw bits are fully
            // encapsulated and checked in `from_raw`, so this is
            // unreachable.
            _ => unreachable!("WasmAddr: invalid type bits"),
        }
    }

    /// Get the index of the module or memory referenced by this
    /// address.
    pub fn module_index(self) -> u32 {
        // The mask keeps only 30 bits, so the narrowing cast is lossless.
        ((self.0 >> Self::MODULE_SHIFT) & Self::MODULE_MASK) as u32
    }

    /// Get the offset within the module or memory referenced by this
    /// address.
    pub fn offset(self) -> u32 {
        // The mask keeps only the low 32 bits, so the cast is lossless.
        (self.0 & Self::OFFSET_MASK) as u32
    }
}

/// Formats the address as `Type(module=<index>, offset=<hex offset>)`,
/// e.g. `Object(module=3, offset=0x1234)`.
impl core::fmt::Display for WasmAddr {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let type_str = match self.addr_type() {
            WasmAddrType::Memory => "Memory",
            WasmAddrType::Object => "Object",
        };
        write!(
            f,
            "{}(module={}, offset={:#x})",
            type_str,
            self.module_index(),
            self.offset()
        )
    }
}

/// Formats the address as its decoded `Display` form wrapped in
/// `WasmAddr(...)`, rather than as the raw integer.
impl core::fmt::Debug for WasmAddr {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(f, "WasmAddr({self})")
    }
}

gdbstub_arch/src/wasm/mod.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
//! Implementation for the WebAssembly architecture.
2+
//!
3+
//! This implementation follows the [LLDB-specific Wasm extensions] to
4+
//! the gdbstub protocol, which define a mapping from Wasm concepts to
5+
//! more classical ISA concepts.
6+
//!
7+
//! WebAssembly is somewhat *unlike* most ISAs in many of its details:
8+
//! for example, it uses an operand stack rather than classical
9+
//! registers, and has explicit concepts of function locals, of
10+
//! globals, and of first-class functions and a callstack, rather than
11+
//! a flat address space of bytes that are used to build up machine
12+
//! code, a stack, and storage as in most other ISAs.
13+
//!
14+
//! You'll likely want to implement the `Wasm` extension trait in your
15+
//! `Target` implementation in order to provide LLDB access to these
16+
//! Wasm-native concepts.
17+
//!
18+
//! As a particularly important detail, note that the natively
19+
//! multi-address-space Wasm world, where multiple code modules exist
20+
//! without a native concept of a global PC space, and multiple linear
21+
//! memories exist with every load/store qualified by the memory it
22+
//! accesses, is mapped into a single synthesized 64-bit address space
23+
//! by definition of the protocol extensions. See the [`self::addr`]
24+
//! submodule for utilities to encode and decode these synthesized
25+
//! addresses.
26+
//!
27+
//! [LLDB-specific Wasm extensions]: https://lldb.llvm.org/resources/lldbgdbremote.html#wasm-packets
28+
29+
use gdbstub::arch::Arch;
30+
31+
pub mod reg;
32+
pub mod addr;
33+
34+
/// Implements `Arch` for the WebAssembly architecture.
35+
pub enum Wasm {}
36+
37+
impl Arch for Wasm {
38+
/// Even though Wasm is nominally a 32-bit platform, the gdbstub
39+
/// protocol for Wasm uses a 64-bit address word to multiplex module
40+
/// bytecode regions and linear memory regions into a single address
41+
/// space.
42+
type Usize = u64;
43+
type Registers = reg::WasmRegisters;
44+
type RegId = reg::id::WasmRegId;
45+
type BreakpointKind = usize;
46+
}

gdbstub_arch/src/wasm/reg/id.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
use core::num::NonZeroUsize;
2+
use gdbstub::arch::RegId;
3+
4+
/// The only register exposed to GDB: `pc` (register index 0).
5+
#[derive(Debug, Clone, Copy)]
6+
#[non_exhaustive]
7+
pub enum WasmRegId {
8+
/// Program Counter.
9+
Pc,
10+
}
11+
12+
impl RegId for WasmRegId {
13+
fn from_raw_id(id: usize) -> Option<(Self, Option<NonZeroUsize>)> {
14+
match id {
15+
0 => Some((WasmRegId::Pc, NonZeroUsize::new(8))),
16+
_ => None,
17+
}
18+
}
19+
20+
fn to_raw_id(&self) -> Option<usize> {
21+
match self {
22+
WasmRegId::Pc => Some(0),
23+
}
24+
}
25+
}

gdbstub_arch/src/wasm/reg/mod.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
//! `Register` structs for the WebAssembly architecture.
2+
//!
3+
//! Because Wasm is mostly stack-based, it only has one "register":
4+
//! the program counter (PC) according to the gdbstub mappings for
5+
//! this architecture.
6+
7+
/// `RegId` definitions for WebAssembly.
8+
pub mod id;
9+
10+
mod wasm_regs;
11+
12+
pub use wasm_regs::WasmRegisters;
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
use core::convert::TryInto;
2+
use gdbstub::arch::Registers;
3+
4+
/// The register state for WebAssembly.
5+
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
6+
pub struct WasmRegisters {
7+
/// Program Counter. See [`crate::wasm::addr`] for the 64-bit
8+
/// synthetic address space in which this PC exists.
9+
pub pc: u64,
10+
}
11+
12+
impl Registers for WasmRegisters {
13+
type ProgramCounter = u64;
14+
15+
fn pc(&self) -> u64 {
16+
self.pc
17+
}
18+
19+
fn gdb_serialize(&self, mut write_byte: impl FnMut(Option<u8>)) {
20+
for byte in self.pc.to_le_bytes() {
21+
write_byte(Some(byte));
22+
}
23+
}
24+
25+
fn gdb_deserialize(&mut self, bytes: &[u8]) -> Result<(), ()> {
26+
if bytes.len() < 8 {
27+
return Err(());
28+
}
29+
self.pc = u64::from_le_bytes(bytes[..8].try_into().unwrap());
30+
Ok(())
31+
}
32+
}

src/target/ext/wasm.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
//!
2020
//! [LLDB source code]:
2121
//! https://github.com/llvm/llvm-project/blob/main/lldb/source/Plugins/Process/wasm/ProcessWasm.h
22+
//!
23+
//! An implementation of this address encoding/decoding can be found
24+
//! in the `gdbstub_arch` crate in `gdbstub_arch::wasm::addr`.
2225
use crate::common::Tid;
2326
use crate::target::Target;
2427

0 commit comments

Comments
 (0)