
Commit b7c0eae

abrown, fitzgen, and alexcrichton authored
Add MPK-protected stripes to the pooling allocator (bytecodealliance#7072)
* Add memory protection keys (MPK)

  In order to use MPK on an x86_64 Linux system, we need access to the
  underlying `pkey_*` system calls (`sys`), control of the x86 PKRU register
  (`pkru`), and a way of determining if MPK is even supported
  (`is_supported`). These various parts are wrapped in a `ProtectionKey`
  abstraction, along with a `ProtectionMask` that can be used to `allow` the
  CPU to access protected regions.

* Integrate MPK into the pooling allocator

  This change adds "stripes" to the pooling allocator's `MemoryPool`. Now,
  when requesting a slot in which to instantiate, the user (i.e., an
  `InstanceAllocationRequest`) will be transparently assigned to one of the
  stripes, each of which is associated with a protection key. The user can
  also request a specific protection key to use, which will override the
  original "find me a slot" logic.

  This has implications for how instances get allocated: once a store is
  assigned a protection key, it will only allocate requests with that key,
  limiting how many slots it has access to. E.g., if 15 keys are active, the
  store can only ever access 1/15th of the slots.

  This change also includes a tri-state configuration field,
  `memory_protection_keys`, which is disabled by default for the time being.

* Address review comments

  This is a rollup of 43 commits addressing review comments of various
  kinds: bug fixes, refactorings, documentation improvements, etc. It also
  ensures that CI runs all checks. A big thanks to @fitzgen and
  @alexcrichton for the review!

  prtest:full

Co-authored-by: Nick Fitzgerald <[email protected]>
Co-authored-by: Alex Crichton <[email protected]>
1 parent 96f0083 commit b7c0eae
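For readers who want the shape of those `pkey_*` system calls without digging into the new `sys` module, here is a minimal, hedged sketch of the kernel-facing half, using the `libc` crate directly on x86_64 Linux; the page size, error handling, and messages are illustrative, not Wasmtime's actual wrappers:

```rust
use std::ptr;

fn main() {
    unsafe {
        // Allocate one of the (at most 15) user protection keys; this is
        // roughly what the commit's `ProtectionKey` wraps.
        let pkey = libc::pkey_alloc(0, 0);
        if pkey < 0 {
            // ENOSPC (keys exhausted) or EINVAL/ENOSYS (no MPK support);
            // Wasmtime's `is_supported` makes this same determination.
            eprintln!("MPK unavailable on this system");
            return;
        }

        // Map a page and tag it with the key, as the pooling allocator does
        // for each striped memory slot.
        let page = libc::mmap(
            ptr::null_mut(),
            4096,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
            -1,
            0,
        );
        assert_ne!(page, libc::MAP_FAILED);
        assert_eq!(
            libc::pkey_mprotect(page, 4096, libc::PROT_READ | libc::PROT_WRITE, pkey),
            0
        );

        // From here, Wasmtime's `ProtectionMask`/`pkru` half toggles per-key
        // access by writing the PKRU register; touching `page` while its key
        // is masked off faults immediately.
        libc::pkey_free(pkey);
    }
}
```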

File tree

20 files changed: +1412 −151 lines

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default.

crates/runtime/Cargo.toml

Lines changed: 2 additions & 4 deletions
```diff
@@ -50,6 +50,7 @@ features = [
 
 [dev-dependencies]
 once_cell = { workspace = true }
+proptest = "1.0.0"
 
 [build-dependencies]
 cc = "1.0"
@@ -61,9 +62,6 @@ async = ["wasmtime-fiber"]
 # Enables support for the pooling instance allocator
 pooling-allocator = []
 
-component-model = [
-    "wasmtime-environ/component-model",
-    "dep:encoding_rs",
-]
+component-model = ["wasmtime-environ/component-model", "dep:encoding_rs"]
 
 wmemcheck = []
```
New proptest-regressions seed file

Lines changed: 8 additions & 0 deletions

```diff
@@ -0,0 +1,8 @@
+# Seeds for failure cases proptest has generated in the past. It is
+# automatically read and these particular cases re-run before any
+# novel cases are generated.
+#
+# It is recommended to check this file in to source control so that
+# everyone who runs the test benefits from these saved cases.
+cc 696808084287d5d58b85c60c4720227ab4dd83ada7be6841a67162023aaf4914 # shrinks to c = SlabConstraints { max_memory_bytes: 0, num_memory_slots: 1, num_pkeys_available: 0, guard_bytes: 9223372036854775808 }
+cc cf9f6c36659f7f56ed8ea646e8c699cbf46708cef6911cdd376418ad69ea1388 # shrinks to c = SlabConstraints { max_memory_bytes: 14161452635954640438, num_memory_slots: 0, num_pkeys_available: 0, guard_bytes: 4285291437754911178 }
```
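For context on how these seeds are consumed: proptest re-runs each `cc` entry before generating new cases. Below is a self-contained sketch of a test in this style, with a trimmed stand-in `SlabConstraints` (field names taken from the seeds above) and an invented checked-arithmetic property standing in for the commit's real slab-layout computation:

```rust
use proptest::prelude::*;

// Stand-in for the commit's `SlabConstraints`; this struct and the property
// below are illustrative, not the real slab-layout code.
#[derive(Clone, Debug)]
struct SlabConstraints {
    max_memory_bytes: usize,
    num_memory_slots: usize,
    guard_bytes: usize,
}

// The property: layout math must not panic on adversarial inputs like the
// enormous `guard_bytes` values in the seeds; checked arithmetic turns
// overflow into `None` rather than a crash.
fn total_slab_bytes(c: &SlabConstraints) -> Option<usize> {
    c.max_memory_bytes
        .checked_add(c.guard_bytes)?
        .checked_mul(c.num_memory_slots)
}

proptest! {
    #[test]
    fn layout_never_panics(
        max_memory_bytes in any::<usize>(),
        num_memory_slots in any::<usize>(),
        guard_bytes in any::<usize>(),
    ) {
        let c = SlabConstraints { max_memory_bytes, num_memory_slots, guard_bytes };
        // On failure, proptest shrinks the input and appends a `cc` line to
        // the seed file; those cases are then re-run before new ones.
        let _ = total_slab_bytes(&c);
    }
}
```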

crates/runtime/src/instance/allocator.rs

Lines changed: 23 additions & 0 deletions
```diff
@@ -1,6 +1,7 @@
 use crate::imports::Imports;
 use crate::instance::{Instance, InstanceHandle};
 use crate::memory::Memory;
+use crate::mpk::ProtectionKey;
 use crate::table::Table;
 use crate::{CompiledModuleId, ModuleRuntimeInfo, Store};
 use anyhow::{anyhow, bail, Result};
@@ -59,6 +60,10 @@ pub struct InstanceAllocationRequest<'a> {
 
     /// Indicates '--wmemcheck' flag.
    pub wmemcheck: bool,
+
+    /// Request that the instance's memories be protected by a specific
+    /// protection key.
+    pub pkey: Option<ProtectionKey>,
 }
 
 /// A pointer to a Store. This Option<*mut dyn Store> is wrapped in a struct
@@ -267,6 +272,24 @@ pub unsafe trait InstanceAllocatorImpl {
     /// Primarily present for the pooling allocator to remove mappings of
     /// this module from slots in linear memory.
     fn purge_module(&self, module: CompiledModuleId);
+
+    /// Use the next available protection key.
+    ///
+    /// The pooling allocator can use memory protection keys (MPK) for
+    /// compressing the guard regions protecting against OOB. Each
+    /// pool-allocated store needs its own key.
+    fn next_available_pkey(&self) -> Option<ProtectionKey>;
+
+    /// Restrict access to memory regions protected by `pkey`.
+    ///
+    /// This is useful for the pooling allocator, which can use memory
+    /// protection keys (MPK). Note: this may still allow access to other
+    /// protection keys, such as the default kernel key; see implementations of
+    /// this.
+    fn restrict_to_pkey(&self, pkey: ProtectionKey);
+
+    /// Allow access to memory regions protected by any protection key.
+    fn allow_all_pkeys(&self);
 }
 
 /// A thing that can allocate instances.
```
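Taken together, `pkey: Option<ProtectionKey>` and `next_available_pkey` describe a simple handshake: a store binds itself to one stripe on its first pooled allocation and reuses that key thereafter, which is why (per the commit message) a store only ever sees 1/15th of the slots when 15 keys are active. A compressed, hedged sketch of that flow; the `Store` shape and stand-in types here are invented for illustration:

```rust
#[derive(Clone, Copy, Debug)]
struct ProtectionKey(u32); // stand-in for `crate::mpk::ProtectionKey`

trait InstanceAllocatorImpl {
    fn next_available_pkey(&self) -> Option<ProtectionKey>;
}

struct Store {
    pkey: Option<ProtectionKey>,
}

impl Store {
    /// On the first pooled allocation, bind this store to a stripe; every
    /// later `InstanceAllocationRequest` carries the same key, confining the
    /// store to that stripe's share of the slots.
    fn pkey_for_request(&mut self, alloc: &dyn InstanceAllocatorImpl) -> Option<ProtectionKey> {
        if self.pkey.is_none() {
            self.pkey = alloc.next_available_pkey();
        }
        self.pkey
    }
}
```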

crates/runtime/src/instance/allocator/on_demand.rs

Lines changed: 22 additions & 0 deletions
```diff
@@ -3,6 +3,7 @@ use super::{
 };
 use crate::instance::RuntimeMemoryCreator;
 use crate::memory::{DefaultMemoryCreator, Memory};
+use crate::mpk::ProtectionKey;
 use crate::table::Table;
 use crate::CompiledModuleId;
 use anyhow::Result;
@@ -151,4 +152,25 @@ unsafe impl InstanceAllocatorImpl for OnDemandInstanceAllocator {
     }
 
     fn purge_module(&self, _: CompiledModuleId) {}
+
+    fn next_available_pkey(&self) -> Option<ProtectionKey> {
+        // The on-demand allocator cannot use protection keys--it requires
+        // back-to-back allocation of memory slots that this allocator cannot
+        // guarantee.
+        None
+    }
+
+    fn restrict_to_pkey(&self, _: ProtectionKey) {
+        // The on-demand allocator cannot use protection keys; an on-demand
+        // allocator will never hand out protection keys to the stores its
+        // engine creates.
+        unreachable!()
+    }
+
+    fn allow_all_pkeys(&self) {
+        // The on-demand allocator cannot use protection keys; an on-demand
+        // allocator will never hand out protection keys to the stores its
+        // engine creates.
+        unreachable!()
+    }
 }
```

crates/runtime/src/instance/allocator/pooling.rs

Lines changed: 80 additions & 8 deletions
````diff
@@ -1,11 +1,62 @@
 //! Implements the pooling instance allocator.
 //!
-//! The pooling instance allocator maps memory in advance
-//! and allocates instances, memories, tables, and stacks from
-//! a pool of available resources.
+//! The pooling instance allocator maps memory in advance and allocates
+//! instances, memories, tables, and stacks from a pool of available resources.
+//! Using the pooling instance allocator can speed up module instantiation when
+//! modules can be constrained based on configurable limits
+//! ([`InstanceLimits`]). Each new instance is stored in a "slot"; as instances
+//! are allocated and freed, these slots are either filled or emptied:
 //!
-//! Using the pooling instance allocator can speed up module instantiation
-//! when modules can be constrained based on configurable limits.
+//! ```text
+//! ┌──────┬──────┬──────┬──────┬──────┐
+//! │Slot 0│Slot 1│Slot 2│Slot 3│......│
+//! └──────┴──────┴──────┴──────┴──────┘
+//! ```
+//!
+//! Note that these slots are a useful abstraction but not exactly how this is
+//! mapped to memory in fact. Each new instance _does_ get associated with a
+//! slot number (see uses of `index` and [`SlotId`] in this module) but the
+//! parts of the instances are stored in separate pools: memories in the
+//! [`MemoryPool`], tables in the [`TablePool`], etc. What ties these various
+//! parts together is the slot number generated by an [`IndexAllocator`].
+//!
+//! The [`MemoryPool`] protects Wasmtime from out-of-bounds memory accesses by
+//! inserting inaccessible guard regions between memory slots. The
+//! [`MemoryPool`] documentation has a more in-depth chart but one can think of
+//! memories being laid out like the following:
+//!
+//! ```text
+//! ┌─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┐
+//! │Guard│Mem 0│Guard│Mem 1│Guard│Mem 2│.....│Guard│
+//! └─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┘
+//! ```
+//!
+//! To complicate matters, each instance can have multiple memories, multiple
+//! tables, etc. You might think these would be stored consecutively in their
+//! respective pools (for instance `n`, table 0 is at table pool slot `n + 0`
+//! and table 1 is at `n + 1`), but for memories this is not the case. With
+//! protection keys enabled, memories do not need interleaved guard regions
+//! because the protection key will signal a fault if the wrong memory is
+//! accessed. Instead, the pooling allocator "stripes" the memories with
+//! different protection keys.
+//!
+//! This concept, dubbed [ColorGuard] in the original paper, relies on careful
+//! calculation of the memory sizes to prevent any "overlapping access": there
+//! are limited protection keys available (15) so the next memory using the same
+//! key must be at least as far away as the guard region we would insert
+//! otherwise. This ends up looking like the following, where a store for
+//! instance 0 (`I0`) "stripes" two memories (`M0` and `M1`) with the same
+//! protection key 1 and far enough apart to signal an OOB access:
+//!
+//! ```text
+//! ┌─────┬─────┬─────┬─────┬────────────────┬─────┬─────┬─────┐
+//! │.....│I0:M0│.....│.....│.<enough slots>.│I0:M1│.....│.....│
+//! ├─────┼─────┼─────┼─────┼────────────────┼─────┼─────┼─────┤
+//! │.....│key 1│key 2│key 3│..<more keys>...│key 1│key 2│.....│
+//! └─────┴─────┴─────┴─────┴────────────────┴─────┴─────┴─────┘
+//! ```
+//!
+//! [ColorGuard]: https://plas2022.github.io/files/pdf/SegueColorGuard.pdf
 
 mod index_allocator;
 mod memory_pool;
@@ -27,7 +78,11 @@ cfg_if::cfg_if! {
 use super::{
     InstanceAllocationRequest, InstanceAllocatorImpl, MemoryAllocationIndex, TableAllocationIndex,
 };
-use crate::{instance::Instance, CompiledModuleId, Memory, Table};
+use crate::{
+    instance::Instance,
+    mpk::{self, MpkEnabled, ProtectionKey, ProtectionMask},
+    CompiledModuleId, Memory, Table,
+};
 use anyhow::{bail, Result};
 use memory_pool::MemoryPool;
 use std::{
@@ -162,6 +217,8 @@ pub struct PoolingInstanceAllocatorConfig {
     pub linear_memory_keep_resident: usize,
     /// Same as `linear_memory_keep_resident` but for tables.
     pub table_keep_resident: usize,
+    /// Whether to enable memory protection keys.
+    pub memory_protection_keys: MpkEnabled,
 }
 
 impl Default for PoolingInstanceAllocatorConfig {
@@ -174,15 +231,18 @@ impl Default for PoolingInstanceAllocatorConfig {
             async_stack_keep_resident: 0,
             linear_memory_keep_resident: 0,
             table_keep_resident: 0,
+            memory_protection_keys: MpkEnabled::Disable,
         }
     }
 }
 
 /// Implements the pooling instance allocator.
 ///
-/// This allocator internally maintains pools of instances, memories, tables, and stacks.
+/// This allocator internally maintains pools of instances, memories, tables,
+/// and stacks.
 ///
-/// Note: the resource pools are manually dropped so that the fault handler terminates correctly.
+/// Note: the resource pools are manually dropped so that the fault handler
+/// terminates correctly.
 #[derive(Debug)]
 pub struct PoolingInstanceAllocator {
     limits: InstanceLimits,
@@ -533,6 +593,18 @@ unsafe impl InstanceAllocatorImpl for PoolingInstanceAllocator {
     fn purge_module(&self, module: CompiledModuleId) {
         self.memories.purge_module(module);
     }
+
+    fn next_available_pkey(&self) -> Option<ProtectionKey> {
+        self.memories.next_available_pkey()
+    }
+
+    fn restrict_to_pkey(&self, pkey: ProtectionKey) {
+        mpk::allow(ProtectionMask::zero().or(pkey));
+    }
+
+    fn allow_all_pkeys(&self) {
+        mpk::allow(ProtectionMask::all());
+    }
 }
 
 #[cfg(test)]
````
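The doc comment's "at least as far away as the guard region we would insert otherwise" rule pins down how many stripes are useful. Here is a back-of-the-envelope sketch of that arithmetic, assuming round-robin striping over equally sized slots; the function and the numbers are illustrative, not the slab-layout solver this commit adds to `MemoryPool`:

```rust
/// With round-robin striping, two slots sharing a key are separated by
/// `stripes - 1` slots of differently keyed memory, and that span must be at
/// least as large as the guard region it replaces.
fn stripes_needed(slot_bytes: u64, guard_bytes: u64) -> u64 {
    1 + guard_bytes.div_ceil(slot_bytes)
}

fn main() {
    // E.g., 6 GiB slots and a 2 GiB guard reach: two stripes already keep
    // same-key memories far enough apart, so the interleaved guard regions
    // between differently keyed slots can be dropped.
    let (slot, guard) = (6u64 << 30, 2u64 << 30);
    assert_eq!(stripes_needed(slot, guard), 2);
    // x86 MPK exposes at most 15 usable keys, so clamp accordingly.
    println!("{} stripes", stripes_needed(slot, guard).min(15));
}
```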

crates/runtime/src/instance/allocator/pooling/index_allocator.rs

Lines changed: 25 additions & 5 deletions
```diff
@@ -167,6 +167,12 @@ impl ModuleAffinityIndexAllocator {
         }))
     }
 
+    /// How many slots can this allocator allocate?
+    pub fn len(&self) -> usize {
+        let inner = self.0.lock().unwrap();
+        inner.slot_state.len()
+    }
+
     /// Are zero slots in use right now?
     pub fn is_empty(&self) -> bool {
         let inner = self.0.lock().unwrap();
@@ -299,8 +305,16 @@ impl ModuleAffinityIndexAllocator {
         });
     }
 
-    /// For testing only, we want to be able to assert what is on the
-    /// single freelist, for the policies that keep just one.
+    /// Return the number of empty slots available in this allocator.
+    #[cfg(test)]
+    pub fn num_empty_slots(&self) -> usize {
+        let inner = self.0.lock().unwrap();
+        let total_slots = inner.slot_state.len();
+        (total_slots - inner.last_cold as usize) + inner.unused_warm_slots as usize
+    }
+
+    /// For testing only, we want to be able to assert what is on the single
+    /// freelist, for the policies that keep just one.
     #[cfg(test)]
     #[allow(unused)]
     pub(crate) fn testing_freelist(&self) -> Vec<SlotId> {
@@ -311,8 +325,8 @@ impl ModuleAffinityIndexAllocator {
             .collect()
     }
 
-    /// For testing only, get the list of all modules with at least
-    /// one slot with affinity for that module.
+    /// For testing only, get the list of all modules with at least one slot
+    /// with affinity for that module.
     #[cfg(test)]
     pub(crate) fn testing_module_affinity_list(&self) -> Vec<MemoryInModule> {
         let inner = self.0.lock().unwrap();
@@ -475,7 +489,9 @@ mod test {
     fn test_next_available_allocation_strategy() {
         for size in 0..20 {
             let state = ModuleAffinityIndexAllocator::new(size, 0);
+            assert_eq!(state.num_empty_slots() as u32, size);
             for i in 0..size {
+                assert_eq!(state.num_empty_slots() as u32, size - i);
                 assert_eq!(state.alloc(None).unwrap().index(), i as usize);
             }
             assert!(state.alloc(None).is_none());
@@ -496,6 +512,9 @@ mod test {
         assert_ne!(index1, index2);
 
         state.free(index1);
+        assert_eq!(state.num_empty_slots(), 99);
+
+        // Allocate to the same `index1` slot again.
         let index3 = state.alloc(Some(id1)).unwrap();
         assert_eq!(index3, index1);
         state.free(index3);
@@ -512,13 +531,14 @@ mod test {
         // for id1, and 98 empty. Allocate 100 for id2. The first
         // should be equal to the one we know was previously used for
         // id2. The next 99 are arbitrary.
-
+        assert_eq!(state.num_empty_slots(), 100);
         let mut indices = vec![];
         for _ in 0..100 {
             indices.push(state.alloc(Some(id2)).unwrap());
         }
         assert!(state.alloc(None).is_none());
         assert_eq!(indices[0], index2);
+        assert_eq!(state.num_empty_slots(), 0);
 
         for i in indices {
             state.free(i);
```
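The `num_empty_slots` arithmetic above adds two distinct free populations: "cold" slots at or past `last_cold` that have never been handed out, and "warm" slots that were used and then freed. Here is a stand-alone restatement with invented scaffolding; only the fields the formula touches are modeled:

```rust
// Stand-in for the allocator's internal state; field names mirror the diff.
struct Inner {
    total_slots: usize,
    /// Index of the first never-yet-allocated ("cold") slot; everything at
    /// or past this index is empty.
    last_cold: usize,
    /// Previously used slots that have since been freed ("warm") and can be
    /// reused, ideally by a module with affinity for them.
    unused_warm_slots: u32,
}

impl Inner {
    fn num_empty_slots(&self) -> usize {
        (self.total_slots - self.last_cold) + self.unused_warm_slots as usize
    }
}

fn main() {
    // 100 slots total; 40 have ever been touched, 15 of those freed back.
    let inner = Inner { total_slots: 100, last_cold: 40, unused_warm_slots: 15 };
    assert_eq!(inner.num_empty_slots(), 75); // 60 cold + 15 warm
}
```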
