Skip to content

Commit 27ce0ba

Browse files
authored
Improve MallocMemory implementation (#9634)
* Optimize memory growth in debug mode, which was showing up locally in profiles as being particularly slow.
* Fix a bug where the `memory_reservation_for_growth` region was accidentally initialized instead of being left uninitialized as intended.
1 parent 5f91722 commit 27ce0ba

File tree

2 files changed

+117
-8
lines changed

2 files changed

+117
-8
lines changed

crates/environ/src/tunables.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ impl Tunables {
154154
}
155155

156156
/// Returns the default set of tunables for running under MIRI.
157-
pub fn default_miri() -> Tunables {
157+
pub const fn default_miri() -> Tunables {
158158
Tunables {
159159
collector: None,
160160

@@ -183,7 +183,7 @@ impl Tunables {
183183
}
184184

185185
/// Returns the default set of tunables for running under a 32-bit host.
186-
pub fn default_u32() -> Tunables {
186+
pub const fn default_u32() -> Tunables {
187187
Tunables {
188188
// For 32-bit we scale way down to 10MB of reserved memory. This
189189
// impacts performance severely but allows us to have more than a
@@ -197,7 +197,7 @@ impl Tunables {
197197
}
198198

199199
/// Returns the default set of tunables for running under a 64-bit host.
200-
pub fn default_u64() -> Tunables {
200+
pub const fn default_u64() -> Tunables {
201201
Tunables {
202202
// 64-bit has tons of address space so static memories can have 4gb
203203
// address space reservations liberally by default, allowing us to

crates/wasmtime/src/runtime/vm/memory/malloc.rs

Lines changed: 114 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ impl MallocMemory {
3838
bail!("malloc memory cannot be used with CoW images");
3939
}
4040

41-
let byte_size = minimum
41+
let initial_allocation_byte_size = minimum
4242
.checked_add(
4343
tunables
4444
.memory_reservation_for_growth
@@ -47,10 +47,14 @@ impl MallocMemory {
4747
)
4848
.context("memory allocation size too large")?;
4949

50-
let element_len = byte_size_to_element_len(byte_size);
50+
let initial_allocation_len = byte_size_to_element_len(initial_allocation_byte_size);
5151
let mut storage = Vec::new();
52-
storage.try_reserve(element_len).err2anyhow()?;
53-
storage.resize(element_len, Align16(0));
52+
storage.try_reserve(initial_allocation_len).err2anyhow()?;
53+
54+
let initial_len = byte_size_to_element_len(minimum);
55+
if initial_len > 0 {
56+
grow_storage_to(&mut storage, initial_len);
57+
}
5458
Ok(MallocMemory {
5559
base_ptr: SendSyncPtr::new(NonNull::new(storage.as_mut_ptr()).unwrap()).cast(),
5660
storage,
@@ -74,7 +78,7 @@ impl RuntimeLinearMemory for MallocMemory {
7478
self.storage
7579
.try_reserve(new_element_len - self.storage.len())
7680
.err2anyhow()?;
77-
self.storage.resize(new_element_len, Align16(0));
81+
grow_storage_to(&mut self.storage, new_element_len);
7882
self.base_ptr =
7983
SendSyncPtr::new(NonNull::new(self.storage.as_mut_ptr()).unwrap()).cast();
8084
}
@@ -98,3 +102,108 @@ fn byte_size_to_element_len(byte_size: usize) -> usize {
98102
// element length of our vector.
99103
byte_size_rounded_up / align
100104
}
105+
106+
/// Helper that is the equivalent of `storage.resize(new_len, Align16(0))`
107+
/// except it's also optimized to perform well in debug mode. Just using
108+
/// `resize` leads to a per-element iteration which can be quite slow in debug
109+
/// mode as it's not optimized to a memset, so it's manually optimized here
110+
/// instead.
111+
fn grow_storage_to(storage: &mut Vec<Align16>, new_len: usize) {
112+
debug_assert!(new_len > storage.len());
113+
assert!(new_len <= storage.capacity());
114+
let capacity_to_set = new_len - storage.len();
115+
let slice_to_initialize = &mut storage.spare_capacity_mut()[..capacity_to_set];
116+
let byte_size = mem::size_of_val(slice_to_initialize);
117+
118+
// SAFETY: The `slice_to_initialize` is guaranteed to be in the capacity of
119+
// the vector via the slicing above, so it's all owned memory by the
120+
// vector. Additionally the `byte_size` is the exact size of the
121+
// `slice_to_initialize` itself, so this `memset` should be in-bounds.
122+
// Finally the `Align16` is a simple wrapper around `u128` for which 0
123+
// is a valid byte pattern. This should make the initial `write_bytes` safe.
124+
//
125+
// Afterwards the `set_len` call should also be safe because we've
126+
// initialized the tail end of the vector with zeros so it's safe to
127+
// consider it having a new length now.
128+
unsafe {
129+
core::ptr::write_bytes(slice_to_initialize.as_mut_ptr().cast::<u8>(), 0, byte_size);
130+
storage.set_len(new_len);
131+
}
132+
}
133+
134+
#[cfg(test)]
135+
mod tests {
136+
use super::*;
137+
138+
// This is currently required by the constructor but otherwise ignored in
139+
// the creation of a `MallocMemory`, so just have a single one used in
140+
// tests below.
141+
const TY: wasmtime_environ::Memory = wasmtime_environ::Memory {
142+
idx_type: wasmtime_environ::IndexType::I32,
143+
limits: wasmtime_environ::Limits { min: 0, max: None },
144+
shared: false,
145+
page_size_log2: 16,
146+
};
147+
148+
// Valid tunables that can be used to create a `MallocMemory`.
149+
const TUNABLES: Tunables = Tunables {
150+
memory_reservation: 0,
151+
memory_guard_size: 0,
152+
memory_init_cow: false,
153+
..Tunables::default_miri()
154+
};
155+
156+
#[test]
157+
fn simple() {
158+
let mut memory = MallocMemory::new(&TY, &TUNABLES, 10).unwrap();
159+
assert_eq!(memory.storage.len(), 1);
160+
assert_valid(&memory);
161+
162+
memory.grow_to(11).unwrap();
163+
assert_eq!(memory.storage.len(), 1);
164+
assert_valid(&memory);
165+
166+
memory.grow_to(16).unwrap();
167+
assert_eq!(memory.storage.len(), 1);
168+
assert_valid(&memory);
169+
170+
memory.grow_to(17).unwrap();
171+
assert_eq!(memory.storage.len(), 2);
172+
assert_valid(&memory);
173+
174+
memory.grow_to(65).unwrap();
175+
assert_eq!(memory.storage.len(), 5);
176+
assert_valid(&memory);
177+
}
178+
179+
#[test]
180+
fn reservation_not_initialized() {
181+
let tunables = Tunables {
182+
memory_reservation_for_growth: 1 << 20,
183+
..TUNABLES
184+
};
185+
let mut memory = MallocMemory::new(&TY, &tunables, 10).unwrap();
186+
assert_eq!(memory.storage.len(), 1);
187+
assert_eq!(
188+
memory.storage.capacity(),
189+
(tunables.memory_reservation_for_growth / 16) as usize + 1,
190+
);
191+
assert_valid(&memory);
192+
193+
memory.grow_to(100).unwrap();
194+
assert_eq!(memory.storage.len(), 7);
195+
assert_eq!(
196+
memory.storage.capacity(),
197+
(tunables.memory_reservation_for_growth / 16) as usize + 1,
198+
);
199+
assert_valid(&memory);
200+
}
201+
202+
fn assert_valid(mem: &MallocMemory) {
203+
assert_eq!(mem.storage.as_ptr().cast::<u8>(), mem.base_ptr.as_ptr());
204+
assert!(mem.byte_len <= mem.storage.len() * 16);
205+
for slot in mem.storage.iter() {
206+
assert_eq!(slot.0, 0);
207+
}
208+
}
209+
}

0 commit comments

Comments
 (0)