diff --git a/crates/wasmtime/src/runtime/vm/cow.rs b/crates/wasmtime/src/runtime/vm/cow.rs index d030eea3406e..0f3f9d1bb937 100644 --- a/crates/wasmtime/src/runtime/vm/cow.rs +++ b/crates/wasmtime/src/runtime/vm/cow.rs @@ -514,21 +514,23 @@ impl MemoryImageSlot { /// argument is the maximum amount of memory to keep resident in this /// process's memory on Linux. Up to that much memory will be `memset` to /// zero where the rest of it will be reset or released with `madvise`. + /// + /// Returns the number of bytes still resident in memory after this function + /// has returned. #[allow(dead_code, reason = "only used in some cfgs")] pub(crate) fn clear_and_remain_ready( &mut self, pagemap: Option<&PageMap>, keep_resident: HostAlignedByteCount, decommit: impl FnMut(*mut u8, usize), - ) -> Result<()> { + ) -> Result<usize> { assert!(self.dirty); - unsafe { - self.reset_all_memory_contents(pagemap, keep_resident, decommit)?; - } + let bytes_resident = + unsafe { self.reset_all_memory_contents(pagemap, keep_resident, decommit)? }; self.dirty = false; - Ok(()) + Ok(bytes_resident) } #[allow(dead_code, reason = "only used in some cfgs")] @@ -537,7 +539,7 @@ impl MemoryImageSlot { pagemap: Option<&PageMap>, keep_resident: HostAlignedByteCount, decommit: impl FnMut(*mut u8, usize), - ) -> Result<()> { + ) -> Result<usize> { match vm::decommit_behavior() { DecommitBehavior::Zero => { // If we're not on Linux then there's no generic platform way to // // Additionally the previous image, if any, is dropped here // since it's no longer applicable to this mapping. - self.reset_with_anon_memory() + self.reset_with_anon_memory()?; + Ok(0) } DecommitBehavior::RestoreOriginalMapping => { - unsafe { - self.reset_with_original_mapping(pagemap, keep_resident, decommit); - } - Ok(()) + let bytes_resident = + unsafe { self.reset_with_original_mapping(pagemap, keep_resident, decommit) }; + Ok(bytes_resident) } } } @@ -563,29 +565,25 @@ impl MemoryImageSlot { pagemap: Option<&PageMap>, keep_resident: HostAlignedByteCount, decommit: impl FnMut(*mut u8, usize), - ) { + ) -> usize { assert_eq!( vm::decommit_behavior(), DecommitBehavior::RestoreOriginalMapping ); unsafe { - match &self.image { + return match &self.image { // If there's a backing image then manually resetting a region // is a bit trickier than without an image, so delegate to the // helper function below. - Some(image) => { - reset_with_pagemap( - pagemap, - self.base.as_mut_ptr(), - self.accessible, - keep_resident, - |region| { - manually_reset_region(self.base.as_mut_ptr().addr(), image, region) - }, - decommit, - ); - } + Some(image) => reset_with_pagemap( + pagemap, + self.base.as_mut_ptr(), + self.accessible, + keep_resident, + |region| manually_reset_region(self.base.as_mut_ptr().addr(), image, region), + decommit, + ), // If there's no memory image for this slot then pages are always // manually reset back to zero or given to `decommit`. 
@@ -597,7 +595,7 @@ impl MemoryImageSlot { |region| region.fill(0), decommit, ), - } + }; } /// Manually resets `region` back to its original contents as specified diff --git a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling.rs b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling.rs index 98c674589208..fed598f0b246 100644 --- a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling.rs +++ b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling.rs @@ -318,9 +318,13 @@ pub struct PoolingInstanceAllocator { #[cfg(feature = "gc")] gc_heaps: GcHeapPool, + #[cfg(feature = "gc")] + live_gc_heaps: AtomicUsize, #[cfg(feature = "async")] stacks: StackPool, + #[cfg(feature = "async")] + live_stacks: AtomicUsize, pagemap: Option<PageMap>, } @@ -350,10 +354,16 @@ impl Drop for PoolingInstanceAllocator { debug_assert!(self.tables.is_empty()); #[cfg(feature = "gc")] - debug_assert!(self.gc_heaps.is_empty()); + { + debug_assert!(self.gc_heaps.is_empty()); + debug_assert_eq!(self.live_gc_heaps.load(Ordering::Acquire), 0); + } #[cfg(feature = "async")] - debug_assert!(self.stacks.is_empty()); + { + debug_assert!(self.stacks.is_empty()); + debug_assert_eq!(self.live_stacks.load(Ordering::Acquire), 0); + } } } @@ -372,8 +382,12 @@ impl PoolingInstanceAllocator { live_tables: AtomicUsize::new(0), #[cfg(feature = "gc")] gc_heaps: GcHeapPool::new(config)?, + #[cfg(feature = "gc")] + live_gc_heaps: AtomicUsize::new(0), #[cfg(feature = "async")] stacks: StackPool::new(config)?, + #[cfg(feature = "async")] + live_stacks: AtomicUsize::new(0), pagemap: match config.pagemap_scan { Enabled::Auto => PageMap::new(), Enabled::Yes => Some(PageMap::new().ok_or_else(|| { @@ -704,7 +718,7 @@ unsafe impl InstanceAllocator for PoolingInstanceAllocator { // reservation. let mut image = memory.unwrap_static_image(); let mut queue = DecommitQueue::default(); - image + let bytes_resident = image .clear_and_remain_ready( self.pagemap.as_ref(), self.memories.keep_resident, @@ -722,7 +736,7 @@ unsafe impl InstanceAllocator for PoolingInstanceAllocator { // SAFETY: this image is not in use and its memory regions were enqueued // with `push_raw` above. unsafe { - queue.push_memory(allocation_index, image); + queue.push_memory(allocation_index, image, bytes_resident); } self.merge_or_flush(queue); } @@ -770,7 +784,7 @@ unsafe impl InstanceAllocator for PoolingInstanceAllocator { // method is called and additionally all image ranges are pushed with // the understanding that the memory won't get used until the whole // queue is flushed. - unsafe { + let bytes_resident = unsafe { self.tables.reset_table_pages_to_zero( self.pagemap.as_ref(), allocation_index, @@ -778,34 +792,37 @@ unsafe impl InstanceAllocator for PoolingInstanceAllocator { |ptr, len| { queue.push_raw(ptr, len); }, - ); - } + ) + }; // SAFETY: the table has had all its memory regions enqueued above. 
unsafe { - queue.push_table(allocation_index, table); + queue.push_table(allocation_index, table, bytes_resident); } self.merge_or_flush(queue); } #[cfg(feature = "async")] fn allocate_fiber_stack(&self) -> Result { - self.with_flush_and_retry(|| self.stacks.allocate()) + let ret = self.with_flush_and_retry(|| self.stacks.allocate())?; + self.live_stacks.fetch_add(1, Ordering::Relaxed); + Ok(ret) } #[cfg(feature = "async")] unsafe fn deallocate_fiber_stack(&self, mut stack: wasmtime_fiber::FiberStack) { + self.live_stacks.fetch_sub(1, Ordering::Relaxed); let mut queue = DecommitQueue::default(); // SAFETY: the stack is no longer in use by definition when this // function is called and memory ranges pushed here are otherwise no // longer in use. - unsafe { + let bytes_resident = unsafe { self.stacks - .zero_stack(&mut stack, |ptr, len| queue.push_raw(ptr, len)); - } + .zero_stack(&mut stack, |ptr, len| queue.push_raw(ptr, len)) + }; // SAFETY: this stack's memory regions were enqueued above. unsafe { - queue.push_stack(stack); + queue.push_stack(stack, bytes_resident); } self.merge_or_flush(queue); } @@ -834,8 +851,11 @@ unsafe impl InstanceAllocator for PoolingInstanceAllocator { memory_alloc_index: MemoryAllocationIndex, memory: Memory, ) -> Result<(GcHeapAllocationIndex, Box)> { - self.gc_heaps - .allocate(engine, gc_runtime, memory_alloc_index, memory) + let ret = self + .gc_heaps + .allocate(engine, gc_runtime, memory_alloc_index, memory)?; + self.live_gc_heaps.fetch_add(1, Ordering::Relaxed); + Ok(ret) } #[cfg(feature = "gc")] @@ -844,6 +864,7 @@ unsafe impl InstanceAllocator for PoolingInstanceAllocator { allocation_index: GcHeapAllocationIndex, gc_heap: Box, ) -> (MemoryAllocationIndex, Memory) { + self.live_gc_heaps.fetch_sub(1, Ordering::Relaxed); self.gc_heaps.deallocate(allocation_index, gc_heap) } diff --git a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/decommit_queue.rs b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/decommit_queue.rs index d2a0b0107a5d..e1ef2fd17b88 100644 --- a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/decommit_queue.rs +++ b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/decommit_queue.rs @@ -51,10 +51,10 @@ unsafe impl Sync for SendSyncStack {} #[derive(Default)] pub struct DecommitQueue { raw: SmallVec<[IoVec; 2]>, - memories: SmallVec<[(MemoryAllocationIndex, MemoryImageSlot); 1]>, - tables: SmallVec<[(TableAllocationIndex, Table); 1]>, + memories: SmallVec<[(MemoryAllocationIndex, MemoryImageSlot, usize); 1]>, + tables: SmallVec<[(TableAllocationIndex, Table, usize); 1]>, #[cfg(feature = "async")] - stacks: SmallVec<[SendSyncStack; 1]>, + stacks: SmallVec<[(SendSyncStack, usize); 1]>, // // TODO: GC heaps are not well-integrated with the pooling allocator // yet. Once we better integrate them, we should start (optionally) zeroing @@ -123,8 +123,10 @@ impl DecommitQueue { &mut self, allocation_index: MemoryAllocationIndex, image: MemoryImageSlot, + bytes_resident: usize, ) { - self.memories.push((allocation_index, image)); + self.memories + .push((allocation_index, image, bytes_resident)); } /// Push a table into the queue. @@ -133,8 +135,13 @@ impl DecommitQueue { /// /// This table should not be in use, and its decommit regions must have /// already been enqueued via `self.enqueue_raw`. 
- pub unsafe fn push_table(&mut self, allocation_index: TableAllocationIndex, table: Table) { - self.tables.push((allocation_index, table)); + pub unsafe fn push_table( + &mut self, + allocation_index: TableAllocationIndex, + table: Table, + bytes_resident: usize, + ) { + self.tables.push((allocation_index, table, bytes_resident)); } /// Push a stack into the queue. @@ -144,8 +151,8 @@ impl DecommitQueue { /// This stack should not be in use, and its decommit regions must have /// already been enqueued via `self.enqueue_raw`. #[cfg(feature = "async")] - pub unsafe fn push_stack(&mut self, stack: FiberStack) { - self.stacks.push(SendSyncStack(stack)); + pub unsafe fn push_stack(&mut self, stack: FiberStack, bytes_resident: usize) { + self.stacks.push((SendSyncStack(stack), bytes_resident)); } fn decommit_all_raw(&mut self) { @@ -174,23 +181,25 @@ impl DecommitQueue { // lists. This is safe, and they are ready for reuse, now that their // memory regions have been decommitted. let mut deallocated_any = false; - for (allocation_index, image) in self.memories { + for (allocation_index, image, bytes_resident) in self.memories { deallocated_any = true; unsafe { - pool.memories.deallocate(allocation_index, image); + pool.memories + .deallocate(allocation_index, image, bytes_resident); } } - for (allocation_index, table) in self.tables { + for (allocation_index, table, bytes_resident) in self.tables { deallocated_any = true; unsafe { - pool.tables.deallocate(allocation_index, table); + pool.tables + .deallocate(allocation_index, table, bytes_resident); } } #[cfg(feature = "async")] - for stack in self.stacks { + for (stack, bytes_resident) in self.stacks { deallocated_any = true; unsafe { - pool.stacks.deallocate(stack.0); + pool.stacks.deallocate(stack.0, bytes_resident); } } diff --git a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/gc_heap_pool.rs b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/gc_heap_pool.rs index dad3d22bda54..2bdb7cf67e1a 100644 --- a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/gc_heap_pool.rs +++ b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/gc_heap_pool.rs @@ -133,7 +133,7 @@ impl GcHeapPool { heaps[allocation_index.index()].dealloc(heap) }; - self.index_allocator.free(SlotId(allocation_index.0)); + self.index_allocator.free(SlotId(allocation_index.0), 0); (memory_alloc_index, memory) } diff --git a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/generic_stack_pool.rs b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/generic_stack_pool.rs index 1225636c8fdc..f7856558952c 100644 --- a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/generic_stack_pool.rs +++ b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/generic_stack_pool.rs @@ -28,6 +28,11 @@ pub struct StackPool { } impl StackPool { + #[cfg(test)] + pub fn enabled() -> bool { + false + } + pub fn new(config: &PoolingInstanceAllocatorConfig) -> Result { Ok(StackPool { stack_size: config.stack_size, @@ -69,15 +74,24 @@ impl StackPool { &self, _stack: &mut wasmtime_fiber::FiberStack, _decommit: impl FnMut(*mut u8, usize), - ) { + ) -> usize { // No need to actually zero the stack, since the stack won't ever be // reused on non-unix systems. + 0 } /// Safety: see the unix implementation. 
- pub unsafe fn deallocate(&self, stack: wasmtime_fiber::FiberStack) { + pub unsafe fn deallocate(&self, stack: wasmtime_fiber::FiberStack, _bytes_resident: usize) { self.live_stacks.fetch_sub(1, Ordering::AcqRel); // A no-op as we don't actually own the fiber stack on Windows. let _ = stack; } + + pub fn unused_warm_slots(&self) -> u32 { + 0 + } + + pub fn unused_bytes_resident(&self) -> Option<usize> { + None + } } diff --git a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/index_allocator.rs b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/index_allocator.rs index 7f257fde44e9..1e4fcabf20cc 100644 --- a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/index_allocator.rs +++ b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/index_allocator.rs @@ -39,8 +39,31 @@ impl SimpleIndexAllocator { self.0.alloc(None) } - pub(crate) fn free(&self, index: SlotId) { - self.0.free(index); + /// Frees the `index` slot to be available for allocation elsewhere. + /// + /// The `bytes_resident` argument is a counter to keep track of how many + /// bytes are still resident in this slot, if any, for reporting later via + /// the [`Self::unused_bytes_resident`] method. + pub(crate) fn free(&self, index: SlotId, bytes_resident: usize) { + self.0.free(index, bytes_resident); + } + + /// Returns the number of previously-used slots in this allocator which are + /// not currently in use. + /// + /// Note that this acquires a `Mutex` for synchronization at this time to + /// read the internal counter information. + pub fn unused_warm_slots(&self) -> u32 { + self.0.unused_warm_slots() + } + + /// Returns the number of bytes that are resident in previously-used slots + /// in this allocator which are not currently in use. + /// + /// Note that this acquires a `Mutex` for synchronization at this time to + /// read the internal counter information. + pub fn unused_bytes_resident(&self) -> usize { + self.0.unused_bytes_resident() } #[cfg(test)] @@ -96,6 +119,9 @@ struct Inner { /// The `List` here is appended to during deallocation and removal happens /// from the tail during allocation. module_affine: HashMap<MemoryInModule, List>, + + /// Cache for the sum of the `bytes_resident` of all `UnusedWarm` slots. + unused_bytes_resident: usize, } /// A helper "linked list" data structure which is based on indices. @@ -142,6 +168,10 @@ struct Unused { /// Which module this slot was historically affine to, if any. affinity: Option<MemoryInModule>, + /// Number of bytes that are part of `UnusedWarm` slots and are currently + /// kept resident (vs paged out). + bytes_resident: usize, + /// Metadata about the linked list for all slots affine to `affinity`. affine_list_link: Link, @@ -164,6 +194,7 @@ impl ModuleAffinityIndexAllocator { module_affine: HashMap::new(), slot_state: (0..capacity).map(|_| SlotState::UnusedCold).collect(), warm: List::default(), + unused_bytes_resident: 0, })) } @@ -256,7 +287,11 @@ impl ModuleAffinityIndexAllocator { } })?; - inner.slot_state[slot_id.index()] = SlotState::Used(match mode { + let slot = &mut inner.slot_state[slot_id.index()]; + if let SlotState::UnusedWarm(Unused { bytes_resident, .. 
}) = slot { + inner.unused_bytes_resident -= *bytes_resident; + } + *slot = SlotState::Used(match mode { AllocMode::ForceAffineAndClear => None, AllocMode::AnySlot => for_memory, }); @@ -264,7 +299,7 @@ impl ModuleAffinityIndexAllocator { Some(slot_id) } - pub(crate) fn free(&self, index: SlotId) { + pub(crate) fn free(&self, index: SlotId, bytes_resident: usize) { let mut inner = self.0.lock().unwrap(); let inner = &mut *inner; let module_memory = match inner.slot_state[index.index()] { @@ -298,8 +333,10 @@ impl ModuleAffinityIndexAllocator { None => Link::default(), }; + inner.unused_bytes_resident += bytes_resident; inner.slot_state[index.index()] = SlotState::UnusedWarm(Unused { affinity: module_memory, + bytes_resident, affine_list_link, unused_list_link, }); @@ -331,6 +368,24 @@ impl ModuleAffinityIndexAllocator { let inner = self.0.lock().unwrap(); inner.module_affine.keys().copied().collect() } + + /// Returns the number of previously-used slots in this allocator which are + /// not currently in use. + /// + /// Note that this acquires a `Mutex` for synchronization at this time to + /// read the internal counter information. + pub fn unused_warm_slots(&self) -> u32 { + self.0.lock().unwrap().unused_warm_slots + } + + /// Returns the number of bytes that are resident in previously-used slots + /// in this allocator which are not currently in use. + /// + /// Note that this acquires a `Mutex` for synchronization at this time to + /// read the internal counter information. + pub fn unused_bytes_resident(&self) -> usize { + self.0.lock().unwrap().unused_bytes_resident + } } impl Inner { @@ -507,15 +562,15 @@ mod test { assert_eq!(index2.index(), 1); assert_ne!(index1, index2); - state.free(index1); + state.free(index1, 0); assert_eq!(state.num_empty_slots(), 99); // Allocate to the same `index1` slot again. let index3 = state.alloc(Some(id1)).unwrap(); assert_eq!(index3, index1); - state.free(index3); + state.free(index3, 0); - state.free(index2); + state.free(index2, 0); // Both id1 and id2 should have some slots with affinity. let affinity_modules = state.testing_module_affinity_list(); @@ -537,7 +592,7 @@ mod test { assert_eq!(state.num_empty_slots(), 0); for i in indices { - state.free(i); + state.free(i, 0); } // Now there should be no slots left with affinity for id1. @@ -548,7 +603,7 @@ mod test { // Allocate an index we know previously had an instance but // now does not (list ran empty). 
let index = state.alloc(Some(id1)).unwrap(); - state.free(index); + state.free(index, 0); } #[test] @@ -561,8 +616,8 @@ mod test { let index1 = state.alloc(Some(MemoryInModule(id, memory_index))).unwrap(); let index2 = state.alloc(Some(MemoryInModule(id, memory_index))).unwrap(); - state.free(index2); - state.free(index1); + state.free(index2, 0); + state.free(index1, 0); assert!( state .alloc_affine_and_clear_affinity(id, memory_index) @@ -601,7 +656,7 @@ mod test { if !allocated.is_empty() && rng.random_bool(0.5) { let i = rng.random_range(0..allocated.len()); let to_free_idx = allocated.swap_remove(i); - state.free(to_free_idx); + state.free(to_free_idx, 0); } else { let id = ids[rng.random_range(0..ids.len())]; let index = match state.alloc(Some(id)) { @@ -636,14 +691,14 @@ mod test { // Set some slot affinities assert_eq!(state.alloc(Some(id1)), Some(SlotId(0))); - state.free(SlotId(0)); + state.free(SlotId(0), 0); assert_eq!(state.alloc(Some(id2)), Some(SlotId(1))); - state.free(SlotId(1)); + state.free(SlotId(1), 0); // Only 2 slots are allowed to be unused and warm, so we're at our // threshold, meaning one must now be evicted. assert_eq!(state.alloc(Some(id3)), Some(SlotId(0))); - state.free(SlotId(0)); + state.free(SlotId(0), 0); // pickup `id2` again, it should be affine. assert_eq!(state.alloc(Some(id2)), Some(SlotId(1))); @@ -652,17 +707,17 @@ mod test { // fresh slot assert_eq!(state.alloc(Some(id1)), Some(SlotId(2))); - state.free(SlotId(1)); - state.free(SlotId(2)); + state.free(SlotId(1), 0); + state.free(SlotId(2), 0); // ensure everything stays affine assert_eq!(state.alloc(Some(id1)), Some(SlotId(2))); assert_eq!(state.alloc(Some(id2)), Some(SlotId(1))); assert_eq!(state.alloc(Some(id3)), Some(SlotId(0))); - state.free(SlotId(1)); - state.free(SlotId(2)); - state.free(SlotId(0)); + state.free(SlotId(1), 0); + state.free(SlotId(2), 0); + state.free(SlotId(0), 0); // LRU is 1, so that should be picked assert_eq!( @@ -691,9 +746,9 @@ mod test { Some(SlotId(3)) ); - state.free(SlotId(1)); - state.free(SlotId(2)); - state.free(SlotId(3)); + state.free(SlotId(1), 0); + state.free(SlotId(2), 0); + state.free(SlotId(3), 0); // for good measure make sure id3 is still affine assert_eq!(state.alloc(Some(id3)), Some(SlotId(0))); @@ -705,15 +760,15 @@ mod test { assert_eq!(allocator.testing_freelist(), []); let a = allocator.alloc().unwrap(); assert_eq!(allocator.testing_freelist(), []); - allocator.free(a); + allocator.free(a, 0); assert_eq!(allocator.testing_freelist(), [a]); assert_eq!(allocator.alloc(), Some(a)); assert_eq!(allocator.testing_freelist(), []); let b = allocator.alloc().unwrap(); assert_eq!(allocator.testing_freelist(), []); - allocator.free(b); + allocator.free(b, 0); assert_eq!(allocator.testing_freelist(), [b]); - allocator.free(a); + allocator.free(a, 0); assert_eq!(allocator.testing_freelist(), [b, a]); } } diff --git a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/memory_pool.rs b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/memory_pool.rs index 2e0f3970c06b..78739b6a8939 100644 --- a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/memory_pool.rs +++ b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/memory_pool.rs @@ -449,7 +449,7 @@ impl MemoryPool { } self.pool.stripes[self.stripe_index] .allocator - .free(SlotId(self.striped_allocation_index.0)); + .free(SlotId(self.striped_allocation_index.0), 0); } } } @@ -468,6 +468,7 @@ impl MemoryPool { &self, allocation_index: MemoryAllocationIndex, image: MemoryImageSlot, 
+ bytes_resident: usize, ) { self.return_memory_image_slot(allocation_index, image); @@ -475,7 +476,7 @@ StripedAllocationIndex::from_unstriped_slot_index(allocation_index, self.stripes.len()); self.stripes[stripe_index] .allocator - .free(SlotId(striped_allocation_index.0)); + .free(SlotId(striped_allocation_index.0), bytes_resident); } /// Purging everything related to `module`. @@ -523,7 +524,7 @@ } } - stripe.allocator.free(id); + stripe.allocator.free(id, 0); } } } @@ -599,6 +600,20 @@ ); assert!(matches!(prev, ImageSlot::Unknown)); } + + pub fn unused_warm_slots(&self) -> u32 { + self.stripes + .iter() + .map(|i| i.allocator.unused_warm_slots()) + .sum() + } + + pub fn unused_bytes_resident(&self) -> usize { + self.stripes + .iter() + .map(|i| i.allocator.unused_bytes_resident()) + .sum() + } } /// The index of a memory allocation within an `InstanceAllocator`. diff --git a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/metrics.rs b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/metrics.rs index 058bbd72238f..74525fb67351 100644 --- a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/metrics.rs +++ b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/metrics.rs @@ -41,6 +41,74 @@ impl PoolingAllocatorMetrics { self.allocator().live_tables.load(Ordering::Relaxed) } + /// Returns the number of WebAssembly stacks currently allocated. + #[cfg(feature = "async")] + pub fn stacks(&self) -> usize { + self.allocator().live_stacks.load(Ordering::Relaxed) + } + + /// Returns the number of WebAssembly GC heaps currently allocated. + #[cfg(feature = "gc")] + pub fn gc_heaps(&self) -> usize { + self.allocator().live_gc_heaps.load(Ordering::Relaxed) + } + + /// Returns the number of slots for linear memories in this allocator which + /// are not currently in use but were previously used. + /// + /// A "warm" slot means that there was a previous instantiation of a memory + /// in that slot. Warm slots are favored in general for allocating new + /// memories over using a slot that has never been used before. + pub fn unused_warm_memories(&self) -> u32 { + self.allocator().memories.unused_warm_slots() + } + + /// Returns the number of bytes in this pooling allocator which are not part + /// of any in-use linear memory slot but were previously used and are kept + /// resident via the `*_keep_resident` configuration options. + pub fn unused_memory_bytes_resident(&self) -> usize { + self.allocator().memories.unused_bytes_resident() + } + + /// Returns the number of slots for tables in this allocator which are not + /// currently in use but were previously used. + /// + /// A "warm" slot means that there was a previous instantiation of a table + /// in that slot. Warm slots are favored in general for allocating new + /// tables over using a slot that has never been used before. + pub fn unused_warm_tables(&self) -> u32 { + self.allocator().tables.unused_warm_slots() + } + + /// Returns the number of bytes in this pooling allocator which are not part + /// of any in-use table slot but were previously used and are kept + /// resident via the `*_keep_resident` configuration options. + pub fn unused_table_bytes_resident(&self) -> usize { + self.allocator().tables.unused_bytes_resident() + } + + /// Returns the number of slots for stacks in this allocator which are not + /// currently in use but were previously used. + /// + /// A "warm" slot means that there was a previous use of a stack + /// in that slot. 
Warm slots are favored in general for allocating new + /// stacks over using a slot that has never been used before. + #[cfg(feature = "async")] + pub fn unused_warm_stacks(&self) -> u32 { + self.allocator().stacks.unused_warm_slots() + } + + /// Returns the number of bytes in this pooling allocator which are not part + /// of any in-use stack slot but were previously used and are kept + /// resident via the `*_keep_resident` configuration options. + /// + /// This returns `None` if the `async_stack_zeroing` option is disabled or + /// if the platform doesn't manage stacks (e.g. Windows returns `None`). + #[cfg(feature = "async")] + pub fn unused_stack_bytes_resident(&self) -> Option<usize> { + self.allocator().stacks.unused_bytes_resident() + } + fn allocator(&self) -> &PoolingInstanceAllocator { self.engine .allocator() @@ -51,10 +119,13 @@ impl PoolingAllocatorMetrics { #[cfg(test)] mod tests { + use crate::vm::instance::allocator::pooling::StackPool; use crate::{ - Config, InstanceAllocationStrategy, Store, + Config, Enabled, InstanceAllocationStrategy, Module, PoolingAllocationConfig, Result, + Store, component::{Component, Linker}, }; + use std::vec::Vec; use super::*; @@ -69,13 +140,23 @@ ) "; + pub(crate) fn small_pool_config() -> PoolingAllocationConfig { + let mut config = PoolingAllocationConfig::new(); + + config.total_memories(10); + config.max_memory_size(2 << 16); + config.total_tables(10); + config.table_elements(10); + config.total_stacks(1); + + config + } + #[test] #[cfg_attr(miri, ignore)] fn smoke_test() { // Start with nothing - let engine = - Engine::new(&Config::new().allocation_strategy(InstanceAllocationStrategy::pooling())) - .unwrap(); + let engine = Engine::new(&Config::new().allocation_strategy(small_pool_config())).unwrap(); let metrics = engine.pooling_allocator_metrics().unwrap(); assert_eq!(metrics.core_instances(), 0); @@ -112,4 +193,169 @@ let maybe_metrics = engine.pooling_allocator_metrics(); assert!(maybe_metrics.is_none()); } + + #[test] + #[cfg_attr(any(miri, not(target_os = "linux")), ignore)] + fn unused_memories_tables_and_more() -> Result<()> { + let mut pool = small_pool_config(); + pool.linear_memory_keep_resident(65536); + pool.table_keep_resident(65536); + pool.pagemap_scan(Enabled::Auto); + let mut config = Config::new(); + config.allocation_strategy(pool); + let engine = Engine::new(&config)?; + + let metrics = engine.pooling_allocator_metrics().unwrap(); + let host_page_size = crate::vm::host_page_size(); + + assert_eq!(metrics.memories(), 0); + assert_eq!(metrics.core_instances(), 0); + assert_eq!(metrics.component_instances(), 0); + assert_eq!(metrics.memories(), 0); + assert_eq!(metrics.tables(), 0); + assert_eq!(metrics.unused_warm_memories(), 0); + assert_eq!(metrics.unused_memory_bytes_resident(), 0); + assert_eq!(metrics.unused_warm_tables(), 0); + assert_eq!(metrics.unused_table_bytes_resident(), 0); + + let m1 = Module::new( + &engine, + r#" + (module (memory (export "m") 1) (table 1 funcref)) + "#, + )?; + + let mut store = Store::new(&engine, ()); + crate::Instance::new(&mut store, &m1, &[])?; + assert_eq!(metrics.memories(), 1); + assert_eq!(metrics.tables(), 1); + assert_eq!(metrics.core_instances(), 1); + assert_eq!(metrics.component_instances(), 0); + drop(store); + + assert_eq!(metrics.memories(), 0); + assert_eq!(metrics.tables(), 0); + assert_eq!(metrics.core_instances(), 0); + assert_eq!(metrics.unused_warm_memories(), 1); + assert_eq!(metrics.unused_warm_tables(), 1); + if 
PoolingAllocationConfig::is_pagemap_scan_available() { + assert_eq!(metrics.unused_memory_bytes_resident(), 0); + assert_eq!(metrics.unused_table_bytes_resident(), host_page_size); + } else { + assert_eq!(metrics.unused_memory_bytes_resident(), 65536); + assert_eq!(metrics.unused_table_bytes_resident(), host_page_size); + } + + let mut store = Store::new(&engine, ()); + let i = crate::Instance::new(&mut store, &m1, &[])?; + assert_eq!(metrics.memories(), 1); + assert_eq!(metrics.tables(), 1); + assert_eq!(metrics.core_instances(), 1); + assert_eq!(metrics.component_instances(), 0); + assert_eq!(metrics.unused_warm_memories(), 0); + assert_eq!(metrics.unused_warm_tables(), 0); + assert_eq!(metrics.unused_memory_bytes_resident(), 0); + assert_eq!(metrics.unused_table_bytes_resident(), 0); + let m = i.get_memory(&mut store, "m").unwrap(); + m.data_mut(&mut store)[0] = 1; + m.grow(&mut store, 1)?; + drop(store); + + assert_eq!(metrics.memories(), 0); + assert_eq!(metrics.tables(), 0); + assert_eq!(metrics.core_instances(), 0); + assert_eq!(metrics.unused_warm_memories(), 1); + assert_eq!(metrics.unused_warm_tables(), 1); + if PoolingAllocationConfig::is_pagemap_scan_available() { + assert_eq!(metrics.unused_memory_bytes_resident(), host_page_size); + assert_eq!(metrics.unused_table_bytes_resident(), host_page_size); + } else { + assert_eq!(metrics.unused_memory_bytes_resident(), 65536); + assert_eq!(metrics.unused_table_bytes_resident(), host_page_size); + } + + let stores = (0..10) + .map(|_| { + let mut store = Store::new(&engine, ()); + crate::Instance::new(&mut store, &m1, &[]).unwrap(); + store + }) + .collect::<Vec<_>>(); + + assert_eq!(metrics.memories(), 10); + assert_eq!(metrics.tables(), 10); + assert_eq!(metrics.core_instances(), 10); + assert_eq!(metrics.unused_warm_memories(), 0); + assert_eq!(metrics.unused_warm_tables(), 0); + assert_eq!(metrics.unused_memory_bytes_resident(), 0); + assert_eq!(metrics.unused_table_bytes_resident(), 0); + + drop(stores); + + assert_eq!(metrics.memories(), 0); + assert_eq!(metrics.tables(), 0); + assert_eq!(metrics.core_instances(), 0); + assert_eq!(metrics.unused_warm_memories(), 10); + assert_eq!(metrics.unused_warm_tables(), 10); + if PoolingAllocationConfig::is_pagemap_scan_available() { + assert_eq!(metrics.unused_memory_bytes_resident(), host_page_size); + assert_eq!(metrics.unused_table_bytes_resident(), 10 * host_page_size); + } else { + assert_eq!(metrics.unused_memory_bytes_resident(), 10 * 65536); + assert_eq!(metrics.unused_table_bytes_resident(), 10 * host_page_size); + } + + Ok(()) + } + + #[test] + #[cfg_attr(miri, ignore)] + fn gc_heaps() -> Result<()> { + let pool = small_pool_config(); + let mut config = Config::new(); + config.allocation_strategy(pool); + let engine = Engine::new(&config)?; + + let metrics = engine.pooling_allocator_metrics().unwrap(); + + assert_eq!(metrics.gc_heaps(), 0); + let mut store = Store::new(&engine, ()); + crate::ExternRef::new(&mut store, ())?; + assert_eq!(metrics.gc_heaps(), 1); + drop(store); + assert_eq!(metrics.gc_heaps(), 0); + + Ok(()) + } + + #[tokio::test] + #[cfg_attr(miri, ignore)] + async fn stacks() -> Result<()> { + let pool = small_pool_config(); + let mut config = Config::new(); + config.allocation_strategy(pool); + config.async_support(true); + let engine = Engine::new(&config)?; + + let metrics = engine.pooling_allocator_metrics().unwrap(); + + assert_eq!(metrics.stacks(), 0); + assert_eq!(metrics.unused_warm_stacks(), 0); + let mut store = Store::new(&engine, ()); + 
crate::Func::wrap(&mut store, || {}) + .call_async(&mut store, &[], &mut []) + .await?; + assert_eq!(metrics.stacks(), 1); + drop(store); + assert_eq!(metrics.stacks(), 0); + assert_eq!(metrics.unused_stack_bytes_resident(), None); + if StackPool::enabled() { + assert_eq!(metrics.unused_warm_stacks(), 1); + } else { + assert_eq!(metrics.unused_warm_stacks(), 0); + } + + Ok(()) + } } diff --git a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/table_pool.rs b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/table_pool.rs index 0047c427ea6a..15a1a5bda1ec 100644 --- a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/table_pool.rs +++ b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/table_pool.rs @@ -181,7 +181,7 @@ impl TablePool { } self.pool .index_allocator - .free(SlotId(self.allocation_index.0)); + .free(SlotId(self.allocation_index.0), 0); } } } @@ -196,10 +196,16 @@ impl TablePool { /// /// The caller must have already called `reset_table_pages_to_zero` on the /// memory and flushed any enqueued decommits for this table's memory. - pub unsafe fn deallocate(&self, allocation_index: TableAllocationIndex, table: Table) { + pub unsafe fn deallocate( + &self, + allocation_index: TableAllocationIndex, + table: Table, + bytes_resident: usize, + ) { assert!(table.is_static()); drop(table); - self.index_allocator.free(SlotId(allocation_index.0)); + self.index_allocator + .free(SlotId(allocation_index.0), bytes_resident); } /// Reset the given table's memory to zero. @@ -208,6 +214,9 @@ impl TablePool { /// needs to be decommitted. It is the caller's responsibility to actually /// perform that decommit before this table is reused. /// + /// Returns the number of bytes that are still resident in memory in this + /// table. + /// /// # Safety /// /// This table must not be in active use, and ready for returning to the @@ -218,7 +227,7 @@ impl TablePool { allocation_index: TableAllocationIndex, table: &mut Table, decommit: impl FnMut(*mut u8, usize), - ) { + ) -> usize { assert!(table.is_static()); let base = self.get(allocation_index); let table_byte_size = table.size() * table.element_type().element_size(); @@ -238,6 +247,14 @@ impl TablePool { ) } } + + pub fn unused_warm_slots(&self) -> u32 { + self.index_allocator.unused_warm_slots() + } + + pub fn unused_bytes_resident(&self) -> usize { + self.index_allocator.unused_bytes_resident() + } } #[cfg(test)] diff --git a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/unix_stack_pool.rs b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/unix_stack_pool.rs index 12bfe2bdb996..9652c1bc3026 100644 --- a/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/unix_stack_pool.rs +++ b/crates/wasmtime/src/runtime/vm/instance/allocator/pooling/unix_stack_pool.rs @@ -29,6 +29,11 @@ pub struct StackPool { } impl StackPool { + #[cfg(test)] + pub fn enabled() -> bool { + true + } + pub fn new(config: &PoolingInstanceAllocatorConfig) -> Result<Self> { use rustix::mm::{MprotectFlags, mprotect}; @@ -146,7 +151,7 @@ impl StackPool { &self, stack: &mut wasmtime_fiber::FiberStack, mut decommit: impl FnMut(*mut u8, usize), - ) { + ) -> usize { assert!(stack.is_from_raw_parts()); assert!( !self.stack_size.is_zero(), ); if !self.async_stack_zeroing { - return; + return 0; } let top = stack @@ -203,6 +208,8 @@ impl StackPool { // Use the system to reset remaining stack pages to zero. 
decommit(bottom_of_stack as _, rest.byte_count()); + + size_to_memset.byte_count() } /// Deallocate a previously-allocated fiber. @@ -214,7 +221,7 @@ /// /// The caller must have already called `zero_stack` on the fiber stack and /// flushed any enqueued decommits for this stack's memory. - pub unsafe fn deallocate(&self, stack: wasmtime_fiber::FiberStack) { + pub unsafe fn deallocate(&self, stack: wasmtime_fiber::FiberStack, bytes_resident: usize) { assert!(stack.is_from_raw_parts()); let top = stack @@ -239,7 +246,19 @@ assert!(index < self.max_stacks); let index = u32::try_from(index).unwrap(); - self.index_allocator.free(SlotId(index)); + self.index_allocator.free(SlotId(index), bytes_resident); + } + + pub fn unused_warm_slots(&self) -> u32 { + self.index_allocator.unused_warm_slots() + } + + pub fn unused_bytes_resident(&self) -> Option<usize> { + if self.async_stack_zeroing { + Some(self.index_allocator.unused_bytes_resident()) + } else { + None + } } } @@ -286,7 +305,7 @@ mod tests { for stack in stacks { unsafe { - pool.deallocate(stack); + pool.deallocate(stack, 0); } } diff --git a/crates/wasmtime/src/runtime/vm/pagemap_disabled.rs b/crates/wasmtime/src/runtime/vm/pagemap_disabled.rs index 32f24ec08f92..39e91f279b4d 100644 --- a/crates/wasmtime/src/runtime/vm/pagemap_disabled.rs +++ b/crates/wasmtime/src/runtime/vm/pagemap_disabled.rs @@ -13,6 +13,8 @@ impl PageMap { /// Resets `ptr` for `len` bytes. /// +/// Returns the number of bytes that are still resident after this returns. +/// /// # Safety /// /// Requires that `ptr` is valid to read and write for `len` bytes. @@ -23,7 +25,7 @@ pub unsafe fn reset_with_pagemap( mut keep_resident: HostAlignedByteCount, mut reset_manually: impl FnMut(&mut [u8]), mut decommit: impl FnMut(*mut u8, usize), -) { +) -> usize { keep_resident = keep_resident.min(len); // `memset` the first `keep_resident` bytes. @@ -42,5 +44,7 @@ pub unsafe fn reset_with_pagemap( len = len.checked_sub(keep_resident).unwrap(); // decommit the rest of it. - decommit(ptr, len.byte_count()) + decommit(ptr, len.byte_count()); + + keep_resident.byte_count() } diff --git a/crates/wasmtime/src/runtime/vm/sys/unix/pagemap.rs b/crates/wasmtime/src/runtime/vm/sys/unix/pagemap.rs index db647d93667f..5e35266b7ebc 100644 --- a/crates/wasmtime/src/runtime/vm/sys/unix/pagemap.rs +++ b/crates/wasmtime/src/runtime/vm/sys/unix/pagemap.rs @@ -239,7 +239,7 @@ pub unsafe fn reset_with_pagemap( mut keep_resident: HostAlignedByteCount, mut reset_manually: impl FnMut(&mut [u8]), mut decommit: impl FnMut(*mut u8, usize), -) { +) -> usize { keep_resident = keep_resident.min(len); let host_page_size = host_page_size(); @@ -327,12 +327,14 @@ pub unsafe fn reset_with_pagemap( // For all regions that were written in the scan reset them manually, then // afterwards decommit everything else. + let mut bytes_resident = 0; for region in result.regions() { // SAFETY: we're relying on Linux to pass in valid region ranges within // the `ptr/len` we specified to the original syscall. unsafe { reset_manually(&mut *region.region().cast_mut()); } + bytes_resident += region.len(); } // Report everything after `walk_end` to the end of memory as memory that // // memory here will be 0 meaning that this is a noop. let scan_size = result.walk_end().addr() - ptr.addr(); decommit(result.walk_end().cast_mut(), len.byte_count() - scan_size); + + bytes_resident } mod ioctl {
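
Example (not part of the patch above): a minimal sketch of how an embedder might read the counters this change adds to `PoolingAllocatorMetrics`. It assumes `Engine::pooling_allocator_metrics` is reachable from the public `Engine` type the same way the in-crate tests use it, and that the pooling allocator is the configured allocation strategy; the stack and GC-heap accessors additionally require the `async` and `gc` features.

use wasmtime::{Config, Engine, PoolingAllocationConfig, Result};

fn main() -> Result<()> {
    // Configure a pooling allocator; the `*_keep_resident` options control how
    // many bytes stay resident in unused slots and therefore what the
    // `unused_*_bytes_resident` counters below can report.
    let mut pool = PoolingAllocationConfig::new();
    pool.linear_memory_keep_resident(65536);
    pool.table_keep_resident(65536);

    let mut config = Config::new();
    config.allocation_strategy(pool);
    let engine = Engine::new(&config)?;

    // `pooling_allocator_metrics()` returns `None` when the pooling allocator
    // is not in use, so unwrapping here relies on the configuration above.
    let metrics = engine.pooling_allocator_metrics().unwrap();
    println!("live memories:             {}", metrics.memories());
    println!("live tables:               {}", metrics.tables());
    println!("unused warm memory slots:  {}", metrics.unused_warm_memories());
    println!("resident bytes (memories): {}", metrics.unused_memory_bytes_resident());
    println!("unused warm table slots:   {}", metrics.unused_warm_tables());
    println!("resident bytes (tables):   {}", metrics.unused_table_bytes_resident());
    Ok(())
}

On Linux, when the PAGEMAP_SCAN-based reset is available (see `pagemap_scan` above), the resident-byte counters reflect only the pages the kernel reports as actually present, which is why the new test expects smaller values in that configuration.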