From 8e2fba79c6379d9484ff5555c610cd0c3691b40b Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Wed, 25 Sep 2024 12:19:21 +0100 Subject: [PATCH 1/3] refactor: Eliminate `VmResources::track_dirty_pages` getter All other fields of `VmResources` and the contained `VmConfig` struct are accessed directly, with `track_dirty_pages` being the odd one out. Let's make it uniform. Signed-off-by: Patrick Roy --- src/vmm/src/builder.rs | 8 +++----- src/vmm/src/resources.rs | 5 ----- src/vmm/src/rpc_interface.rs | 2 +- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 9ef8b4f620c..c006c5f4e83 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -252,8 +252,6 @@ pub fn build_microvm_for_boot( .boot_source_builder() .ok_or(MissingKernelConfig)?; - let track_dirty_pages = vm_resources.track_dirty_pages(); - let vhost_user_device_used = vm_resources .block .devices @@ -272,7 +270,7 @@ pub fn build_microvm_for_boot( let guest_memory = if vhost_user_device_used { GuestMemoryMmap::memfd_backed( vm_resources.vm_config.mem_size_mib, - track_dirty_pages, + vm_resources.vm_config.track_dirty_pages, vm_resources.vm_config.huge_pages, ) .map_err(StartMicrovmError::GuestMemory)? @@ -280,7 +278,7 @@ pub fn build_microvm_for_boot( let regions = crate::arch::arch_memory_regions(vm_resources.vm_config.mem_size_mib << 20); GuestMemoryMmap::from_raw_regions( ®ions, - track_dirty_pages, + vm_resources.vm_config.track_dirty_pages, vm_resources.vm_config.huge_pages, ) .map_err(StartMicrovmError::GuestMemory)? @@ -299,7 +297,7 @@ pub fn build_microvm_for_boot( event_manager, guest_memory, None, - track_dirty_pages, + vm_resources.vm_config.track_dirty_pages, vm_resources.vm_config.vcpu_count, cpu_template.kvm_capabilities.clone(), )?; diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index c2d2b1c7a9e..431b9f53c89 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -234,11 +234,6 @@ impl VmResources { Ok(()) } - /// Returns whether dirty page tracking is enabled or not. - pub fn track_dirty_pages(&self) -> bool { - self.vm_config.track_dirty_pages - } - /// Add a custom CPU template to the VM resources /// to configure vCPUs. pub fn set_custom_cpu_template(&mut self, cpu_template: CustomCpuTemplate) { diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index 68fd1a7c37b..566228fd53a 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -755,7 +755,7 @@ impl RuntimeApiController { log_dev_preview_warning("Virtual machine snapshots", None); if create_params.snapshot_type == SnapshotType::Diff - && !self.vm_resources.track_dirty_pages() + && !self.vm_resources.vm_config.track_dirty_pages { return Err(VmmActionError::NotSupported( "Diff snapshots are not allowed on uVMs with dirty page tracking disabled." From 945d9f3b18a8c9a8d2d26ff4eac961a855f93668 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Wed, 25 Sep 2024 12:30:06 +0100 Subject: [PATCH 2/3] refactor: Extract guest memory allocation into function Move guest memory allocation into a function of `VmResources`. The configuration we end up allocating depends solely on the information in `VmResources`, and this allows us to easily use production guest memory configurations in benchmarks. Signed-off-by: Patrick Roy --- src/vmm/src/builder.rs | 36 ++++-------------------------------- src/vmm/src/resources.rs | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 32 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index c006c5f4e83..6bbfec6fabb 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -64,7 +64,7 @@ use crate::utils::u64_to_usize; use crate::vmm_config::boot_source::BootConfig; use crate::vmm_config::instance_info::InstanceInfo; use crate::vmm_config::machine_config::{VmConfig, VmConfigError}; -use crate::vstate::memory::{GuestAddress, GuestMemory, GuestMemoryExtension, GuestMemoryMmap}; +use crate::vstate::memory::{GuestAddress, GuestMemory, GuestMemoryMmap}; use crate::vstate::vcpu::{Vcpu, VcpuConfig, VcpuError}; use crate::vstate::vm::Vm; use crate::{device_manager, EventManager, Vmm, VmmError}; @@ -252,37 +252,9 @@ pub fn build_microvm_for_boot( .boot_source_builder() .ok_or(MissingKernelConfig)?; - let vhost_user_device_used = vm_resources - .block - .devices - .iter() - .any(|b| b.lock().expect("Poisoned lock").is_vhost_user()); - - // Page faults are more expensive for shared memory mapping, including memfd. - // For this reason, we only back guest memory with a memfd - // if a vhost-user-blk device is configured in the VM, otherwise we fall back to - // an anonymous private memory. - // - // The vhost-user-blk branch is not currently covered by integration tests in Rust, - // because that would require running a backend process. If in the future we converge to - // a single way of backing guest memory for vhost-user and non-vhost-user cases, - // that would not be worth the effort. - let guest_memory = if vhost_user_device_used { - GuestMemoryMmap::memfd_backed( - vm_resources.vm_config.mem_size_mib, - vm_resources.vm_config.track_dirty_pages, - vm_resources.vm_config.huge_pages, - ) - .map_err(StartMicrovmError::GuestMemory)? - } else { - let regions = crate::arch::arch_memory_regions(vm_resources.vm_config.mem_size_mib << 20); - GuestMemoryMmap::from_raw_regions( - ®ions, - vm_resources.vm_config.track_dirty_pages, - vm_resources.vm_config.huge_pages, - ) - .map_err(StartMicrovmError::GuestMemory)? - }; + let guest_memory = vm_resources + .allocate_guest_memory() + .map_err(StartMicrovmError::GuestMemory)?; let entry_addr = load_kernel(boot_config, &guest_memory)?; let initrd = load_initrd_from_config(boot_config, &guest_memory)?; diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index 431b9f53c89..a4d15641975 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -28,6 +28,7 @@ use crate::vmm_config::metrics::{init_metrics, MetricsConfig, MetricsConfigError use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError}; use crate::vmm_config::net::*; use crate::vmm_config::vsock::*; +use crate::vstate::memory::{GuestMemoryExtension, GuestMemoryMmap, MemoryError}; /// Errors encountered when configuring microVM resources. #[derive(Debug, thiserror::Error, displaydoc::Display)] @@ -468,6 +469,42 @@ impl VmResources { Ok(()) } + + /// Allocates guest memory in a configuration most appropriate for these [`VmResources`]. + /// + /// If vhost-user-blk devices are in use, allocates memfd-backed shared memory, otherwise + /// prefers anonymous memory for performance reasons. + pub fn allocate_guest_memory(&self) -> Result { + let vhost_user_device_used = self + .block + .devices + .iter() + .any(|b| b.lock().expect("Poisoned lock").is_vhost_user()); + + // Page faults are more expensive for shared memory mapping, including memfd. + // For this reason, we only back guest memory with a memfd + // if a vhost-user-blk device is configured in the VM, otherwise we fall back to + // an anonymous private memory. + // + // The vhost-user-blk branch is not currently covered by integration tests in Rust, + // because that would require running a backend process. If in the future we converge to + // a single way of backing guest memory for vhost-user and non-vhost-user cases, + // that would not be worth the effort. + if vhost_user_device_used { + GuestMemoryMmap::memfd_backed( + self.vm_config.mem_size_mib, + self.vm_config.track_dirty_pages, + self.vm_config.huge_pages, + ) + } else { + let regions = crate::arch::arch_memory_regions(self.vm_config.mem_size_mib << 20); + GuestMemoryMmap::from_raw_regions( + ®ions, + self.vm_config.track_dirty_pages, + self.vm_config.huge_pages, + ) + } + } } impl From<&VmResources> for VmmConfig { From 1bcd9ed81b250374527122ed9f8a4fdd664b6a50 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Wed, 25 Sep 2024 13:16:21 +0100 Subject: [PATCH 3/3] bench: add benchmark for page fault latencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a benchmark that attempts to capture the latency of a single page fault in a guest memory configuration that we also use in production. This should catch regressions such as the 1.6 memfd/vhost post snapshot-restore latency regression, and does indeed do so: MAP_ANONYMOUS: Running benches/memory_access.rs page_fault time: [2.8611 µs 2.9327 µs 3.0170 µs] Found 9 outliers among 100 measurements (9.00%) 4 (4.00%) high mild 5 (5.00%) high severe memfd_create: Running benches/memory_access.rs page_fault time: [5.7449 µs 5.8779 µs 6.0450 µs] change: [+85.649% +92.231% +98.074%] (p = 0.00 < 0.05) Performance has regressed. Found 8 outliers among 100 measurements (8.00%) 5 (5.00%) low mild 2 (2.00%) high mild 1 (1.00%) high severe Add the same benchmark for huge pages, just in case. Funnily enough, there is no vhost-tax to be paid for huge pages. Signed-off-by: Patrick Roy --- src/vmm/Cargo.toml | 4 ++ src/vmm/benches/memory_access.rs | 70 ++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 src/vmm/benches/memory_access.rs diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index bee314c0ac1..267e27624f5 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -70,5 +70,9 @@ harness = false name = "block_request" harness = false +[[bench]] +name = "memory_access" +harness = false + [lints] workspace = true diff --git a/src/vmm/benches/memory_access.rs b/src/vmm/benches/memory_access.rs new file mode 100644 index 00000000000..81742d1ce37 --- /dev/null +++ b/src/vmm/benches/memory_access.rs @@ -0,0 +1,70 @@ +// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; +use vm_memory::{GuestAddress, GuestMemory}; +use vmm::resources::VmResources; +use vmm::vmm_config::machine_config::{HugePageConfig, VmConfig}; + +fn bench_single_page_fault(c: &mut Criterion, configuration: VmResources) { + c.bench_function("page_fault", |b| { + b.iter_batched( + || { + let memory = configuration.allocate_guest_memory().unwrap(); + let ptr = memory + .get_slice(GuestAddress(0), 1) + .unwrap() + .ptr_guard_mut() + .as_ptr(); + + // fine to return both here, because ptr is not a reference into `memory` (e.g. no + // self-referential structs are happening here) + (memory, ptr) + }, + |(_, ptr)| unsafe { + // Cause a single page fault + ptr.write_volatile(1); + }, + BatchSize::SmallInput, + ) + }); +} + +pub fn bench_4k_page_fault(c: &mut Criterion) { + bench_single_page_fault( + c, + VmResources { + vm_config: VmConfig { + vcpu_count: 1, + mem_size_mib: 2, + ..Default::default() + }, + ..Default::default() + }, + ) +} + +pub fn bench_2m_page_fault(c: &mut Criterion) { + bench_single_page_fault( + c, + VmResources { + vm_config: VmConfig { + vcpu_count: 1, + mem_size_mib: 2, + huge_pages: HugePageConfig::Hugetlbfs2M, + ..Default::default() + }, + ..Default::default() + }, + ) +} + +criterion_group! { + name = memory_access_benches; + config = Criterion::default().noise_threshold(0.05); + targets = bench_4k_page_fault, bench_2m_page_fault +} + +criterion_main! { + memory_access_benches +}