From 3b02ec39aa5c6769b2178d2c06039f9a3249ea97 Mon Sep 17 00:00:00 2001 From: Dakshin Devanand Date: Thu, 24 Apr 2025 21:36:54 +0000 Subject: [PATCH 1/3] feat: Add PVTime support for ARM Adds functionality for pvtime, which exposes steal time to the guest on ARM machines. PVTime is persisted across snapshots as well (snapshot version updated). - Added ipa per vCPU for mem region storing steal time info. - Persists this ipa per vCPU across snapshots. - Shared steal time mem region is set up on boot and on restore from snapshot in builder.rs. Signed-off-by: Dakshin Devanand --- src/vmm/src/arch/aarch64/vcpu.rs | 49 ++++++++++++++++++++++++++ src/vmm/src/builder.rs | 60 ++++++++++++++++++++++++++++++++ src/vmm/src/persist.rs | 2 +- 3 files changed, 110 insertions(+), 1 deletion(-) diff --git a/src/vmm/src/arch/aarch64/vcpu.rs b/src/vmm/src/arch/aarch64/vcpu.rs index 2c4c55375ed..59c00c3ff86 100644 --- a/src/vmm/src/arch/aarch64/vcpu.rs +++ b/src/vmm/src/arch/aarch64/vcpu.rs @@ -11,6 +11,7 @@ use std::mem::offset_of; use kvm_bindings::*; use kvm_ioctls::{VcpuExit, VcpuFd, VmFd}; use serde::{Deserialize, Serialize}; +use vm_memory::GuestAddress; use super::get_fdt_addr; use super::regs::*; @@ -42,6 +43,8 @@ pub enum VcpuArchError { Fam(vmm_sys_util::fam::Error), /// {0} GetMidrEl1(String), + /// Failed to set/get device attributes for vCPU: {0} + DeviceAttribute(kvm_ioctls::Error), } /// Extract the Manufacturer ID from the host. @@ -115,6 +118,8 @@ pub struct KvmVcpu { /// Vcpu peripherals, such as buses pub peripherals: Peripherals, kvi: kvm_vcpu_init, + /// IPA of steal_time region + pub pvtime_ipa: Option<GuestAddress>, } /// Vcpu peripherals @@ -148,6 +153,7 @@ impl KvmVcpu { fd: kvm_vcpu, peripherals: Default::default(), kvi, + pvtime_ipa: None, }) } @@ -243,6 +249,8 @@ impl KvmVcpu { // the boot state and turned secondary vcpus on.
state.kvi.features[0] &= !(1 << KVM_ARM_VCPU_POWER_OFF); + state.pvtime_ipa = self.pvtime_ipa.map(|guest_addr| guest_addr.0); + Ok(state) } @@ -276,6 +284,13 @@ impl KvmVcpu { } self.set_mpstate(state.mp_state) .map_err(KvmVcpuError::RestoreState)?; + + // Assumes that steal time memory region was set up already + if let Some(pvtime_ipa) = state.pvtime_ipa { + self.enable_pvtime(GuestAddress(pvtime_ipa)) + .map_err(KvmVcpuError::RestoreState)?; + } + Ok(()) } @@ -439,6 +454,38 @@ impl KvmVcpu { pub fn set_mpstate(&self, state: kvm_mp_state) -> Result<(), VcpuArchError> { self.fd.set_mp_state(state).map_err(VcpuArchError::SetMp) } + + /// Check if pvtime (steal time on ARM) is supported for vcpu + pub fn supports_pvtime(&self) -> bool { + let pvtime_device_attr = kvm_bindings::kvm_device_attr { + group: kvm_bindings::KVM_ARM_VCPU_PVTIME_CTRL, + attr: kvm_bindings::KVM_ARM_VCPU_PVTIME_IPA as u64, + addr: 0, + flags: 0, + }; + + // Use kvm_has_device_attr to check if PVTime is supported + self.fd.has_device_attr(&pvtime_device_attr).is_ok() + } + + /// Enables pvtime for vcpu + pub fn enable_pvtime(&mut self, ipa: GuestAddress) -> Result<(), VcpuArchError> { + self.pvtime_ipa = Some(ipa); + + // Use KVM syscall (kvm_set_device_attr) to register the vCPU with the steal_time region + let vcpu_device_attr = kvm_bindings::kvm_device_attr { + group: KVM_ARM_VCPU_PVTIME_CTRL, + attr: KVM_ARM_VCPU_PVTIME_IPA as u64, + addr: &ipa.0 as *const u64 as u64, // userspace address of attr data + flags: 0, + }; + + self.fd + .set_device_attr(&vcpu_device_attr) + .map_err(VcpuArchError::DeviceAttribute)?; + + Ok(()) + } } impl Peripherals { @@ -467,6 +514,8 @@ pub struct VcpuState { pub mpidr: u64, /// kvi states for vcpu initialization. 
pub kvi: kvm_vcpu_init, + /// ipa for steal_time region + pub pvtime_ipa: Option<u64>, } impl Debug for VcpuState { diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 398c25ba056..4a810ee083a 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -15,6 +15,8 @@ use linux_loader::cmdline::Cmdline as LoaderKernelCmdline; use userfaultfd::Uffd; use utils::time::TimestampUs; #[cfg(target_arch = "aarch64")] +use vm_memory::GuestAddress; +#[cfg(target_arch = "aarch64")] use vm_superio::Rtc; use vm_superio::Serial; use vmm_sys_util::eventfd::EventFd; @@ -82,6 +84,9 @@ pub enum StartMicrovmError { CreateLegacyDevice(device_manager::legacy::LegacyDeviceError), /// Error creating VMGenID device: {0} CreateVMGenID(VmGenIdError), + /// Error enabling pvtime on vcpu: {0} + #[cfg(target_arch = "aarch64")] + EnablePVTime(crate::arch::VcpuArchError), /// Invalid Memory Configuration: {0} GuestMemory(crate::vstate::memory::MemoryError), /// Error with initrd initialization: {0}. @@ -289,6 +294,13 @@ pub fn build_microvm_for_boot( attach_vmgenid_device(&mut vmm)?; + #[cfg(target_arch = "aarch64")] + if vcpus[0].kvm_vcpu.supports_pvtime() { + setup_pvtime(&mut vmm, &mut vcpus)?; + } else { + log::warn!("Vcpus do not support pvtime, steal time will not be reported to guest"); + } + configure_system_for_boot( &mut vmm, vcpus.as_mut(), @@ -449,6 +461,16 @@ pub fn build_microvm_from_snapshot( } } + // Restore allocator state + #[cfg(target_arch = "aarch64")] + if let Some(pvtime_ipa) = vcpus[0].kvm_vcpu.pvtime_ipa { + allocate_pvtime_region( + &mut vmm, + vcpus.len(), + vm_allocator::AllocPolicy::ExactMatch(pvtime_ipa.0), + )?; + } + // Restore vcpus kvm state.
for (vcpu, state) in vcpus.iter_mut().zip(microvm_state.vcpu_states.iter()) { vcpu.kvm_vcpu @@ -552,6 +574,44 @@ pub fn setup_serial_device( Ok(serial) } +/// 64 bytes due to alignment requirement in 3.1 of https://www.kernel.org/doc/html/v5.8/virt/kvm/devices/vcpu.html#attribute-kvm-arm-vcpu-pvtime-ipa +#[cfg(target_arch = "aarch64")] +const STEALTIME_STRUCT_MEM_SIZE: u64 = 64; + +/// Helper method to allocate steal time region +#[cfg(target_arch = "aarch64")] +fn allocate_pvtime_region( + vmm: &mut Vmm, + vcpu_count: usize, + policy: vm_allocator::AllocPolicy, +) -> Result<GuestAddress, StartMicrovmError> { + let size = STEALTIME_STRUCT_MEM_SIZE * vcpu_count as u64; + let addr = vmm + .resource_allocator + .allocate_system_memory(size, STEALTIME_STRUCT_MEM_SIZE, policy) + .map_err(StartMicrovmError::AllocateResources)?; + Ok(GuestAddress(addr)) +} + +/// Sets up pvtime for all vcpus +#[cfg(target_arch = "aarch64")] +fn setup_pvtime(vmm: &mut Vmm, vcpus: &mut [Vcpu]) -> Result<(), StartMicrovmError> { + // Alloc sys mem for steal time region + let pvtime_mem: GuestAddress = + allocate_pvtime_region(vmm, vcpus.len(), vm_allocator::AllocPolicy::LastMatch)?; + + // Register all vcpus with pvtime device + for (i, vcpu) in vcpus.iter_mut().enumerate() { + vcpu.kvm_vcpu + .enable_pvtime(GuestAddress( + pvtime_mem.0 + i as u64 * STEALTIME_STRUCT_MEM_SIZE, + )) + .map_err(StartMicrovmError::EnablePVTime)?; + } + + Ok(()) +} + #[cfg(target_arch = "aarch64")] fn attach_legacy_devices_aarch64( event_manager: &mut EventManager, diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index aeacadeb66e..4111d8d6c34 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -148,7 +148,7 @@ pub enum CreateSnapshotError { } /// Snapshot version -pub const SNAPSHOT_VERSION: Version = Version::new(6, 0, 0); +pub const SNAPSHOT_VERSION: Version = Version::new(7, 0, 0); /// Creates a Microvm snapshot.
pub fn create_snapshot( From de0aa1b7a866d06413aa817ddbc863c4705af020 Mon Sep 17 00:00:00 2001 From: Dakshin Devanand Date: Thu, 24 Apr 2025 21:42:47 +0000 Subject: [PATCH 2/3] test: Add steal time integration tests Added integration tests checking: - steal time increase - steal time persistence across snapshots - pvtime existence on ARM Motivated by addition of PVTime functionality for ARM. Signed-off-by: Dakshin Devanand --- .../functional/test_steal_time.py | 121 ++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 tests/integration_tests/functional/test_steal_time.py diff --git a/tests/integration_tests/functional/test_steal_time.py b/tests/integration_tests/functional/test_steal_time.py new file mode 100644 index 00000000000..5c607ae4201 --- /dev/null +++ b/tests/integration_tests/functional/test_steal_time.py @@ -0,0 +1,121 @@ +# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for verifying the PVTime device behavior under contention and across snapshots.""" + +import time + +import pytest + +from framework.properties import global_props + + +def get_steal_time_ms(vm): + """Returns total steal time of vCPUs in VM in milliseconds""" + _, out, _ = vm.ssh.run("grep -w '^cpu' /proc/stat") + steal_time_tck = int(out.strip().split()[8]) + clk_tck = int(vm.ssh.run("getconf CLK_TCK").stdout) + return steal_time_tck / clk_tck * 1000 + + +@pytest.mark.skipif( + global_props.cpu_architecture != "aarch64", reason="Only run in aarch64" +) +def test_guest_has_pvtime_enabled(uvm_plain): + """ + Check that the guest kernel has enabled PV steal time. 
+ """ + vm = uvm_plain + vm.spawn() + vm.basic_config() + vm.add_net_iface() + vm.start() + + _, stdout, _ = vm.ssh.run("dmesg | grep 'stolen time PV'") + assert ( + "stolen time PV" in stdout + ), "Guest kernel did not report PV steal time enabled" + + +def test_pvtime_steal_time_increases(uvm_plain): + """ + Test that PVTime steal time increases when both vCPUs are contended on the same pCPU. + """ + vm = uvm_plain + vm.spawn() + vm.basic_config() + vm.add_net_iface() + vm.start() + + # Pin both vCPUs to the same physical CPU to induce contention + vm.pin_vcpu(0, 0) + vm.pin_vcpu(1, 0) + + # Start two infinite loops to hog CPU time + hog_cmd = "nohup bash -c 'while true; do :; done' >/dev/null 2>&1 &" + vm.ssh.run(hog_cmd) + vm.ssh.run(hog_cmd) + + # Measure before and after steal time + steal_before = get_steal_time_ms(vm) + time.sleep(2) + steal_after = get_steal_time_ms(vm) + + # Require increase in steal time + assert ( + steal_after > steal_before + ), f"Steal time did not increase as expected. Before: {steal_before}, After: {steal_after}" + + +def test_pvtime_snapshot(uvm_plain, microvm_factory): + """ + Test that PVTime steal time is preserved across snapshot/restore + and continues increasing post-resume. 
+ """ + vm = uvm_plain + vm.spawn() + vm.basic_config() + vm.add_net_iface() + vm.start() + + vm.pin_vcpu(0, 0) + vm.pin_vcpu(1, 0) + + hog_cmd = "nohup bash -c 'while true; do :; done' >/dev/null 2>&1 &" + vm.ssh.run(hog_cmd) + vm.ssh.run(hog_cmd) + + # Snapshot pre-steal time + steal_before = get_steal_time_ms(vm) + + snapshot = vm.snapshot_full() + vm.kill() + + # Restore microVM from snapshot and resume + restored_vm = microvm_factory.build() + restored_vm.spawn() + restored_vm.restore_from_snapshot(snapshot, resume=False) + snapshot.delete() + + restored_vm.pin_vcpu(0, 0) + restored_vm.pin_vcpu(1, 0) + restored_vm.resume() + + # Steal time just after restoring + steal_after_snap = get_steal_time_ms(restored_vm) + + time.sleep(2) + + # Steal time after running resumed VM + steal_after_resume = get_steal_time_ms(restored_vm) + + # Ensure steal time persisted and continued increasing + tolerance = 2000 # 2.0 seconds tolerance for persistence check + persisted = ( + steal_before < steal_after_snap and steal_after_snap - steal_before < tolerance + ) + increased = steal_after_resume > steal_after_snap + + assert ( + persisted and increased + ), "Steal time did not persist through snapshot or failed to increase after resume" From 7a0cadf0882ba25e3960d59ed89bf05cad7ff24b Mon Sep 17 00:00:00 2001 From: Dakshin Devanand Date: Thu, 24 Apr 2025 21:46:26 +0000 Subject: [PATCH 3/3] doc: Add changelog entry for PVTime Add a changelog entry to inform about addition of PVTime (steal time) functionality on ARM. Signed-off-by: Dakshin Devanand --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ead297b7b84..c05641e1780 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ and this project adheres to ### Added +- [#5139](https://github.com/firecracker-microvm/firecracker/pull/5139): Added + support for [PVTime](https://docs.kernel.org/virt/kvm/arm/pvtime.html). This + is used to support steal time on ARM machines. 
- [#5048](https://github.com/firecracker-microvm/firecracker/pull/5048): Added support for [PVH boot mode](docs/pvh.md). This is used when an x86 kernel provides the appropriate ELF Note to indicate that PVH boot mode is supported.