diff --git a/src/vmm/src/arch/aarch64/vcpu.rs b/src/vmm/src/arch/aarch64/vcpu.rs index 59c00c3ff86..5d49dacac19 100644 --- a/src/vmm/src/arch/aarch64/vcpu.rs +++ b/src/vmm/src/arch/aarch64/vcpu.rs @@ -41,25 +41,18 @@ pub enum VcpuArchError { SetMp(kvm_ioctls::Error), /// Failed FamStructWrapper operation: {0} Fam(vmm_sys_util::fam::Error), - /// {0} - GetMidrEl1(String), /// Failed to set/get device attributes for vCPU: {0} DeviceAttribute(kvm_ioctls::Error), } /// Extract the Manufacturer ID from the host. /// The ID is found between bits 24-31 of MIDR_EL1 register. -pub fn get_manufacturer_id_from_host() -> Result { +pub fn get_manufacturer_id_from_host() -> Option { let midr_el1_path = "/sys/devices/system/cpu/cpu0/regs/identification/midr_el1"; - let midr_el1 = std::fs::read_to_string(midr_el1_path).map_err(|err| { - VcpuArchError::GetMidrEl1(format!("Failed to get MIDR_EL1 from host path: {err}")) - })?; + let midr_el1 = std::fs::read_to_string(midr_el1_path).ok()?; let midr_el1_trimmed = midr_el1.trim_end().trim_start_matches("0x"); - let manufacturer_id = u32::from_str_radix(midr_el1_trimmed, 16).map_err(|err| { - VcpuArchError::GetMidrEl1(format!("Invalid MIDR_EL1 found on host: {err}",)) - })?; - - Ok(manufacturer_id >> 24) + let manufacturer_id = u32::from_str_radix(midr_el1_trimmed, 16).ok()?; + Some(manufacturer_id >> 24) } /// Saves states of registers into `state`. diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 29f3b0148ac..2a923637e93 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -375,8 +375,6 @@ impl Vmm { })?; } - Vcpu::register_kick_signal_handler(); - self.vcpus_handles.reserve(vcpu_count); for mut vcpu in vcpus.drain(..) { diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 4699b80b185..a31febcfb13 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -255,22 +255,22 @@ pub fn validate_cpu_manufacturer_id(microvm_state: &MicrovmState) { let host_cpu_id = get_manufacturer_id_from_host(); let snapshot_cpu_id = microvm_state.vcpu_states[0].regs.manifacturer_id(); match (host_cpu_id, snapshot_cpu_id) { - (Ok(host_id), Some(snapshot_id)) => { + (Some(host_id), Some(snapshot_id)) => { info!("Host CPU manufacturer ID: {host_id:?}"); info!("Snapshot CPU manufacturer ID: {snapshot_id:?}"); if host_id != snapshot_id { warn!("Host CPU manufacturer ID differs from the snapshotted one",); } } - (Ok(host_id), None) => { + (Some(host_id), None) => { info!("Host CPU manufacturer ID: {host_id:?}"); warn!("Snapshot CPU manufacturer ID: couldn't get from the snapshot"); } - (Err(_), Some(snapshot_id)) => { + (None, Some(snapshot_id)) => { warn!("Host CPU manufacturer ID: couldn't get from the host"); info!("Snapshot CPU manufacturer ID: {snapshot_id:?}"); } - (Err(_), None) => { + (None, None) => { warn!("Host CPU manufacturer ID: couldn't get from the host"); warn!("Snapshot CPU manufacturer ID: couldn't get from the snapshot"); } diff --git a/src/vmm/src/vstate/kvm.rs b/src/vmm/src/vstate/kvm.rs index fb192d724d1..371b01cf70c 100644 --- a/src/vmm/src/vstate/kvm.rs +++ b/src/vmm/src/vstate/kvm.rs @@ -80,8 +80,11 @@ impl Kvm { } /// Returns the maximal number of memslots allowed in a [`Vm`] - pub fn max_nr_memslots(&self) -> usize { - self.fd.get_nr_memslots() + pub fn max_nr_memslots(&self) -> u32 { + self.fd + .get_nr_memslots() + .try_into() + .expect("Number of vcpus reported by KVM exceeds u32::MAX") } } diff --git a/src/vmm/src/vstate/vcpu.rs b/src/vmm/src/vstate/vcpu.rs index fdd73b9c175..8b6298079f3 100644 --- a/src/vmm/src/vstate/vcpu.rs +++ b/src/vmm/src/vstate/vcpu.rs @@ -5,7 +5,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. -use std::cell::Cell; +use std::cell::RefCell; #[cfg(feature = "gdb")] use std::os::fd::AsRawFd; use std::sync::atomic::{Ordering, fence}; @@ -14,9 +14,9 @@ use std::sync::{Arc, Barrier}; use std::{fmt, io, thread}; use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN}; -use kvm_ioctls::VcpuExit; #[cfg(feature = "gdb")] use kvm_ioctls::VcpuFd; +use kvm_ioctls::{KvmRunWrapper, VcpuExit}; use libc::{c_int, c_void, siginfo_t}; use log::{error, info, warn}; use vmm_sys_util::errno; @@ -51,8 +51,6 @@ pub enum VcpuError { VcpuResponse(KvmVcpuError), /// Cannot spawn a new vCPU thread: {0} VcpuSpawn(io::Error), - /// Cannot clean init vcpu TLS - VcpuTlsInit, /// Vcpu not present in TLS VcpuTlsNotPresent, /// Error with gdb request sent @@ -71,9 +69,6 @@ pub struct VcpuConfig { pub cpu_config: CpuConfiguration, } -// Using this for easier explicit type-casting to help IDEs interpret the code. -type VcpuCell = Cell>; - /// Error type for [`Vcpu::start_threaded`]. #[derive(Debug, derive_more::From, thiserror::Error)] #[error("Failed to spawn vCPU thread: {0}")] @@ -90,6 +85,11 @@ pub enum CopyKvmFdError { CreateVcpuError(#[from] kvm_ioctls::Error), } +// Stores the mmap region of `kvm_run` struct for the current Vcpu. This allows for the +// signal handler to safely access the `kvm_run` even when Vcpu is dropped and vcpu fd +// is closed. +thread_local!(static TLS_VCPU_PTR: RefCell> = const { RefCell::new(None) }); + /// A wrapper around creating and using a vcpu. #[derive(Debug)] pub struct Vcpu { @@ -112,82 +112,37 @@ pub struct Vcpu { } impl Vcpu { - thread_local!(static TLS_VCPU_PTR: VcpuCell = const { Cell::new(None) }); - /// Associates `self` with the current thread. /// /// It is a prerequisite to successfully run `init_thread_local_data()` before using /// `run_on_thread_local()` on the current thread. - /// This function will return an error if there already is a `Vcpu` present in the TLS. - fn init_thread_local_data(&mut self) -> Result<(), VcpuError> { - Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| { - if cell.get().is_some() { - return Err(VcpuError::VcpuTlsInit); - } - cell.set(Some(self as *mut Vcpu)); - Ok(()) - }) - } - - /// Deassociates `self` from the current thread. - /// - /// Should be called if the current `self` had called `init_thread_local_data()` and - /// now needs to move to a different thread. - /// - /// Fails if `self` was not previously associated with the current thread. - fn reset_thread_local_data(&mut self) -> Result<(), VcpuError> { - // Best-effort to clean up TLS. If the `Vcpu` was moved to another thread - // _before_ running this, then there is nothing we can do. - Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| { - if let Some(vcpu_ptr) = cell.get() { - if std::ptr::eq(vcpu_ptr, self) { - Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| cell.take()); - return Ok(()); - } - } - Err(VcpuError::VcpuTlsNotPresent) - }) - } - - /// Runs `func` for the `Vcpu` associated with the current thread. - /// - /// It requires that `init_thread_local_data()` was run on this thread. - /// - /// Fails if there is no `Vcpu` associated with the current thread. - /// - /// # Safety - /// - /// This is marked unsafe as it allows temporary aliasing through - /// dereferencing from pointer an already borrowed `Vcpu`. - unsafe fn run_on_thread_local(func: F) -> Result<(), VcpuError> - where - F: FnOnce(&mut Vcpu), - { - Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| { - if let Some(vcpu_ptr) = cell.get() { - // SAFETY: Dereferencing here is safe since `TLS_VCPU_PTR` is populated/non-empty, - // and it is being cleared on `Vcpu::drop` so there is no dangling pointer. - let vcpu_ref = unsafe { &mut *vcpu_ptr }; - func(vcpu_ref); - Ok(()) - } else { - Err(VcpuError::VcpuTlsNotPresent) - } + /// This function will panic if there already is a `Vcpu` present in the TLS. + fn init_thread_local_data(&mut self) { + // Use of `kvm_run` size here is safe because we only + // care about `immediate_exit` field which is at byte offset of 2. + let kvm_run_ptr = KvmRunWrapper::mmap_from_fd( + &self.kvm_vcpu.fd, + std::mem::size_of::(), + ) + .unwrap(); + TLS_VCPU_PTR.with(|cell| { + assert!(cell.borrow().is_none()); + *cell.borrow_mut() = Some(kvm_run_ptr); }) } /// Registers a signal handler which makes use of TLS and kvm immediate exit to /// kick the vcpu running on the current thread, if there is one. - pub fn register_kick_signal_handler() { + fn register_kick_signal_handler(&mut self) { + self.init_thread_local_data(); + extern "C" fn handle_signal(_: c_int, _: *mut siginfo_t, _: *mut c_void) { - // SAFETY: This is safe because it's temporarily aliasing the `Vcpu` object, but we are - // only reading `vcpu.fd` which does not change for the lifetime of the `Vcpu`. - unsafe { - let _ = Vcpu::run_on_thread_local(|vcpu| { - vcpu.kvm_vcpu.fd.set_kvm_immediate_exit(1); + TLS_VCPU_PTR.with(|cell| { + if let Some(kvm_run_ptr) = &mut *cell.borrow_mut() { + kvm_run_ptr.as_mut_ref().immediate_exit = 1; fence(Ordering::Release); - }); - } + } + }) } register_signal_handler(sigrtmin() + VCPU_RTSIG_OFFSET, handle_signal) @@ -254,8 +209,7 @@ impl Vcpu { .name(format!("fc_vcpu {}", self.kvm_vcpu.index)) .spawn(move || { let filter = &*seccomp_filter; - self.init_thread_local_data() - .expect("Cannot cleanly initialize vcpu TLS."); + self.register_kick_signal_handler(); // Synchronization to make sure thread local data is initialized. barrier.wait(); self.run(filter); @@ -617,12 +571,6 @@ fn handle_kvm_exit( } } -impl Drop for Vcpu { - fn drop(&mut self) { - let _ = self.reset_thread_local_data(); - } -} - /// List of events that the Vcpu can receive. #[derive(Debug, Clone)] pub enum VcpuEvent { @@ -959,7 +907,6 @@ pub(crate) mod tests { } fn vcpu_configured_for_boot() -> (Vm, VcpuHandle, EventFd) { - Vcpu::register_kick_signal_handler(); // Need enough mem to boot linux. let mem_size = mib_to_bytes(64); let (kvm, vm, mut vcpu) = setup_vcpu(mem_size); @@ -1025,48 +972,15 @@ pub(crate) mod tests { } #[test] - fn test_vcpu_tls() { - let (_, _, mut vcpu) = setup_vcpu(0x1000); - - // Running on the TLS vcpu should fail before we actually initialize it. - unsafe { - Vcpu::run_on_thread_local(|_| ()).unwrap_err(); - } - - // Initialize vcpu TLS. - vcpu.init_thread_local_data().unwrap(); - - // Validate TLS vcpu is the local vcpu by changing the `id` then validating against - // the one in TLS. - vcpu.kvm_vcpu.index = 12; - unsafe { - Vcpu::run_on_thread_local(|v| assert_eq!(v.kvm_vcpu.index, 12)).unwrap(); - } - - // Reset vcpu TLS. - vcpu.reset_thread_local_data().unwrap(); - - // Running on the TLS vcpu after TLS reset should fail. - unsafe { - Vcpu::run_on_thread_local(|_| ()).unwrap_err(); - } - - // Second reset should return error. - vcpu.reset_thread_local_data().unwrap_err(); - } - - #[test] - fn test_invalid_tls() { + #[should_panic] + fn test_tls_double_init() { let (_, _, mut vcpu) = setup_vcpu(0x1000); - // Initialize vcpu TLS. - vcpu.init_thread_local_data().unwrap(); - // Trying to initialize non-empty TLS should error. - vcpu.init_thread_local_data().unwrap_err(); + vcpu.init_thread_local_data(); + vcpu.init_thread_local_data(); } #[test] fn test_vcpu_kick() { - Vcpu::register_kick_signal_handler(); let (_, vm, mut vcpu) = setup_vcpu(0x1000); let mut kvm_run = @@ -1080,7 +994,7 @@ pub(crate) mod tests { let handle = std::thread::Builder::new() .name("test_vcpu_kick".to_string()) .spawn(move || { - vcpu.init_thread_local_data().unwrap(); + vcpu.register_kick_signal_handler(); // Notify TLS was populated. vcpu_barrier.wait(); // Loop for max 1 second to check if the signal handler has run. diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index 7a8965a4b9a..168d4a1cfdd 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -31,7 +31,7 @@ use crate::{DirtyBitmap, Vcpu, mem_size_mib}; pub struct VmCommon { /// The KVM file descriptor used to access this Vm. pub fd: VmFd, - max_memslots: usize, + max_memslots: u32, /// The guest memory of this Vm. pub guest_memory: GuestMemoryMmap, } @@ -51,8 +51,8 @@ pub enum VmError { EventFd(std::io::Error), /// Failed to create vcpu: {0} CreateVcpu(VcpuError), - /// The number of configured slots is bigger than the maximum reported by KVM - NotEnoughMemorySlots, + /// The number of configured slots is bigger than the maximum reported by KVM: {0} + NotEnoughMemorySlots(u32), /// Memory Error: {0} VmMemory(#[from] vm_memory::Error), } @@ -142,9 +142,9 @@ impl Vm { .guest_memory() .num_regions() .try_into() - .map_err(|_| VmError::NotEnoughMemorySlots)?; - if next_slot as usize >= self.common.max_memslots { - return Err(VmError::NotEnoughMemorySlots); + .expect("Number of existing memory regions exceeds u32::MAX"); + if self.common.max_memslots <= next_slot { + return Err(VmError::NotEnoughMemorySlots(self.common.max_memslots)); } let flags = if region.bitmap().is_some() { @@ -362,13 +362,12 @@ pub(crate) mod tests { let res = vm.register_memory_region(region); - if i >= max_nr_regions { + if max_nr_regions <= i { assert!( - matches!(res, Err(VmError::NotEnoughMemorySlots)), - "{:?} at iteration {} - max_nr_memslots: {}", + matches!(res, Err(VmError::NotEnoughMemorySlots(v)) if v == max_nr_regions), + "{:?} at iteration {}", res, - i, - max_nr_regions + i ); } else { res.unwrap_or_else(|_| {