diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index dbfe4232381..6bd81c46f18 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -182,18 +182,6 @@ pub fn build_microvm_for_boot( let entry_point = load_kernel(&boot_config.kernel_file, vm.guest_memory())?; let initrd = InitrdConfig::from_config(boot_config, vm.guest_memory())?; - #[cfg(feature = "gdb")] - let (gdb_tx, gdb_rx) = mpsc::channel(); - #[cfg(feature = "gdb")] - vcpus - .iter_mut() - .for_each(|vcpu| vcpu.attach_debug_info(gdb_tx.clone())); - #[cfg(feature = "gdb")] - let vcpu_fds = vcpus - .iter() - .map(|vcpu| vcpu.copy_kvm_vcpu_fd(&vm)) - .collect::, _>>()?; - if vm_resources.pci_enabled { device_manager.enable_pci(&vm)?; } else { @@ -291,21 +279,21 @@ pub fn build_microvm_for_boot( vcpus_exit_evt, device_manager, }; - let vmm = Arc::new(Mutex::new(vmm)); #[cfg(feature = "gdb")] - if let Some(gdb_socket_path) = &vm_resources.machine_config.gdb_socket_path { - gdb::gdb_thread( - vmm.clone(), - vcpu_fds, - gdb_rx, - entry_point.entry_addr, - gdb_socket_path, - ) - .map_err(StartMicrovmError::GdbServer)?; - } else { - debug!("No GDB socket provided not starting gdb server."); + { + let (gdb_tx, gdb_rx) = mpsc::channel(); + vcpus + .iter_mut() + .for_each(|vcpu| vcpu.attach_debug_info(gdb_tx.clone())); + + if let Some(gdb_socket_path) = &vm_resources.machine_config.gdb_socket_path { + gdb::gdb_thread(vmm.clone(), gdb_rx, entry_point.entry_addr, gdb_socket_path) + .map_err(StartMicrovmError::GdbServer)?; + } else { + debug!("No GDB socket provided not starting gdb server."); + } } // Move vcpus to their own threads and start their state machine in the 'Paused' state. diff --git a/src/vmm/src/gdb/event_loop.rs b/src/vmm/src/gdb/event_loop.rs index 35fe2039f77..13d1f438a47 100644 --- a/src/vmm/src/gdb/event_loop.rs +++ b/src/vmm/src/gdb/event_loop.rs @@ -11,7 +11,6 @@ use gdbstub::conn::{Connection, ConnectionExt}; use gdbstub::stub::run_blocking::{self, WaitForStopReasonError}; use gdbstub::stub::{DisconnectReason, GdbStub, MultiThreadStopReason}; use gdbstub::target::Target; -use kvm_ioctls::VcpuFd; use vm_memory::GuestAddress; use super::target::{FirecrackerTarget, GdbTargetError, vcpuid_to_tid}; @@ -22,11 +21,10 @@ use crate::logger::{error, trace}; pub fn event_loop( connection: UnixStream, vmm: Arc>, - vcpu_fds: Vec, gdb_event_receiver: Receiver, entry_addr: GuestAddress, ) { - let target = FirecrackerTarget::new(vmm, vcpu_fds, gdb_event_receiver, entry_addr); + let target = FirecrackerTarget::new(vmm, gdb_event_receiver, entry_addr); let connection: Box> = { Box::new(connection) }; let debugger = GdbStub::new(connection); diff --git a/src/vmm/src/gdb/mod.rs b/src/vmm/src/gdb/mod.rs index 23d9965b1aa..61c5c6c6bf1 100644 --- a/src/vmm/src/gdb/mod.rs +++ b/src/vmm/src/gdb/mod.rs @@ -15,7 +15,6 @@ use std::sync::{Arc, Mutex}; use arch::vcpu_set_debug; use event_loop::event_loop; -use kvm_ioctls::VcpuFd; use target::GdbTargetError; use vm_memory::GuestAddress; @@ -35,7 +34,6 @@ use crate::logger::trace; /// communcation to the GDB server pub fn gdb_thread( vmm: Arc>, - vcpu_fds: Vec, gdb_event_receiver: Receiver, entry_addr: GuestAddress, socket_addr: &str, @@ -44,10 +42,12 @@ pub fn gdb_thread( // to be stopped as it connects. This also allows us to set breakpoints before kernel starts. // This entry adddress is automatically used as it is not tracked inside the target state, so // when resumed will be removed - vcpu_set_debug(&vcpu_fds[0], &[entry_addr], false)?; - - for vcpu_fd in &vcpu_fds[1..] 
{ - vcpu_set_debug(vcpu_fd, &[], false)?; + { + let vmm = vmm.lock().unwrap(); + vcpu_set_debug(&vmm.vcpus_handles[0].vcpu_fd, &[entry_addr], false)?; + for handle in &vmm.vcpus_handles[1..] { + vcpu_set_debug(&handle.vcpu_fd, &[], false)?; + } } let path = Path::new(socket_addr); @@ -59,7 +59,7 @@ pub fn gdb_thread( std::thread::Builder::new() .name("gdb".into()) - .spawn(move || event_loop(connection, vmm, vcpu_fds, gdb_event_receiver, entry_addr)) + .spawn(move || event_loop(connection, vmm, gdb_event_receiver, entry_addr)) .map_err(|_| GdbTargetError::GdbThreadError)?; Ok(()) diff --git a/src/vmm/src/gdb/target.rs b/src/vmm/src/gdb/target.rs index 666861a0adb..7c943f8efd1 100644 --- a/src/vmm/src/gdb/target.rs +++ b/src/vmm/src/gdb/target.rs @@ -27,7 +27,6 @@ use gdbstub_arch::aarch64::reg::AArch64CoreRegs as CoreRegs; use gdbstub_arch::x86::X86_64_SSE as GdbArch; #[cfg(target_arch = "x86_64")] use gdbstub_arch::x86::reg::X86_64CoreRegs as CoreRegs; -use kvm_ioctls::VcpuFd; use vm_memory::{Bytes, GuestAddress, GuestMemoryError}; use super::arch; @@ -39,38 +38,18 @@ use crate::utils::u64_to_usize; use crate::vstate::vcpu::VcpuSendEventError; use crate::{FcExitCode, VcpuEvent, VcpuResponse, Vmm}; -#[derive(Debug)] +#[derive(Debug, Default, Clone, Copy)] /// Stores the current state of a Vcpu with a copy of the Vcpu file descriptor struct VcpuState { single_step: bool, paused: bool, - vcpu_fd: VcpuFd, } impl VcpuState { - /// Constructs a new instance of a VcpuState from a VcpuFd - fn from_vcpu_fd(vcpu_fd: VcpuFd) -> Self { - Self { - single_step: false, - paused: false, - vcpu_fd, - } - } - /// Disables single stepping on the Vcpu state fn reset_vcpu_state(&mut self) { self.single_step = false; } - - /// Updates the kvm debug flags set against the Vcpu with a check - fn update_kvm_debug(&self, hw_breakpoints: &[GuestAddress]) -> Result<(), GdbTargetError> { - if !self.paused { - info!("Attempted to update kvm debug on a non paused Vcpu"); - return Ok(()); - } - - arch::vcpu_set_debug(&self.vcpu_fd, hw_breakpoints, self.single_step) - } } /// Errors from interactions between GDB and the VMM @@ -179,13 +158,8 @@ impl FirecrackerTarget { /// Creates a new Target for GDB stub. This is used as the layer between GDB and the VMM it /// will handle requests from GDB and perform the appropriate actions, while also updating GDB /// with the state of the VMM / Vcpu's as we hit debug events - pub fn new( - vmm: Arc>, - vcpu_fds: Vec, - gdb_event: Receiver, - entry_addr: GuestAddress, - ) -> Self { - let mut vcpu_state: Vec<_> = vcpu_fds.into_iter().map(VcpuState::from_vcpu_fd).collect(); + pub fn new(vmm: Arc>, gdb_event: Receiver, entry_addr: GuestAddress) -> Self { + let mut vcpu_state = vec![VcpuState::default(); vmm.lock().unwrap().vcpus_handles.len()]; // By default vcpu 1 will be paused at the entry point vcpu_state[0].paused = true; @@ -202,16 +176,37 @@ impl FirecrackerTarget { } } + /// Updates KVM debug info for a specific vcpu index. + fn update_vcpu_kvm_debug( + &self, + vcpu_idx: usize, + hw_breakpoints: &[GuestAddress], + ) -> Result<(), GdbTargetError> { + let state = &self.vcpu_state[vcpu_idx]; + if !state.paused { + info!("Attempted to update kvm debug on a non paused Vcpu"); + return Ok(()); + } + + let vcpu_fd = &self.vmm.lock().unwrap().vcpus_handles[vcpu_idx].vcpu_fd; + arch::vcpu_set_debug(vcpu_fd, hw_breakpoints, state.single_step) + } + + /// Translates a guest virtual address to a guest physical address. 
+ fn translate_gva(&self, vcpu_idx: usize, addr: u64) -> Result { + let vmm = self.vmm.lock().unwrap(); + let vcpu_fd = &vmm.vcpus_handles[vcpu_idx].vcpu_fd; + arch::translate_gva(vcpu_fd, addr, &vmm) + } + /// Retrieves the currently paused Vcpu id returns an error if there is no currently paused Vcpu fn get_paused_vcpu_id(&self) -> Result { self.paused_vcpu.ok_or(GdbTargetError::NoPausedVcpu) } - /// Retrieves the currently paused Vcpu state returns an error if there is no currently paused - /// Vcpu - fn get_paused_vcpu(&self) -> Result<&VcpuState, GdbTargetError> { - let vcpu_index = tid_to_vcpuid(self.get_paused_vcpu_id()?); - Ok(&self.vcpu_state[vcpu_index]) + /// Returns the index of the paused vcpu. + fn get_paused_vcpu_idx(&self) -> Result { + Ok(tid_to_vcpuid(self.get_paused_vcpu_id()?)) } /// Updates state to reference the currently paused Vcpu and store that the cpu is currently @@ -224,9 +219,9 @@ impl FirecrackerTarget { /// Resumes execution of all paused Vcpus, update them with current kvm debug info /// and resumes fn resume_all_vcpus(&mut self) -> Result<(), GdbTargetError> { - self.vcpu_state - .iter() - .try_for_each(|state| state.update_kvm_debug(&self.hw_breakpoints))?; + for idx in 0..self.vcpu_state.len() { + self.update_vcpu_kvm_debug(idx, &self.hw_breakpoints)?; + } for cpu_id in 0..self.vcpu_state.len() { let tid = vcpuid_to_tid(cpu_id)?; @@ -263,7 +258,7 @@ impl FirecrackerTarget { return Ok(()); } - let cpu_handle = &self.vmm.lock()?.vcpus_handles[tid_to_vcpuid(tid)]; + let cpu_handle = &mut self.vmm.lock()?.vcpus_handles[tid_to_vcpuid(tid)]; cpu_handle.send_event(VcpuEvent::Pause)?; let _ = cpu_handle.response_receiver().recv()?; @@ -274,8 +269,10 @@ impl FirecrackerTarget { /// A helper function to allow the event loop to inject this breakpoint back into the Vcpu pub fn inject_bp_to_guest(&mut self, tid: Tid) -> Result<(), GdbTargetError> { - let vcpu_state = &mut self.vcpu_state[tid_to_vcpuid(tid)]; - arch::vcpu_inject_bp(&vcpu_state.vcpu_fd, &self.hw_breakpoints, false) + let vmm = self.vmm.lock().unwrap(); + let vcpu_idx = tid_to_vcpuid(tid); + let vcpu_fd = &vmm.vcpus_handles[vcpu_idx].vcpu_fd; + arch::vcpu_inject_bp(vcpu_fd, &self.hw_breakpoints, false) } /// Resumes the Vcpu, will return early if the Vcpu is already running @@ -288,7 +285,7 @@ impl FirecrackerTarget { return Ok(()); } - let cpu_handle = &self.vmm.lock()?.vcpus_handles[tid_to_vcpuid(tid)]; + let cpu_handle = &mut self.vmm.lock()?.vcpus_handles[tid_to_vcpuid(tid)]; cpu_handle.send_event(VcpuEvent::Resume)?; let response = cpu_handle.response_receiver().recv()?; @@ -307,7 +304,8 @@ impl FirecrackerTarget { &self, tid: Tid, ) -> Result>, GdbTargetError> { - let vcpu_state = &self.vcpu_state[tid_to_vcpuid(tid)]; + let vcpu_idx = tid_to_vcpuid(tid); + let vcpu_state = &self.vcpu_state[vcpu_idx]; if vcpu_state.single_step { return Ok(Some(MultiThreadStopReason::SignalWithThread { tid, @@ -315,13 +313,15 @@ impl FirecrackerTarget { })); } - let Ok(ip) = arch::get_instruction_pointer(&vcpu_state.vcpu_fd) else { + let vmm = self.vmm.lock().unwrap(); + let vcpu_fd = &vmm.vcpus_handles[vcpu_idx].vcpu_fd; + let Ok(ip) = arch::get_instruction_pointer(vcpu_fd) else { // If we error here we return an arbitrary Software Breakpoint, GDB will handle // this gracefully return Ok(Some(MultiThreadStopReason::SwBreak(tid))); }; - let gpa = arch::translate_gva(&vcpu_state.vcpu_fd, ip, &self.vmm.lock().unwrap())?; + let gpa = arch::translate_gva(vcpu_fd, ip, &vmm)?; if self.sw_breakpoints.contains_key(&gpa) { 
return Ok(Some(MultiThreadStopReason::SwBreak(tid))); } @@ -364,14 +364,20 @@ impl Target for FirecrackerTarget { impl MultiThreadBase for FirecrackerTarget { /// Reads the registers for the Vcpu fn read_registers(&mut self, regs: &mut CoreRegs, tid: Tid) -> TargetResult<(), Self> { - arch::read_registers(&self.vcpu_state[tid_to_vcpuid(tid)].vcpu_fd, regs)?; + let vmm = self.vmm.lock().unwrap(); + let vcpu_idx = tid_to_vcpuid(tid); + let vcpu_fd = &vmm.vcpus_handles[vcpu_idx].vcpu_fd; + arch::read_registers(vcpu_fd, regs)?; Ok(()) } /// Writes to the registers for the Vcpu fn write_registers(&mut self, regs: &CoreRegs, tid: Tid) -> TargetResult<(), Self> { - arch::write_registers(&self.vcpu_state[tid_to_vcpuid(tid)].vcpu_fd, regs)?; + let vmm = self.vmm.lock().unwrap(); + let vcpu_idx = tid_to_vcpuid(tid); + let vcpu_fd = &vmm.vcpus_handles[vcpu_idx].vcpu_fd; + arch::write_registers(vcpu_fd, regs)?; Ok(()) } @@ -384,12 +390,13 @@ impl MultiThreadBase for FirecrackerTarget { tid: Tid, ) -> TargetResult { let data_len = data.len(); - let vcpu_state = &self.vcpu_state[tid_to_vcpuid(tid)]; + let vmm = self.vmm.lock().unwrap(); + let vcpu_idx = tid_to_vcpuid(tid); + let vcpu_fd = &vmm.vcpus_handles[vcpu_idx].vcpu_fd; - let vmm = &self.vmm.lock().expect("Error locking vmm in read addr"); while !data.is_empty() { - let gpa = arch::translate_gva(&vcpu_state.vcpu_fd, gva, &vmm).map_err(|e| { + let gpa = arch::translate_gva(vcpu_fd, gva, &vmm).map_err(|e| { error!("Error {e:?} translating gva on read address: {gva:#X}"); })?; @@ -420,11 +428,12 @@ impl MultiThreadBase for FirecrackerTarget { mut data: &[u8], tid: Tid, ) -> TargetResult<(), Self> { - let vcpu_state = &self.vcpu_state[tid_to_vcpuid(tid)]; - let vmm = &self.vmm.lock().expect("Error locking vmm in write addr"); + let vmm = self.vmm.lock().unwrap(); + let vcpu_idx = tid_to_vcpuid(tid); + let vcpu_fd = &vmm.vcpus_handles[vcpu_idx].vcpu_fd; while !data.is_empty() { - let gpa = arch::translate_gva(&vcpu_state.vcpu_fd, gva, &vmm).map_err(|e| { + let gpa = arch::translate_gva(vcpu_fd, gva, &vmm).map_err(|e| { error!("Error {e:?} translating gva on read address: {gva:#X}"); })?; @@ -546,8 +555,8 @@ impl HwBreakpoint for FirecrackerTarget { return Ok(false); } - let state = self.get_paused_vcpu()?; - state.update_kvm_debug(&self.hw_breakpoints)?; + let vcpu_idx = self.get_paused_vcpu_idx()?; + self.update_vcpu_kvm_debug(vcpu_idx, &self.hw_breakpoints)?; Ok(true) } @@ -563,8 +572,8 @@ impl HwBreakpoint for FirecrackerTarget { Some(pos) => self.hw_breakpoints.remove(pos), }; - let state = self.get_paused_vcpu()?; - state.update_kvm_debug(&self.hw_breakpoints)?; + let vcpu_idx = self.get_paused_vcpu_idx()?; + self.update_vcpu_kvm_debug(vcpu_idx, &self.hw_breakpoints)?; Ok(true) } @@ -580,11 +589,8 @@ impl SwBreakpoint for FirecrackerTarget { addr: ::Usize, _kind: ::BreakpointKind, ) -> TargetResult { - let gpa = arch::translate_gva( - &self.get_paused_vcpu()?.vcpu_fd, - addr, - &self.vmm.lock().unwrap(), - )?; + let vcpu_idx = self.get_paused_vcpu_idx()?; + let gpa = self.translate_gva(vcpu_idx, addr)?; if self.sw_breakpoints.contains_key(&gpa) { return Ok(true); } @@ -608,11 +614,8 @@ impl SwBreakpoint for FirecrackerTarget { addr: ::Usize, _kind: ::BreakpointKind, ) -> TargetResult { - let gpa = arch::translate_gva( - &self.get_paused_vcpu()?.vcpu_fd, - addr, - &self.vmm.lock().unwrap(), - )?; + let vcpu_idx = self.get_paused_vcpu_idx()?; + let gpa = self.translate_gva(vcpu_idx, addr)?; if let Some(removed) = self.sw_breakpoints.remove(&gpa) { 
self.write_addrs(addr, &removed, self.get_paused_vcpu_id()?)?; diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index c3b2410dfe1..7a168db0146 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -297,7 +297,8 @@ pub struct Vmm { // Save UFFD in order to keep it open in the Firecracker process, as well. #[allow(unused)] uffd: Option, - vcpus_handles: Vec, + /// Handles to the vcpu threads with vcpu_fds inside them. + pub vcpus_handles: Vec, // Used by Vcpus and devices to initiate teardown; Vmm should never write here. vcpus_exit_evt: EventFd, // Device manager @@ -353,8 +354,11 @@ impl Vmm { #[cfg(target_arch = "x86_64")] vcpu.kvm_vcpu.set_pio_bus(self.vm.pio_bus.clone()); - self.vcpus_handles - .push(vcpu.start_threaded(vcpu_seccomp_filter.clone(), barrier.clone())?); + self.vcpus_handles.push(vcpu.start_threaded( + &self.vm, + vcpu_seccomp_filter.clone(), + barrier.clone(), + )?); } self.instance_info.state = VmState::Paused; // Wait for vCPUs to initialize their TLS before moving forward. @@ -369,7 +373,7 @@ impl Vmm { // Send the events. self.vcpus_handles - .iter() + .iter_mut() .try_for_each(|handle| handle.send_event(VcpuEvent::Resume)) .map_err(|_| VmmError::VcpuMessage)?; @@ -391,7 +395,7 @@ impl Vmm { pub fn pause_vm(&mut self) -> Result<(), VmmError> { // Send the events. self.vcpus_handles - .iter() + .iter_mut() .try_for_each(|handle| handle.send_event(VcpuEvent::Pause)) .map_err(|_| VmmError::VcpuMessage)?; @@ -450,7 +454,7 @@ impl Vmm { } fn save_vcpu_states(&mut self) -> Result, MicrovmStateError> { - for handle in self.vcpus_handles.iter() { + for handle in self.vcpus_handles.iter_mut() { handle .send_event(VcpuEvent::SaveState) .map_err(MicrovmStateError::SignalVcpu)?; @@ -479,7 +483,7 @@ impl Vmm { /// Dumps CPU configuration. pub fn dump_cpu_config(&mut self) -> Result, DumpCpuConfigError> { - for handle in self.vcpus_handles.iter() { + for handle in self.vcpus_handles.iter_mut() { handle .send_event(VcpuEvent::DumpCpuConfig) .map_err(DumpCpuConfigError::SendEvent)?; @@ -615,7 +619,7 @@ impl Vmm { // We send a "Finish" event. If a VCPU has already exited, this is the only // message it will accept... but running and paused will take it as well. // It breaks out of the state machine loop so that the thread can be joined. - for (idx, handle) in self.vcpus_handles.iter().enumerate() { + for (idx, handle) in self.vcpus_handles.iter_mut().enumerate() { if let Err(err) = handle.send_event(VcpuEvent::Finish) { error!("Failed to send VcpuEvent::Finish to vCPU {}: {}", idx, err); } diff --git a/src/vmm/src/vstate/vcpu.rs b/src/vmm/src/vstate/vcpu.rs index 642b2fd2352..b0f12990710 100644 --- a/src/vmm/src/vstate/vcpu.rs +++ b/src/vmm/src/vstate/vcpu.rs @@ -5,8 +5,6 @@ // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. -use std::cell::RefCell; -#[cfg(feature = "gdb")] use std::os::fd::AsRawFd; use std::sync::atomic::{Ordering, fence}; use std::sync::mpsc::{Receiver, Sender, TryRecvError, channel}; @@ -14,9 +12,7 @@ use std::sync::{Arc, Barrier}; use std::{fmt, io, thread}; use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN}; -#[cfg(feature = "gdb")] -use kvm_ioctls::VcpuFd; -use kvm_ioctls::{KvmRunWrapper, VcpuExit}; +use kvm_ioctls::{VcpuExit, VcpuFd}; use libc::{c_int, c_void, siginfo_t}; use log::{error, info, warn}; use vmm_sys_util::errno; @@ -70,14 +66,16 @@ pub struct VcpuConfig { } /// Error type for [`Vcpu::start_threaded`]. 
-#[derive(Debug, derive_more::From, thiserror::Error)] -#[error("Failed to spawn vCPU thread: {0}")] -pub struct StartThreadedError(std::io::Error); +#[derive(Debug, thiserror::Error, displaydoc::Display)] +pub enum StartThreadedError { + /// Failed to spawn vCPU thread: {0} + Spawn(std::io::Error), + /// Failed to clone kvm Vcpu fd: {0} + CopyFd(CopyKvmFdError), +} /// Error type for [`Vcpu::copy_kvm_vcpu_fd`]. -#[cfg(feature = "gdb")] -#[derive(Debug, thiserror::Error)] -#[error("Failed to clone kvm Vcpu fd: {0}")] +#[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum CopyKvmFdError { /// Error with libc dup of kvm Vcpu fd DupError(#[from] std::io::Error), @@ -85,11 +83,6 @@ pub enum CopyKvmFdError { CreateVcpuError(#[from] kvm_ioctls::Error), } -// Stores the mmap region of `kvm_run` struct for the current Vcpu. This allows for the -// signal handler to safely access the `kvm_run` even when Vcpu is dropped and vcpu fd -// is closed. -thread_local!(static TLS_VCPU_PTR: RefCell> = const { RefCell::new(None) }); - /// A wrapper around creating and using a vcpu. #[derive(Debug)] pub struct Vcpu { @@ -112,39 +105,14 @@ pub struct Vcpu { } impl Vcpu { - /// Associates `self` with the current thread. - /// - /// It is a prerequisite to successfully run `init_thread_local_data()` before using - /// `run_on_thread_local()` on the current thread. - /// This function will panic if there already is a `Vcpu` present in the TLS. - fn init_thread_local_data(&mut self) { - // Use of `kvm_run` size here is safe because we only - // care about `immediate_exit` field which is at byte offset of 2. - let kvm_run_ptr = KvmRunWrapper::mmap_from_fd( - &self.kvm_vcpu.fd, - std::mem::size_of::(), - ) - .unwrap(); - TLS_VCPU_PTR.with(|cell| { - assert!(cell.borrow().is_none()); - *cell.borrow_mut() = Some(kvm_run_ptr); - }) - } - - /// Registers a signal handler which makes use of TLS and kvm immediate exit to - /// kick the vcpu running on the current thread, if there is one. + /// Registers a signal handler which kicks the vcpu running on the current thread, if there is + /// one. fn register_kick_signal_handler(&mut self) { - self.init_thread_local_data(); - extern "C" fn handle_signal(_: c_int, _: *mut siginfo_t, _: *mut c_void) { - TLS_VCPU_PTR.with(|cell| { - if let Some(kvm_run_ptr) = &mut *cell.borrow_mut() { - kvm_run_ptr.as_mut_ref().immediate_exit = 1; - fence(Ordering::Release); - } - }) + // We write to immediate_exit from another thread, so make sure the read in + // KVM_RUN sees the up-to-date value. + fence(Ordering::Acquire); } - register_signal_handler(sigrtmin() + VCPU_RTSIG_OFFSET, handle_signal) .expect("Failed to register vcpu signal handler"); } @@ -185,7 +153,6 @@ impl Vcpu { } /// Obtains a copy of the VcpuFd - #[cfg(feature = "gdb")] pub fn copy_kvm_vcpu_fd(&self, vm: &Vm) -> Result { // SAFETY: We own this fd so it is considered safe to clone let r = unsafe { libc::dup(self.kvm_vcpu.fd.as_raw_fd()) }; @@ -200,11 +167,15 @@ impl Vcpu { /// The handle can be used to control the remote vcpu. 
pub fn start_threaded( mut self, + vm: &Vm, seccomp_filter: Arc, barrier: Arc, ) -> Result { let event_sender = self.event_sender.take().expect("vCPU already started"); let response_receiver = self.response_receiver.take().unwrap(); + let vcpu_fd = self + .copy_kvm_vcpu_fd(vm) + .map_err(StartThreadedError::CopyFd)?; let vcpu_thread = thread::Builder::new() .name(format!("fc_vcpu {}", self.kvm_vcpu.index)) .spawn(move || { @@ -213,11 +184,13 @@ impl Vcpu { // Synchronization to make sure thread local data is initialized. barrier.wait(); self.run(filter); - })?; + }) + .map_err(StartThreadedError::Spawn)?; Ok(VcpuHandle::new( event_sender, response_receiver, + vcpu_fd, vcpu_thread, )) } @@ -628,6 +601,8 @@ impl fmt::Debug for VcpuResponse { pub struct VcpuHandle { event_sender: Sender, response_receiver: Receiver, + /// Copy of this vcpu's KVM fd, used to kick it out of KVM_RUN. + pub vcpu_fd: VcpuFd, // Rust JoinHandles have to be wrapped in Option if you ever plan on 'join()'ing them. // We want to be able to join these threads in tests. vcpu_thread: Option>, @@ -648,11 +623,13 @@ impl VcpuHandle { pub fn new( event_sender: Sender, response_receiver: Receiver, + vcpu_fd: VcpuFd, vcpu_thread: thread::JoinHandle<()>, ) -> Self { Self { event_sender, response_receiver, + vcpu_fd, vcpu_thread: Some(vcpu_thread), } } @@ -661,12 +638,15 @@ impl VcpuHandle { /// # Errors /// /// When [`vmm_sys_util::linux::signal::Killable::kill`] errors. - pub fn send_event(&self, event: VcpuEvent) -> Result<(), VcpuSendEventError> { + pub fn send_event(&mut self, event: VcpuEvent) -> Result<(), VcpuSendEventError> { // Use expect() to crash if the other thread closed this channel. self.event_sender .send(event) .expect("event sender channel closed on vcpu end."); // Kick the vcpu so it picks up the message. + // Set immediate_exit, with a release fence so the write is visible to the vcpu thread. + self.vcpu_fd.set_kvm_immediate_exit(1); + fence(Ordering::Release); self.vcpu_thread .as_ref() // Safe to unwrap since constructor make this 'Some'. @@ -715,6 +695,7 @@ pub(crate) mod tests { #[cfg(target_arch = "x86_64")] use std::collections::BTreeMap; + use std::sync::atomic::Ordering; use std::sync::{Arc, Barrier, Mutex}; use linux_loader::loader::KernelLoader; @@ -968,7 +949,11 @@ pub(crate) mod tests { let mut seccomp_filters = get_empty_filters(); let barrier = Arc::new(Barrier::new(2)); let vcpu_handle = vcpu - .start_threaded(seccomp_filters.remove("vcpu").unwrap(), barrier.clone()) + .start_threaded( + &vm, + seccomp_filters.remove("vcpu").unwrap(), + barrier.clone(), + ) .expect("failed to start vcpu"); // Wait for vCPUs to initialize their TLS before moving forward. 
barrier.wait(); @@ -984,14 +969,6 @@ pub(crate) mod tests { assert!(vcpu.kvm_vcpu.peripherals.mmio_bus.is_some()); } - #[test] - #[should_panic] - fn test_tls_double_init() { - let (_, _, mut vcpu) = setup_vcpu(0x1000); - vcpu.init_thread_local_data(); - vcpu.init_thread_local_data(); - } - #[test] fn test_vcpu_kick() { let (_, vm, mut vcpu) = setup_vcpu(0x1000); @@ -999,6 +976,9 @@ pub(crate) mod tests { let mut kvm_run = kvm_ioctls::KvmRunWrapper::mmap_from_fd(&vcpu.kvm_vcpu.fd, vm.fd().run_size()) .expect("cannot mmap kvm-run"); + let vcpu_kvm_run = + kvm_ioctls::KvmRunWrapper::mmap_from_fd(&vcpu.kvm_vcpu.fd, vm.fd().run_size()) + .expect("cannot mmap kvm-run"); let success = Arc::new(std::sync::atomic::AtomicBool::new(false)); let vcpu_success = success.clone(); let barrier = Arc::new(Barrier::new(2)); @@ -1012,7 +992,7 @@ pub(crate) mod tests { vcpu_barrier.wait(); // Loop for max 1 second to check if the signal handler has run. for _ in 0..10 { - if kvm_run.as_mut_ref().immediate_exit == 1 { + if vcpu_kvm_run.as_ref().immediate_exit == 1 { // Signal handler has run and set immediate_exit to 1. vcpu_success.store(true, Ordering::Release); break; @@ -1021,10 +1001,10 @@ pub(crate) mod tests { } }) .expect("cannot start thread"); - - // Wait for the vcpu to initialize its TLS. barrier.wait(); - // Kick the Vcpu using the custom signal. + + // Set immediate_exit and kick the Vcpu using the custom signal. + kvm_run.as_mut_ref().immediate_exit = 1; handle .kill(sigrtmin() + VCPU_RTSIG_OFFSET) .expect("failed to signal thread"); @@ -1034,7 +1014,11 @@ pub(crate) mod tests { } // Sends an event to a vcpu and expects a particular response. - fn queue_event_expect_response(handle: &VcpuHandle, event: VcpuEvent, response: VcpuResponse) { + fn queue_event_expect_response( + handle: &mut VcpuHandle, + event: VcpuEvent, + response: VcpuResponse, + ) { handle .send_event(event) .expect("failed to send event to vcpu"); @@ -1074,52 +1058,52 @@ pub(crate) mod tests { #[test] fn test_vcpu_pause_resume() { - let (_vm, vcpu_handle, vcpu_exit_evt) = vcpu_configured_for_boot(); + let (_vm, mut vcpu_handle, vcpu_exit_evt) = vcpu_configured_for_boot(); // Queue a Resume event, expect a response. - queue_event_expect_response(&vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed); + queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed); // Queue a Pause event, expect a response. - queue_event_expect_response(&vcpu_handle, VcpuEvent::Pause, VcpuResponse::Paused); + queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Pause, VcpuResponse::Paused); // Validate vcpu handled the EINTR gracefully and didn't exit. let err = vcpu_exit_evt.read().unwrap_err(); assert_eq!(err.raw_os_error().unwrap(), libc::EAGAIN); // Queue another Pause event, expect a response. - queue_event_expect_response(&vcpu_handle, VcpuEvent::Pause, VcpuResponse::Paused); + queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Pause, VcpuResponse::Paused); // Queue a Resume event, expect a response. - queue_event_expect_response(&vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed); + queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed); // Queue another Resume event, expect a response. - queue_event_expect_response(&vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed); + queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed); // Queue another Pause event, expect a response. 
- queue_event_expect_response(&vcpu_handle, VcpuEvent::Pause, VcpuResponse::Paused); + queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Pause, VcpuResponse::Paused); // Queue a Resume event, expect a response. - queue_event_expect_response(&vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed); + queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed); vcpu_handle.send_event(VcpuEvent::Finish).unwrap(); } #[test] fn test_vcpu_save_state_events() { - let (_vm, vcpu_handle, _vcpu_exit_evt) = vcpu_configured_for_boot(); + let (_vm, mut vcpu_handle, _vcpu_exit_evt) = vcpu_configured_for_boot(); // Queue a Resume event, expect a response. - queue_event_expect_response(&vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed); + queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed); // Queue a SaveState event, expect a response. queue_event_expect_response( - &vcpu_handle, + &mut vcpu_handle, VcpuEvent::SaveState, VcpuResponse::NotAllowed(String::new()), ); // Queue another Pause event, expect a response. - queue_event_expect_response(&vcpu_handle, VcpuEvent::Pause, VcpuResponse::Paused); + queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Pause, VcpuResponse::Paused); // Queue a SaveState event, get the response. vcpu_handle @@ -1139,7 +1123,7 @@ pub(crate) mod tests { #[test] fn test_vcpu_dump_cpu_config() { - let (_vm, vcpu_handle, _) = vcpu_configured_for_boot(); + let (_vm, mut vcpu_handle, _) = vcpu_configured_for_boot(); // Queue a DumpCpuConfig event, expect a DumpedCpuConfig response. vcpu_handle @@ -1156,12 +1140,12 @@ pub(crate) mod tests { } // Queue a Resume event, expect a response. - queue_event_expect_response(&vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed); + queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed); // Queue a DumpCpuConfig event, expect a NotAllowed respoonse. // The DumpCpuConfig event is only allowed while paused. queue_event_expect_response( - &vcpu_handle, + &mut vcpu_handle, VcpuEvent::DumpCpuConfig, VcpuResponse::NotAllowed(String::new()), );
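Note on the new kick path: with the TLS-based handler gone, VcpuHandle::send_event now writes immediate_exit through its copy of the vcpu fd, issues a release fence, and only then signals the vcpu thread, whose handler pairs that with an acquire fence. The sketch below is a minimal, self-contained illustration of that ordering, not Firecracker code: an AtomicU8 stands in for the mmap'ed kvm_run.immediate_exit byte, unpark stands in for the SIGRTMIN kick, and a release-store/acquire-load pair expresses the same ordering the explicit fences provide in the real code.

```rust
use std::sync::Arc;
use std::sync::atomic::{AtomicU8, Ordering};
use std::thread;
use std::time::Duration;

fn main() {
    // Stand-in for the mmap'ed kvm_run.immediate_exit byte.
    let immediate_exit = Arc::new(AtomicU8::new(0));
    let flag = Arc::clone(&immediate_exit);

    // Stand-in for the vcpu thread: KVM_RUN would return as soon as it
    // observes immediate_exit == 1; here we simply poll the flag.
    let vcpu_thread = thread::spawn(move || {
        while flag.load(Ordering::Acquire) == 0 {
            thread::park_timeout(Duration::from_millis(10));
        }
    });

    // Controller side, mirroring VcpuHandle::send_event: publish the flag
    // first, then deliver the kick (unpark standing in for the signal).
    immediate_exit.store(1, Ordering::Release);
    vcpu_thread.thread().unpark();

    vcpu_thread.join().unwrap();
}
```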
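The reason a VcpuFd can live inside VcpuHandle at all is copy_kvm_vcpu_fd, which duplicates the raw descriptor with libc::dup before the vcpu thread takes ownership of the original. Below is a minimal sketch of that duplication pattern under the assumption of a generic fd rather than a KVM one; the helper name dup_fd and the /dev/null file are illustrative only, and the real code rewraps the duplicate as a kvm_ioctls::VcpuFd and reports failures through CopyKvmFdError.

```rust
use std::fs::File;
use std::io;
use std::os::fd::{AsRawFd, FromRawFd, OwnedFd};

/// Duplicates a raw file descriptor so the copy can outlive the original handle.
fn dup_fd(fd: &impl AsRawFd) -> io::Result<OwnedFd> {
    // SAFETY: dup returns a brand-new descriptor that we exclusively own,
    // or -1 on error, which we check before wrapping it.
    let raw = unsafe { libc::dup(fd.as_raw_fd()) };
    if raw < 0 {
        return Err(io::Error::last_os_error());
    }
    // SAFETY: `raw` is a valid, owned descriptor at this point.
    Ok(unsafe { OwnedFd::from_raw_fd(raw) })
}

fn main() -> io::Result<()> {
    let file = File::open("/dev/null")?;
    let copy = dup_fd(&file)?;
    println!("original fd {}, duplicate fd {}", file.as_raw_fd(), copy.as_raw_fd());
    Ok(())
}
```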