diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index 0b003b20d..acde6b9d6 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -65,7 +65,7 @@ windows = { version = "0.62", features = [ "Win32_System_JobObjects", "Win32_System_SystemServices", ] } -windows-sys = { version = "0.61", features = ["Win32"] } +windows-sys = { version = "0.61", features = ["Win32", "Win32_System", "Win32_System_Threading"] } windows-result = "0.4" rust-embed = { version = "8.7.2", features = ["debug-embed", "include-exclude", "interpolate-folder-path"] } sha256 = "1.6.0" diff --git a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs index 80ee0b16b..42835c16f 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs @@ -403,7 +403,8 @@ impl HypervLinuxDriver { let interrupt_handle = Arc::new(LinuxInterruptHandle { running: AtomicU64::new(0), - cancel_requested: AtomicBool::new(false), + cancel_requested: AtomicU64::new(0), + call_active: AtomicBool::new(false), #[cfg(gdb)] debug_interrupt: AtomicBool::new(false), #[cfg(all( @@ -669,16 +670,13 @@ impl Hypervisor for HypervLinuxDriver { self.interrupt_handle .tid .store(unsafe { libc::pthread_self() as u64 }, Ordering::Relaxed); - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call - self.interrupt_handle - .set_running_and_increment_generation() - .map_err(|e| { - new_error!( - "Error setting running state and incrementing generation: {}", - e - ) - })?; + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after set_running_bit but before checking cancel_requested): + // - kill() will stamp cancel_requested with the current generation + // - We will check cancel_requested below and skip the VcpuFd::run() call + // - This is the desired behavior - the kill takes effect immediately + let generation = self.interrupt_handle.set_running_bit(); + #[cfg(not(gdb))] let debug_interrupt = false; #[cfg(gdb)] @@ -687,14 +685,16 @@ impl Hypervisor for HypervLinuxDriver { .debug_interrupt .load(Ordering::Relaxed); - // Don't run the vcpu if `cancel_requested` is true + // Don't run the vcpu if `cancel_requested` is set for our generation // - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after checking cancel_requested but before vcpu.run()): + // - kill() will stamp cancel_requested with the current generation + // - We will proceed with vcpu.run(), but signals will be sent to interrupt it + // - The vcpu will be interrupted and return EINTR (handled below) let exit_reason = if self .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed) + .is_cancel_requested_for_generation(generation) || debug_interrupt { Err(mshv_ioctls::MshvError::from(libc::EINTR)) @@ -707,11 +707,12 @@ impl Hypervisor for HypervLinuxDriver { Some(hyperlight_guest_tracing::invariant_tsc::read_tsc()); self.trace_info.guest_start_epoch = Some(std::time::Instant::now()); } - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then the vcpu will run, but we will keep sending signals to this thread - // to interrupt it 
until `running` is set to false. The `vcpu_fd::run()` call will - // return either normally with an exit reason, or from being "kicked" by out signal handler, with an EINTR error, - // both of which are fine. + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (during vcpu.run() execution): + // - kill() stamps cancel_requested with the current generation + // - kill() sends signals (SIGRTMIN+offset) to this thread repeatedly + // - The signal handler is a no-op, but it causes vcpu.run() to return EINTR + // - We check cancel_requested below and return Cancelled if generation matches #[cfg(mshv2)] { let hv_message: hv_message = Default::default(); @@ -720,27 +721,32 @@ impl Hypervisor for HypervLinuxDriver { #[cfg(mshv3)] self.vcpu_fd.run() }; - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then signals will be sent to this thread until `running` is set to false. - // This is fine since the signal handler is a no-op. + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after vcpu.run() returns but before clear_running_bit): + // - kill() continues sending signals to this thread (running bit is still set) + // - The signals are harmless (no-op handler), we just need to check cancel_requested + // - We load cancel_requested below to determine if this run was cancelled let cancel_requested = self .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed); + .is_cancel_requested_for_generation(generation); #[cfg(gdb)] let debug_interrupt = self .interrupt_handle .debug_interrupt .load(Ordering::Relaxed); - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**. - // Additionally signals will be sent to this thread until `running` is set to false. - // This is fine since the signal handler is a no-op. + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after loading cancel_requested but before clear_running_bit): + // - kill() stamps cancel_requested with the CURRENT generation (not the one we just loaded) + // - kill() continues sending signals until running bit is cleared + // - The newly stamped cancel_requested will affect the NEXT vcpu.run() call + // - Signals sent now are harmless (no-op handler) self.interrupt_handle.clear_running_bit(); - // At this point, `running` is false so no more signals will be sent to this thread, - // but we may still receive async signals that were sent before this point. - // To prevent those signals from interrupting subsequent calls to `run()`, - // we make sure to check `cancel_requested` before cancelling (see `libc::EINTR` match-arm below). + // At this point, running bit is clear so kill() will stop sending signals. + // However, we may still receive delayed signals that were sent before clear_running_bit. 
+ // These stale signals are harmless because: + // - The signal handler is a no-op + // - We check generation matching in cancel_requested before treating EINTR as cancellation + // - If generation doesn't match, we return Retry instead of Cancelled let result = match exit_reason { Ok(m) => match m.header.message_type { HALT_MESSAGE => { @@ -820,14 +826,16 @@ impl Hypervisor for HypervLinuxDriver { } }, Err(e) => match e.errno() { - // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled + // We send a signal (SIGRTMIN+offset) to interrupt the vcpu, which causes EINTR libc::EINTR => { - // If cancellation was not requested for this specific vm, the vcpu was interrupted because of debug interrupt or - // a stale signal that meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it + // Check if cancellation was requested for THIS specific generation. + // If not, the EINTR came from: + // - A debug interrupt (if GDB is enabled) + // - A stale signal from a previous guest call (generation mismatch) + // - A signal meant for a different sandbox on the same thread + // In these cases, we return Retry to continue execution. if cancel_requested { - self.interrupt_handle - .cancel_requested - .store(false, Ordering::Relaxed); + self.interrupt_handle.clear_cancel_requested(); HyperlightExit::Cancelled() } else { #[cfg(gdb)] diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index 490e1f72e..8376101e2 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -17,7 +17,7 @@ limitations under the License. use std::fmt; use std::fmt::{Debug, Formatter}; use std::string::String; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; use log::LevelFilter; @@ -328,10 +328,11 @@ impl HypervWindowsDriver { }; let interrupt_handle = Arc::new(WindowsInterruptHandle { - running: AtomicBool::new(false), - cancel_requested: AtomicBool::new(false), + running: AtomicU64::new(0), + cancel_requested: AtomicU64::new(0), #[cfg(gdb)] debug_interrupt: AtomicBool::new(false), + call_active: AtomicBool::new(false), partition_handle, dropped: AtomicBool::new(false), }); @@ -547,7 +548,8 @@ impl Hypervisor for HypervWindowsDriver { #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] fn run(&mut self) -> Result { - self.interrupt_handle.running.store(true, Ordering::Relaxed); + // Get current generation and set running bit + let generation = self.interrupt_handle.set_running_bit(); #[cfg(not(gdb))] let debug_interrupt = false; @@ -557,13 +559,13 @@ impl Hypervisor for HypervWindowsDriver { .debug_interrupt .load(Ordering::Relaxed); - // Don't run the vcpu if `cancel_requested` is true + // Check if cancellation was requested for THIS generation let exit_context = if self .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed) + .is_cancel_requested_for_generation(generation) || debug_interrupt { + println!("already cancelled"); WHV_RUN_VP_EXIT_CONTEXT { ExitReason: WHV_RUN_VP_EXIT_REASON(8193i32), // WHvRunVpExitReasonCanceled VpContext: Default::default(), @@ -581,12 +583,20 @@ impl Hypervisor for HypervWindowsDriver { } self.processor.run()? 
}; - self.interrupt_handle - .cancel_requested - .store(false, Ordering::Relaxed); - self.interrupt_handle - .running - .store(false, Ordering::Relaxed); + + // Clear running bit + self.interrupt_handle.clear_running_bit(); + + let exit_reason = exit_context.ExitReason; + let is_canceled = exit_reason.0 == 8193; // WHvRunVpExitReasonCanceled + + // Check if this was a manual cancellation (vs internal Windows cancellation) + let cancel_was_requested_manually = self.interrupt_handle.is_cancel_requested_for_generation(generation); + + // Only clear cancel_requested if we're actually processing a cancellation for this generation + if is_canceled && cancel_was_requested_manually { + self.interrupt_handle.clear_cancel_requested(); + } #[cfg(gdb)] let debug_interrupt = self @@ -662,12 +672,30 @@ impl Hypervisor for HypervWindowsDriver { // return a special exit reason so that the gdb thread can handle it // and resume execution HyperlightExit::Debug(VcpuStopReason::Interrupt) + } else if !cancel_was_requested_manually { + // This was an internal cancellation + // The virtualization stack can use this function to return the control + // of a virtual processor back to the virtualization stack in case it + // needs to change the state of a VM or to inject an event into the processor + debug!("Internal cancellation detected, returning Retry error"); + HyperlightExit::Retry() } else { HyperlightExit::Cancelled() } #[cfg(not(gdb))] - HyperlightExit::Cancelled() + { + if !cancel_was_requested_manually { + // This was an internal cancellation + // The virtualization stack can use this function to return the control + // of a virtual processor back to the virtualization stack in case it + // needs to change the state of a VM or to inject an event into the processor + println!("Internal cancellation detected, returning Retry error"); + HyperlightExit::Retry() + } else { + HyperlightExit::Cancelled() + } + } } #[cfg(gdb)] WHV_RUN_VP_EXIT_REASON(4098i32) => { @@ -951,30 +979,146 @@ impl Drop for HypervWindowsDriver { #[derive(Debug)] pub struct WindowsInterruptHandle { - // `WHvCancelRunVirtualProcessor()` will return Ok even if the vcpu is not running, which is the reason we need this flag. - running: AtomicBool, - cancel_requested: AtomicBool, + /// Combined running flag (bit 63) and generation counter (bits 0-62). + /// + /// The generation increments with each guest function call to prevent + /// stale cancellations from affecting new calls (ABA problem). + /// + /// Layout: `[running:1 bit][generation:63 bits]` + running: AtomicU64, + + /// Combined cancel_requested flag (bit 63) and generation counter (bits 0-62). + /// + /// When kill() is called, this stores the current generation along with + /// the cancellation flag. The VCPU only honors the cancellation if the + /// generation matches its current generation. + /// + /// Layout: `[cancel_requested:1 bit][generation:63 bits]` + cancel_requested: AtomicU64, + // This is used to signal the GDB thread to stop the vCPU #[cfg(gdb)] debug_interrupt: AtomicBool, + /// Flag indicating whether a guest function call is currently in progress. + /// + /// **true**: A guest function call is active (between call start and completion) + /// **false**: No guest function call is active + /// + /// # Purpose + /// + /// This flag prevents kill() from having any effect when called outside of a + /// guest function call. This solves the "kill-in-advance" problem where kill() + /// could be called before a guest function starts and would incorrectly cancel it. 
+ call_active: AtomicBool, partition_handle: WHV_PARTITION_HANDLE, dropped: AtomicBool, } +impl WindowsInterruptHandle { + const RUNNING_BIT: u64 = 1 << 63; + const MAX_GENERATION: u64 = Self::RUNNING_BIT - 1; + const CANCEL_REQUESTED_BIT: u64 = 1 << 63; + + /// Set cancel_requested to true with the given generation + fn set_cancel_requested(&self, generation: u64) { + let value = Self::CANCEL_REQUESTED_BIT | (generation & Self::MAX_GENERATION); + self.cancel_requested.store(value, Ordering::Release); + } + + /// Clear cancel_requested (reset to no cancellation) + pub(crate) fn clear_cancel_requested(&self) { + self.cancel_requested.store(0, Ordering::Release); + } + + /// Check if cancel_requested is set for the given generation + fn is_cancel_requested_for_generation(&self, generation: u64) -> bool { + let raw = self.cancel_requested.load(Ordering::Acquire); + let is_set = raw & Self::CANCEL_REQUESTED_BIT != 0; + let stored_generation = raw & Self::MAX_GENERATION; + is_set && stored_generation == generation + } + + /// Increment the generation for a new guest function call + pub(crate) fn increment_generation(&self) -> u64 { + self.running + .fetch_update(Ordering::Release, Ordering::Acquire, |raw| { + let current_generation = raw & !Self::RUNNING_BIT; + let running_bit = raw & Self::RUNNING_BIT; + if current_generation == Self::MAX_GENERATION { + // Wrap around to 0 + return Some(running_bit); + } + Some((current_generation + 1) | running_bit) + }) + .map(|raw| { + let old_gen = raw & !Self::RUNNING_BIT; + if old_gen == Self::MAX_GENERATION { + 0 + } else { + old_gen + 1 + } + }) + .unwrap_or(0) + } + + /// Set running bit to true, return current generation + fn set_running_bit(&self) -> u64 { + self.running + .fetch_update(Ordering::Release, Ordering::Acquire, |raw| { + Some(raw | Self::RUNNING_BIT) + }) + .map(|raw| raw & !Self::RUNNING_BIT) + .unwrap_or(0) + } + + /// Clear running bit, return current generation + fn clear_running_bit(&self) -> u64 { + self.running + .fetch_and(!Self::RUNNING_BIT, Ordering::Relaxed) + & !Self::RUNNING_BIT + } + + fn get_running_and_generation(&self) -> (bool, u64) { + let raw = self.running.load(Ordering::Relaxed); + let running = raw & Self::RUNNING_BIT != 0; + let generation = raw & !Self::RUNNING_BIT; + (running, generation) + } +} + impl InterruptHandle for WindowsInterruptHandle { fn kill(&self) -> bool { - self.cancel_requested.store(true, Ordering::Relaxed); - self.running.load(Ordering::Relaxed) - && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } + // Check if a call is actually active first + if !self.call_active.load(Ordering::Acquire) { + return false; + } + + // Get the current running state and generation + let (running, generation) = self.get_running_and_generation(); + + // Set cancel_requested with the current generation + self.set_cancel_requested(generation); + + // Only call WHvCancelRunVirtualProcessor if VCPU is actually running in guest mode + running && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } } #[cfg(gdb)] fn kill_from_debugger(&self) -> bool { self.debug_interrupt.store(true, Ordering::Relaxed); - self.running.load(Ordering::Relaxed) - && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } + let (running, _) = self.get_running_and_generation(); + running && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } } fn dropped(&self) -> bool { self.dropped.load(Ordering::Relaxed) } + + fn set_call_active(&self) { + 
self.increment_generation(); + self.call_active.store(true, Ordering::Release); + } + + fn clear_call_active(&self) { + self.call_active.store(false, Ordering::Release); + } } diff --git a/src/hyperlight_host/src/hypervisor/kvm.rs b/src/hyperlight_host/src/hypervisor/kvm.rs index a42538b33..96b9b3e04 100644 --- a/src/hyperlight_host/src/hypervisor/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/kvm.rs @@ -343,7 +343,8 @@ impl KVMDriver { let interrupt_handle = Arc::new(LinuxInterruptHandle { running: AtomicU64::new(0), - cancel_requested: AtomicBool::new(false), + cancel_requested: AtomicU64::new(0), + call_active: AtomicBool::new(false), #[cfg(gdb)] debug_interrupt: AtomicBool::new(false), #[cfg(all( @@ -627,16 +628,13 @@ impl Hypervisor for KVMDriver { self.interrupt_handle .tid .store(unsafe { libc::pthread_self() as u64 }, Ordering::Relaxed); - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call - self.interrupt_handle - .set_running_and_increment_generation() - .map_err(|e| { - new_error!( - "Error setting running state and incrementing generation: {}", - e - ) - })?; + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after set_running_bit but before checking cancel_requested): + // - kill() will stamp cancel_requested with the current generation + // - We will check cancel_requested below and skip the VcpuFd::run() call + // - This is the desired behavior - the kill takes effect immediately + let generation = self.interrupt_handle.set_running_bit(); + #[cfg(not(gdb))] let debug_interrupt = false; #[cfg(gdb)] @@ -644,14 +642,16 @@ impl Hypervisor for KVMDriver { .interrupt_handle .debug_interrupt .load(Ordering::Relaxed); - // Don't run the vcpu if `cancel_requested` is true + // Don't run the vcpu if `cancel_requested` is set for our generation // - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after checking cancel_requested but before vcpu.run()): + // - kill() will stamp cancel_requested with the current generation + // - We will proceed with vcpu.run(), but signals will be sent to interrupt it + // - The vcpu will be interrupted and return EINTR (handled below) let exit_reason = if self .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed) + .is_cancel_requested_for_generation(generation) || debug_interrupt { Err(kvm_ioctls::Error::new(libc::EINTR)) @@ -665,34 +665,40 @@ impl Hypervisor for KVMDriver { Some(hyperlight_guest_tracing::invariant_tsc::read_tsc()); } - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then the vcpu will run, but we will keep sending signals to this thread - // to interrupt it until `running` is set to false. The `vcpu_fd::run()` call will - // return either normally with an exit reason, or from being "kicked" by out signal handler, with an EINTR error, - // both of which are fine. 
+ // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (during vcpu.run() execution): + // - kill() stamps cancel_requested with the current generation + // - kill() sends signals (SIGRTMIN+offset) to this thread repeatedly + // - The signal handler is a no-op, but it causes vcpu.run() to return EINTR + // - We check cancel_requested below and return Cancelled if generation matches self.vcpu_fd.run() }; - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then signals will be sent to this thread until `running` is set to false. - // This is fine since the signal handler is a no-op. + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after vcpu.run() returns but before clear_running_bit): + // - kill() continues sending signals to this thread (running bit is still set) + // - The signals are harmless (no-op handler), we just need to check cancel_requested + // - We load cancel_requested below to determine if this run was cancelled let cancel_requested = self .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed); + .is_cancel_requested_for_generation(generation); #[cfg(gdb)] let debug_interrupt = self .interrupt_handle .debug_interrupt .load(Ordering::Relaxed); - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**. - // Additionally signals will be sent to this thread until `running` is set to false. - // This is fine since the signal handler is a no-op. + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after loading cancel_requested but before clear_running_bit): + // - kill() stamps cancel_requested with the CURRENT generation (not the one we just loaded) + // - kill() continues sending signals until running bit is cleared + // - The newly stamped cancel_requested will affect the NEXT vcpu.run() call + // - Signals sent now are harmless (no-op handler) self.interrupt_handle.clear_running_bit(); - // At this point, `running` is false so no more signals will be sent to this thread, - // but we may still receive async signals that were sent before this point. - // To prevent those signals from interrupting subsequent calls to `run()` (on other vms!), - // we make sure to check `cancel_requested` before cancelling (see `libc::EINTR` match-arm below). + // At this point, running bit is clear so kill() will stop sending signals. + // However, we may still receive delayed signals that were sent before clear_running_bit. 
+ // These stale signals are harmless because: + // - The signal handler is a no-op + // - We check generation matching in cancel_requested before treating EINTR as cancellation + // - If generation doesn't match, we return Retry instead of Cancelled let result = match exit_reason { Ok(VcpuExit::Hlt) => { crate::debug!("KVM - Halt Details : {:#?}", &self); @@ -741,14 +747,16 @@ impl Hypervisor for KVMDriver { } }, Err(e) => match e.errno() { - // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled + // We send a signal (SIGRTMIN+offset) to interrupt the vcpu, which causes EINTR libc::EINTR => { - // If cancellation was not requested for this specific vm, the vcpu was interrupted because of debug interrupt or - // a stale signal that meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it + // Check if cancellation was requested for THIS specific generation. + // If not, the EINTR came from: + // - A debug interrupt (if GDB is enabled) + // - A stale signal from a previous guest call (generation mismatch) + // - A signal meant for a different sandbox on the same thread + // In these cases, we return Retry to continue execution. if cancel_requested { - self.interrupt_handle - .cancel_requested - .store(false, Ordering::Relaxed); + self.interrupt_handle.clear_cancel_requested(); HyperlightExit::Cancelled() } else { #[cfg(gdb)] diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 781099134..7d6e08bb7 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -114,7 +114,9 @@ pub enum HyperlightExit { Cancelled(), /// The vCPU has exited for a reason that is not handled by Hyperlight Unknown(String), - /// The operation should be retried, for example this can happen on Linux where a call to run the CPU can return EAGAIN + /// The operation should be retried + /// On Linux this can happen where a call to run the CPU can return EAGAIN + /// On Windows the platform could cause a cancelation of the VM run Retry(), } @@ -413,7 +415,10 @@ impl VirtualCPU { log_then_return!("Unexpected VM Exit {:?}", reason); } - Ok(HyperlightExit::Retry()) => continue, + Ok(HyperlightExit::Retry()) => { + println!("retrying vm run"); + continue; + } Err(e) => { #[cfg(crashdump)] crashdump::generate_crashdump(hv)?; @@ -435,13 +440,50 @@ impl VirtualCPU { pub trait InterruptHandle: Debug + Send + Sync { /// Interrupt the corresponding sandbox from running. /// - /// - If this is called while the vcpu is running, then it will interrupt the vcpu and return `true`. - /// - If this is called while the vcpu is not running, (for example during a host call), the - /// vcpu will not immediately be interrupted, but will prevent the vcpu from running **the next time** - /// it's scheduled, and returns `false`. + /// This method attempts to cancel a currently executing guest function call by sending + /// a signal to the VCPU thread. It uses generation tracking (linux only) and call_active flag to + /// ensure the interruption is safe and precise. + /// + /// # Behavior + /// + /// - **Guest function running**: If called while a guest function is executing (VCPU running + /// or in a host function call), this stamps the current generation into cancel_requested + /// and sends a signal to interrupt the VCPU. Returns `true`. 
+ /// + /// - **No active call**: If called when no guest function call is in progress (call_active=false), + /// this has no effect and returns `false`. This prevents "kill-in-advance" where kill() + /// is called before a guest function starts. + /// + /// - **During host function**: If the guest call is currently executing a host function + /// (VCPU not running but call_active=true), this stamps cancel_requested. When the + /// host function returns and attempts to re-enter the guest, the cancellation will + /// be detected and the call will abort. Returns `true`. + /// + /// # Generation Tracking (linux only) + /// + /// The method stamps the current generation number along with the cancellation request. + /// This ensures that: + /// - Stale signals from previous calls are ignored (generation mismatch) + /// - Only the intended guest function call is affected + /// - Multiple rapid kill() calls on the same generation are idempotent + /// + /// # Blocking Behavior + /// + /// This function will block while attempting to deliver the signal to the VCPU thread, + /// retrying until either: + /// - The signal is successfully delivered (VCPU transitions from running to not running) + /// - The VCPU stops running for another reason (e.g., call completes normally) + /// + /// # Returns + /// + /// - `true`: Cancellation request was stamped (kill will take effect) + /// - `false`: No active call, cancellation request was not stamped (no effect) /// /// # Note - /// This function will block for the duration of the time it takes for the vcpu thread to be interrupted. + /// + /// To reliably interrupt a guest call, ensure `kill()` is called while the guest + /// function is actually executing. Calling kill() before call_guest_function() will + /// have no effect. fn kill(&self) -> bool; /// Used by a debugger to interrupt the corresponding sandbox from running. @@ -458,50 +500,167 @@ pub trait InterruptHandle: Debug + Send + Sync { /// Returns true if the corresponding sandbox has been dropped fn dropped(&self) -> bool; + + /// Increment the generation counter for a new guest function call (Linux only). + /// + /// This must be called exactly once at the start of each guest function call, + /// before any VCPU execution begins. The returned generation number will be + /// used throughout the entire guest call, even if the VCPU is run multiple + /// times (due to host function calls, retries, etc.). + /// + /// # Returns + /// + /// The new generation number assigned to this guest function call. + /// + /// # Note + /// + /// This is only called on Linux (KVM/MSHV). Windows uses a different interrupt mechanism. + #[cfg(any(kvm, mshv))] + fn increment_call_generation(&self) -> u64; + + /// Mark that a guest function call is starting. + /// + /// Sets the call_active flag to true, indicating that a guest function call + /// is now in progress. This allows kill() to stamp cancel_requested. + /// + /// Must be called immediately after increment_call_generation() and before + /// any VCPU execution begins. + fn set_call_active(&self); + + /// Mark that a guest function call has completed. + /// + /// Clears the call_active flag, indicating that no guest function call is + /// in progress. After this, kill() will have no effect and will return false. + /// + /// Must be called at the end of call_guest_function_by_name_no_reset(), + /// after the guest call has fully completed (whether successfully or with error). 
+ fn clear_call_active(&self); } #[cfg(any(kvm, mshv))] #[derive(Debug)] pub(super) struct LinuxInterruptHandle { - /// Invariant: vcpu is running => most significant bit (63) of `running` is set. (Neither converse nor inverse is true) + /// Atomic flag combining running state and generation counter. + /// + /// **Bit 63**: VCPU running state (1 = running, 0 = not running) + /// **Bits 0-62**: Generation counter (incremented once per guest function call) + /// + /// # Generation Tracking + /// + /// The generation counter is incremented once at the start of each guest function call + /// and remains constant throughout that call, even if the VCPU is run multiple times + /// (due to host function calls, retries, etc.). This design solves the race condition + /// where a kill() from a previous call could spuriously cancel a new call. /// - /// Additionally, bit 0-62 tracks how many times the VCPU has been run. Incremented each time `run()` is called. + /// ## Why Generations Are Needed /// - /// This prevents an ABA problem where: - /// 1. The VCPU is running (generation N), - /// 2. It gets cancelled, - /// 3. Then quickly restarted (generation N+1), - /// before the original thread has observed that it was cancelled. + /// Consider this scenario WITHOUT generation tracking: + /// 1. Thread A starts guest call 1, VCPU runs + /// 2. Thread B calls kill(), sends signal to Thread A + /// 3. Guest call 1 completes before signal arrives + /// 4. Thread A starts guest call 2, VCPU runs again + /// 5. Stale signal from step 2 arrives and incorrectly cancels call 2 /// - /// Without this generation counter, the interrupt logic might assume the VCPU is still - /// in the *original* run (generation N), see that it's `running`, and re-send the signal. - /// But the new VCPU run (generation N+1) would treat this as a stale signal and ignore it, - /// potentially causing an infinite loop where no effective interrupt is delivered. + /// WITH generation tracking: + /// 1. Thread A starts guest call 1 (generation N), VCPU runs + /// 2. Thread B calls kill(), stamps cancel_requested with generation N + /// 3. Guest call 1 completes, signal may or may not have arrived yet + /// 4. Thread A starts guest call 2 (generation N+1), VCPU runs again + /// 5. If stale signal arrives, signal handler checks: cancel_requested.generation (N) != current generation (N+1) + /// 6. Stale signal is ignored, call 2 continues normally /// - /// Invariant: If the VCPU is running, `run_generation[bit 0-62]` matches the current run's generation. + /// ## Per-Call vs Per-Run Generation + /// + /// It's critical that generation is incremented per GUEST FUNCTION CALL, not per vcpu.run(): + /// - A single guest function call may invoke vcpu.run() multiple times (host calls, retries) + /// - All run() calls within the same guest call must share the same generation + /// - This ensures kill() affects the entire guest function call atomically + /// + /// # Invariants + /// + /// - If VCPU is running: bit 63 is set (neither converse nor inverse holds) + /// - If VCPU is running: bits 0-62 match the current guest call's generation running: AtomicU64, - /// Invariant: vcpu is running => `tid` is the thread on which it is running. - /// Note: multiple vms may have the same `tid`, but at most one vm will have `running` set to true. + + /// Thread ID where the VCPU is currently running. 
+ /// + /// # Invariants + /// + /// - If VCPU is running: tid contains the thread ID of the executing thread + /// - Multiple VMs may share the same tid, but at most one will have running=true tid: AtomicU64, - /// True when an "interruptor" has requested the VM to be cancelled. Set immediately when - /// `kill()` is called, and cleared when the vcpu is no longer running. - /// This is used to - /// 1. make sure stale signals do not interrupt the - /// the wrong vcpu (a vcpu may only be interrupted iff `cancel_requested` is true), - /// 2. ensure that if a vm is killed while a host call is running, - /// the vm will not re-enter the guest after the host call returns. - cancel_requested: AtomicBool, - /// True when the debugger has requested the VM to be interrupted. Set immediately when - /// `kill_from_debugger()` is called, and cleared when the vcpu is no longer running. - /// This is used to make sure stale signals do not interrupt the the wrong vcpu - /// (a vcpu may only be interrupted by a debugger if `debug_interrupt` is true), + + /// Generation-aware cancellation request flag. + /// + /// **Bit 63**: Cancellation requested flag (1 = kill requested, 0 = no kill) + /// **Bits 0-62**: Generation number when cancellation was requested + /// + /// # Purpose + /// + /// This flag serves three critical functions: + /// + /// 1. **Prevent stale signals**: A VCPU may only be interrupted if cancel_requested + /// is set AND the generation matches the current call's generation + /// + /// 2. **Handle host function calls**: If kill() is called while a host function is + /// executing (VCPU not running but call is active), cancel_requested is stamped + /// with the current generation. When the host function returns and the VCPU + /// attempts to re-enter the guest, it will see the cancellation and abort. + /// + /// 3. **Detect stale kills**: If cancel_requested.generation doesn't match the + /// current generation, it's from a previous call and should be ignored + /// + /// # States and Transitions + /// + /// - **No cancellation**: cancel_requested = 0 (bit 63 clear) + /// - **Cancellation for generation N**: cancel_requested = (1 << 63) | N + /// - Signal handler checks: (cancel_requested & 0x7FFFFFFFFFFFFFFF) == current_generation + cancel_requested: AtomicU64, + + /// Flag indicating whether a guest function call is currently in progress. + /// + /// **true**: A guest function call is active (between call start and completion) + /// **false**: No guest function call is active + /// + /// # Purpose + /// + /// This flag prevents kill() from having any effect when called outside of a + /// guest function call. This solves the "kill-in-advance" problem where kill() + /// could be called before a guest function starts and would incorrectly cancel it. + /// + /// # Behavior + /// + /// - Set to true at the start of call_guest_function_by_name_no_reset() + /// - Cleared at the end of call_guest_function_by_name_no_reset() + /// - kill() only stamps cancel_requested if call_active is true + /// - If kill() is called when call_active=false, it returns false and has no effect + /// + /// # Why AtomicBool is Safe + /// + /// Although there's a theoretical race where: + /// 1. Thread A checks call_active (false) + /// 2. Thread B sets call_active (true) and starts guest call + /// 3. Thread A's kill() returns false (no effect) + /// + /// This is acceptable because the generation tracking provides an additional + /// safety layer. 
Even if a stale kill somehow stamped cancel_requested, the + /// generation mismatch would cause it to be ignored. + call_active: AtomicBool, + + /// Debugger interrupt request flag (GDB only). + /// + /// Set when kill_from_debugger() is called, cleared when VCPU stops running. + /// Used to distinguish debugger interrupts from normal kill() interrupts. #[cfg(gdb)] debug_interrupt: AtomicBool, - /// Whether the corresponding vm is dropped + + /// Whether the corresponding VM has been dropped. dropped: AtomicBool, - /// Retry delay between signals sent to the vcpu thread + + /// Delay between retry attempts when sending signals to the VCPU thread. retry_delay: Duration, - /// The offset of the SIGRTMIN signal used to interrupt the vcpu thread + + /// Offset from SIGRTMIN for the signal used to interrupt the VCPU thread. sig_rt_min_offset: u8, } @@ -509,18 +668,51 @@ pub(super) struct LinuxInterruptHandle { impl LinuxInterruptHandle { const RUNNING_BIT: u64 = 1 << 63; const MAX_GENERATION: u64 = Self::RUNNING_BIT - 1; + const CANCEL_REQUESTED_BIT: u64 = 1 << 63; - // set running to true and increment the generation. Generation will wrap around at `MAX_GENERATION`. - fn set_running_and_increment_generation(&self) -> std::result::Result { + // Set cancel_requested to true with the given generation + fn set_cancel_requested(&self, generation: u64) { + let value = Self::CANCEL_REQUESTED_BIT | (generation & Self::MAX_GENERATION); + self.cancel_requested.store(value, Ordering::Release); + } + + // Clear cancel_requested (reset to no cancellation) + pub(crate) fn clear_cancel_requested(&self) { + self.cancel_requested.store(0, Ordering::Release); + } + + // Check if cancel_requested is set for the given generation + fn is_cancel_requested_for_generation(&self, generation: u64) -> bool { + let raw = self.cancel_requested.load(Ordering::Acquire); + let is_set = raw & Self::CANCEL_REQUESTED_BIT != 0; + let stored_generation = raw & Self::MAX_GENERATION; + is_set && stored_generation == generation + } + + // Increment the generation for a new guest function call. Generation will wrap around at `MAX_GENERATION`. 
+ pub(crate) fn increment_generation(&self) -> u64 { self.running - .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |raw| { - let generation = raw & !Self::RUNNING_BIT; - if generation == Self::MAX_GENERATION { + .fetch_update(Ordering::Release, Ordering::Acquire, |raw| { + let current_generation = raw & !Self::RUNNING_BIT; + let running_bit = raw & Self::RUNNING_BIT; + if current_generation == Self::MAX_GENERATION { // restart generation from 0 - return Some(Self::RUNNING_BIT); + return Some(running_bit); } - Some((generation + 1) | Self::RUNNING_BIT) + Some((current_generation + 1) | running_bit) }) + .map(|raw| (raw & !Self::RUNNING_BIT) + 1) // Return the NEW generation + .unwrap_or(1) // If wrapped, return 1 + } + + // set running to true without incrementing generation + fn set_running_bit(&self) -> u64 { + self.running + .fetch_update(Ordering::Release, Ordering::Acquire, |raw| { + Some(raw | Self::RUNNING_BIT) + }) + .map(|raw| raw & !Self::RUNNING_BIT) // Return the current generation + .unwrap_or(0) } // clear the running bit and return the generation @@ -536,14 +728,34 @@ impl LinuxInterruptHandle { (running, generation) } - fn send_signal(&self) -> bool { + fn send_signal(&self, stamp_generation: bool) -> bool { let signal_number = libc::SIGRTMIN() + self.sig_rt_min_offset as libc::c_int; let mut sent_signal = false; let mut target_generation: Option = None; loop { + + if (!self.call_active.load(Ordering::Acquire)) { + // No active call, so no need to send signal + break; + } + let (running, generation) = self.get_running_and_generation(); + // Stamp generation into cancel_requested if requested and this is the first iteration + // We stamp even when running=false to support killing during host function calls + // The generation tracking will prevent stale kills from affecting new calls + // Only stamp if a call is actually active (call_active=true) + if stamp_generation + && target_generation.is_none() + && self.call_active.load(Ordering::Acquire) + { + self.set_cancel_requested(generation); + target_generation = Some(generation); + } + + // If not running, we've stamped the generation (if requested), so we're done + // This handles the host function call scenario if !running { break; } @@ -570,18 +782,40 @@ impl LinuxInterruptHandle { #[cfg(any(kvm, mshv))] impl InterruptHandle for LinuxInterruptHandle { fn kill(&self) -> bool { - self.cancel_requested.store(true, Ordering::Relaxed); - self.send_signal() + if !(self.call_active.load(Ordering::Acquire)) + { + // No active call, so no effect + return false; + } + + // send_signal will stamp the generation into cancel_requested + // right before sending each signal, ensuring they're always in sync + self.send_signal(true) } #[cfg(gdb)] fn kill_from_debugger(&self) -> bool { self.debug_interrupt.store(true, Ordering::Relaxed); - self.send_signal() + self.send_signal(false) } fn dropped(&self) -> bool { self.dropped.load(Ordering::Relaxed) } + + #[cfg(any(kvm, mshv))] + fn increment_call_generation(&self) -> u64 { + self.increment_generation() + } + + #[cfg(any(kvm, mshv))] + fn set_call_active(&self) { + self.call_active.store(true, Ordering::Release); + } + + #[cfg(any(kvm, mshv))] + fn clear_call_active(&self) { + self.call_active.store(false, Ordering::Release); + } } #[cfg(all(test, any(target_os = "windows", kvm)))] diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 6dc02d71f..129d4a938 100644 --- 
a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -390,6 +390,13 @@ impl MultiUseSandbox { return_type: ReturnType, args: Vec, ) -> Result { + // Increment generation for this guest function call (Linux only) + #[cfg(any(kvm, mshv))] + let _generation = self.vm.interrupt_handle().increment_call_generation(); + + // Mark that a guest function call is now active + self.vm.interrupt_handle().set_call_active(); + let res = (|| { let estimated_capacity = estimate_flatbuffer_capacity(function_name, &args); @@ -405,6 +412,10 @@ impl MultiUseSandbox { self.mem_mgr.write_guest_function_call(buffer)?; + // Increment generation for this guest function call (Linux only) + #[cfg(any(kvm, mshv))] + self.vm.interrupt_handle().increment_call_generation(); + self.vm.dispatch_call_from_host( self.dispatch_ptr.clone(), #[cfg(gdb)] @@ -440,6 +451,10 @@ impl MultiUseSandbox { if res.is_err() { self.mem_mgr.clear_io_buffers(); } + + // Mark that the guest function call has completed + self.vm.interrupt_handle().clear_call_active(); + res } diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index 0869a9d5a..0f65f05bf 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -68,7 +68,9 @@ fn interrupt_host_call() { } }); - let result = sandbox.call::("CallHostSpin", ()).unwrap_err(); + let result = sandbox.call::("CallHostSpin", ()); + println!("Result: {:?}", result); + let result = result.unwrap_err(); assert!(matches!(result, HyperlightError::ExecutionCanceledByHost())); thread.join().unwrap(); @@ -105,7 +107,8 @@ fn interrupt_in_progress_guest_call() { thread.join().expect("Thread should finish"); } -/// Makes sure interrupting a vm before the guest call has started also prevents the guest call from being executed +/// Makes sure interrupting a vm before the guest call has started has no effect, +/// but a second kill after the call starts will interrupt it #[test] fn interrupt_guest_call_in_advance() { let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); @@ -114,15 +117,20 @@ fn interrupt_guest_call_in_advance() { let interrupt_handle = sbox1.interrupt_handle(); assert!(!interrupt_handle.dropped()); // not yet dropped - // kill vm before the guest call has started + // First kill before the guest call has started - should have no effect + // Then kill again after a delay to interrupt the actual call let thread = thread::spawn(move || { - assert!(!interrupt_handle.kill()); // should return false since vcpu is not running yet + assert!(!interrupt_handle.kill()); // should return false since no call is active barrier2.wait(); + // Wait a bit for the Spin call to actually start + thread::sleep(Duration::from_millis(100)); + assert!(interrupt_handle.kill()); // this should succeed and interrupt the Spin call barrier2.wait(); // wait here until main thread has dropped the sandbox assert!(interrupt_handle.dropped()); }); - barrier.wait(); // wait until `kill()` is called before starting the guest call + barrier.wait(); // wait until first `kill()` is called before starting the guest call + // The Spin call should be interrupted by the second kill() let res = sbox1.call::("Spin", ()).unwrap_err(); assert!(matches!(res, HyperlightError::ExecutionCanceledByHost())); @@ -857,3 +865,369 @@ fn test_if_guest_is_able_to_get_string_return_values_from_host() { "Guest Function, string added by Host 
Function".to_string() ); } + +/// Test that monitors CPU time usage and can interrupt a guest based on CPU time limits +/// Uses a pool of 100 sandboxes, 100 threads, and 500 iterations per thread +/// Some sandboxes are expected to complete normally, some are expected to be killed +/// This test makes sure that a reused sandbox is not killed in the case where the previous +/// execution was killed due to CPU time limit but the invocation completed normally before the cancel was processed. +#[test] +fn test_cpu_time_interrupt() { + use std::collections::VecDeque; + use std::mem::MaybeUninit; + use std::sync::Mutex; + use std::sync::atomic::AtomicUsize; + use std::sync::mpsc::channel; + + const POOL_SIZE: usize = 100; + const NUM_THREADS: usize = 100; + const ITERATIONS_PER_THREAD: usize = 500; + + // Create a pool of 100 sandboxes + println!("Creating pool of {} sandboxes...", POOL_SIZE); + let mut sandbox_pool: Vec = Vec::with_capacity(POOL_SIZE); + for i in 0..POOL_SIZE { + let sandbox = new_uninit_rust().unwrap().evolve().unwrap(); + if (i + 1) % 10 == 0 { + println!("Created {}/{} sandboxes", i + 1, POOL_SIZE); + } + sandbox_pool.push(sandbox); + } + + // Wrap the pool in Arc> for thread-safe access + let pool = Arc::new(Mutex::new(VecDeque::from(sandbox_pool))); + + // Counters for statistics + let total_iterations = Arc::new(AtomicUsize::new(0)); + let killed_count = Arc::new(AtomicUsize::new(0)); + let completed_count = Arc::new(AtomicUsize::new(0)); + let errors_count = Arc::new(AtomicUsize::new(0)); + + println!( + "Starting {} threads with {} iterations each...", + NUM_THREADS, ITERATIONS_PER_THREAD + ); + + // Spawn worker threads + let mut thread_handles = vec![]; + for thread_id in 0..NUM_THREADS { + let pool_clone = Arc::clone(&pool); + let total_iterations_clone = Arc::clone(&total_iterations); + let killed_count_clone = Arc::clone(&killed_count); + let completed_count_clone = Arc::clone(&completed_count); + let errors_count_clone = Arc::clone(&errors_count); + + let handle = thread::spawn(move || { + for iteration in 0..ITERATIONS_PER_THREAD { + // === START OF ITERATION === + // Get a fresh sandbox from the pool for this iteration + let mut sandbox = { + let mut pool_guard = pool_clone.lock().unwrap(); + // Wait if pool is empty (shouldn't happen with proper design) + while pool_guard.is_empty() { + drop(pool_guard); + thread::sleep(Duration::from_micros(100)); + pool_guard = pool_clone.lock().unwrap(); + } + pool_guard.pop_front().unwrap() + }; + + // Vary CPU time between 3ms and 7ms to ensure some get killed and some complete + // The CPU limit is 5ms, so: + // - 3-4ms should complete normally + // - 6-7ms should be killed + // - 5ms is borderline and could go either way + let cpu_time_ms = + 3 + (((thread_id * ITERATIONS_PER_THREAD + iteration) % 5) as u32); + + let interrupt_handle = sandbox.interrupt_handle(); + + // Channel to send the thread ID + let (tx, rx) = channel(); + + // Flag to signal monitoring start + let should_monitor = Arc::new(AtomicBool::new(false)); + let should_monitor_clone = should_monitor.clone(); + + // Flag to signal monitoring stop (when guest execution completes) + let stop_monitoring = Arc::new(AtomicBool::new(false)); + let stop_monitoring_clone = stop_monitoring.clone(); + + // Flag to track if we actually sent a kill signal + let was_killed = Arc::new(AtomicBool::new(false)); + let was_killed_clone = was_killed.clone(); + + // Spawn CPU time monitor thread + let monitor_thread = thread::spawn(move || { + let main_thread_id = match 
rx.recv() { + Ok(tid) => tid, + Err(_) => return, + }; + + while !should_monitor_clone.load(Ordering::Acquire) { + thread::sleep(Duration::from_micros(50)); + } + + #[cfg(target_os = "linux")] + unsafe { + let mut clock_id: libc::clockid_t = 0; + if libc::pthread_getcpuclockid(main_thread_id, &mut clock_id) != 0 { + return; + } + + let cpu_limit_ns = 5_000_000; // 5ms CPU time limit + let mut start_time = MaybeUninit::::uninit(); + + if libc::clock_gettime(clock_id, start_time.as_mut_ptr()) != 0 { + return; + } + let start_time = start_time.assume_init(); + + loop { + // Check if we should stop monitoring (guest completed) + if stop_monitoring_clone.load(Ordering::Acquire) { + break; + } + + let mut current_time = MaybeUninit::::uninit(); + if libc::clock_gettime(clock_id, current_time.as_mut_ptr()) != 0 { + break; + } + let current_time = current_time.assume_init(); + + let elapsed_ns = (current_time.tv_sec - start_time.tv_sec) + * 1_000_000_000 + + (current_time.tv_nsec - start_time.tv_nsec); + + if elapsed_ns > cpu_limit_ns { + // Double-check that monitoring should still continue before killing + // The guest might have completed between our last check and now + if stop_monitoring_clone.load(Ordering::Acquire) { + break; + } + + // Mark that we sent a kill signal BEFORE calling kill + // to avoid race conditions + was_killed_clone.store(true, Ordering::Release); + interrupt_handle.kill(); + break; + } + + thread::sleep(Duration::from_micros(50)); + } + } + + #[cfg(target_os = "windows")] + unsafe { + use std::ffi::c_void; + + // On Windows, we use GetThreadTimes to get CPU time + // main_thread_id is a HANDLE on Windows + let thread_handle = main_thread_id as *mut c_void; + + let cpu_limit_ns: i64 = 1_000_000; // 5ms CPU time limit (in nanoseconds) + + let mut creation_time = + MaybeUninit::::uninit(); + let mut exit_time = + MaybeUninit::::uninit(); + let mut kernel_time_start = + MaybeUninit::::uninit(); + let mut user_time_start = + MaybeUninit::::uninit(); + + // Get initial CPU times + if windows_sys::Win32::System::Threading::GetThreadTimes( + thread_handle, + creation_time.as_mut_ptr(), + exit_time.as_mut_ptr(), + kernel_time_start.as_mut_ptr(), + user_time_start.as_mut_ptr(), + ) == 0 + { + return; + } + + let kernel_time_start = kernel_time_start.assume_init(); + let user_time_start = user_time_start.assume_init(); + + // Convert FILETIME to u64 (100-nanosecond intervals) + let start_cpu_time = ((kernel_time_start.dwHighDateTime as u64) << 32 + | kernel_time_start.dwLowDateTime as u64) + + ((user_time_start.dwHighDateTime as u64) << 32 + | user_time_start.dwLowDateTime as u64); + + loop { + // Check if we should stop monitoring (guest completed) + if stop_monitoring_clone.load(Ordering::Acquire) { + break; + } + + let mut kernel_time_current = + MaybeUninit::::uninit(); + let mut user_time_current = + MaybeUninit::::uninit(); + + if windows_sys::Win32::System::Threading::GetThreadTimes( + thread_handle, + creation_time.as_mut_ptr(), + exit_time.as_mut_ptr(), + kernel_time_current.as_mut_ptr(), + user_time_current.as_mut_ptr(), + ) == 0 + { + break; + } + + let kernel_time_current = kernel_time_current.assume_init(); + let user_time_current = user_time_current.assume_init(); + + // Convert FILETIME to u64 + let current_cpu_time = ((kernel_time_current.dwHighDateTime as u64) + << 32 + | kernel_time_current.dwLowDateTime as u64) + + ((user_time_current.dwHighDateTime as u64) << 32 + | user_time_current.dwLowDateTime as u64); + + // FILETIME is in 100-nanosecond intervals, convert 
to nanoseconds + let elapsed_ns = ((current_cpu_time - start_cpu_time) * 100) as i64; + + if elapsed_ns > cpu_limit_ns { + // Double-check that monitoring should still continue before killing + // The guest might have completed between our last check and now + if stop_monitoring_clone.load(Ordering::Acquire) { + break; + } + + // Mark that we sent a kill signal BEFORE calling kill + // to avoid race conditions + println!( + "Thread {} iteration {}: CPU time exceeded, sending kill signal", + thread_id, iteration + ); + was_killed_clone.store(true, Ordering::Release); + interrupt_handle.kill(); + break; + } + + thread::sleep(Duration::from_micros(50)); + } + } + }); + + // Send thread ID and start monitoring + #[cfg(target_os = "linux")] + unsafe { + let thread_id = libc::pthread_self(); + let _ = tx.send(thread_id); + } + + #[cfg(target_os = "windows")] + unsafe { + // On Windows, get the current thread's pseudo-handle + let thread_handle = windows_sys::Win32::System::Threading::GetCurrentThread(); + let _ = tx.send(thread_handle as usize); + } + + should_monitor.store(true, Ordering::Release); + + // Call the guest function + let result = sandbox.call::("SpinForMs", cpu_time_ms); + + // Signal the monitor to stop + stop_monitoring.store(true, Ordering::Release); + + // Wait for monitor thread to complete to ensure was_killed flag is set + let _ = monitor_thread.join(); + + // NOW check if we sent a kill signal (after monitor thread has completed) + let kill_was_sent = was_killed.load(Ordering::Acquire); + + // Process the result and validate correctness + match result { + Err(HyperlightError::ExecutionCanceledByHost()) => { + // We received a cancellation error + if !kill_was_sent { + // ERROR: We got a cancellation but never sent a kill! + panic!( + "Thread {} iteration {}: Got ExecutionCanceledByHost but no kill signal was sent!", + thread_id, iteration + ); + } + // This is correct - we sent kill and got the error + killed_count_clone.fetch_add(1, Ordering::Relaxed); + } + Ok(_) => { + // Execution completed normally + // This is OK whether or not we sent a kill - the guest might have + // finished just before the kill signal arrived + completed_count_clone.fetch_add(1, Ordering::Relaxed); + } + Err(e) => { + // Unexpected error + eprintln!( + "Thread {} iteration {}: Unexpected error: {:?}, kill_sent: {}", + thread_id, iteration, e, kill_was_sent + ); + errors_count_clone.fetch_add(1, Ordering::Relaxed); + } + } + + total_iterations_clone.fetch_add(1, Ordering::Relaxed); + + // Progress reporting + let current_total = total_iterations_clone.load(Ordering::Relaxed); + if current_total % 500 == 0 { + println!( + "Progress: {}/{} iterations completed", + current_total, + NUM_THREADS * ITERATIONS_PER_THREAD + ); + } + + // === END OF ITERATION === + // Return sandbox to pool for reuse by other threads/iterations + { + let mut pool_guard = pool_clone.lock().unwrap(); + pool_guard.push_back(sandbox); + } + } + }); + + thread_handles.push(handle); + } + + // Wait for all threads to complete + for handle in thread_handles { + handle.join().unwrap(); + } + + // Print statistics + let total = total_iterations.load(Ordering::Relaxed); + let killed = killed_count.load(Ordering::Relaxed); + let completed = completed_count.load(Ordering::Relaxed); + let errors = errors_count.load(Ordering::Relaxed); + + println!("\n=== Test Statistics ==="); + println!("Total iterations: {}", total); + println!("Killed (CPU limit exceeded): {}", killed); + println!("Completed normally: {}", completed); + 
println!("Errors: {}", errors); + println!("Kill rate: {:.1}%", (killed as f64 / total as f64) * 100.0); + + // Verify we had both kills and completions + assert!( + killed > 0, + "Expected some executions to be killed, but none were" + ); + assert!( + completed > 0, + "Expected some executions to complete, but none did" + ); + assert_eq!(errors, 0, "Expected no errors, but got {}", errors); + assert_eq!( + total, + NUM_THREADS * ITERATIONS_PER_THREAD, + "Not all iterations completed" + ); +} diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index bdf8d5f62..a761a1358 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -583,6 +583,39 @@ fn spin(_: &FunctionCall) -> Result> { Ok(get_flatbuffer_result(())) } +/// Spins the CPU for approximately the specified number of milliseconds +fn spin_for_ms(fc: &FunctionCall) -> Result> { + let milliseconds = if let ParameterValue::UInt(ms) = fc.parameters.clone().unwrap()[0].clone() { + ms + } else { + return Err(HyperlightGuestError::new( + ErrorCode::GuestFunctionParameterTypeMismatch, + "Expected UInt parameter".to_string(), + )); + }; + + // Simple busy-wait loop - not precise but good enough for testing + // Different iteration counts for debug vs release mode to ensure reasonable CPU usage + #[cfg(debug_assertions)] + let iterations_per_ms = 120_000; // Debug mode - less optimized, tuned for ~50% kill rate + + #[cfg(not(debug_assertions))] + let iterations_per_ms = 1_000_000; // Release mode - highly optimized + + let total_iterations = milliseconds * iterations_per_ms; + + let mut counter: u64 = 0; + for _ in 0..total_iterations { + // Prevent the compiler from optimizing away the loop + counter = counter.wrapping_add(1); + core::hint::black_box(counter); + } + + // Calculate the actual number of milliseconds spun for, based on the counter and iterations per ms + let ms_spun = counter / iterations_per_ms as u64; + Ok(get_flatbuffer_result(ms_spun)) +} + #[hyperlight_guest_tracing::trace_function] fn test_abort(function_call: &FunctionCall) -> Result> { if let ParameterValue::Int(code) = function_call.parameters.clone().unwrap()[0].clone() { @@ -1366,6 +1399,14 @@ pub extern "C" fn hyperlight_main() { ); register_function(spin_def); + let spin_for_ms_def = GuestFunctionDefinition::new( + "SpinForMs".to_string(), + Vec::from(&[ParameterType::UInt]), + ReturnType::ULong, + spin_for_ms as usize, + ); + register_function(spin_for_ms_def); + let abort_def = GuestFunctionDefinition::new( "GuestAbortWithCode".to_string(), Vec::from(&[ParameterType::Int]),