From d200c7c23c5f14cbb259b11b7011cda473e00c38 Mon Sep 17 00:00:00 2001 From: Simon Davies Date: Thu, 16 Oct 2025 23:53:13 +0100 Subject: [PATCH 01/16] Fixes a race condition in killing Sandboxes Signed-off-by: Simon Davies --- src/hyperlight_host/Cargo.toml | 2 +- .../src/hypervisor/hyperv_linux.rs | 88 ++-- src/hyperlight_host/src/hypervisor/kvm.rs | 88 ++-- src/hyperlight_host/src/hypervisor/mod.rs | 319 ++++++++++++--- .../src/sandbox/initialized_multi_use.rs | 17 + src/hyperlight_host/tests/integration_test.rs | 380 +++++++++++++++++- src/tests/rust_guests/simpleguest/src/main.rs | 39 ++ 7 files changed, 801 insertions(+), 132 deletions(-) diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index 0b003b20d..acde6b9d6 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -65,7 +65,7 @@ windows = { version = "0.62", features = [ "Win32_System_JobObjects", "Win32_System_SystemServices", ] } -windows-sys = { version = "0.61", features = ["Win32"] } +windows-sys = { version = "0.61", features = ["Win32", "Win32_System", "Win32_System_Threading"] } windows-result = "0.4" rust-embed = { version = "8.7.2", features = ["debug-embed", "include-exclude", "interpolate-folder-path"] } sha256 = "1.6.0" diff --git a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs index 80ee0b16b..42835c16f 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs @@ -403,7 +403,8 @@ impl HypervLinuxDriver { let interrupt_handle = Arc::new(LinuxInterruptHandle { running: AtomicU64::new(0), - cancel_requested: AtomicBool::new(false), + cancel_requested: AtomicU64::new(0), + call_active: AtomicBool::new(false), #[cfg(gdb)] debug_interrupt: AtomicBool::new(false), #[cfg(all( @@ -669,16 +670,13 @@ impl Hypervisor for HypervLinuxDriver { self.interrupt_handle .tid .store(unsafe { libc::pthread_self() as u64 }, 
Ordering::Relaxed); - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call - self.interrupt_handle - .set_running_and_increment_generation() - .map_err(|e| { - new_error!( - "Error setting running state and incrementing generation: {}", - e - ) - })?; + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after set_running_bit but before checking cancel_requested): + // - kill() will stamp cancel_requested with the current generation + // - We will check cancel_requested below and skip the VcpuFd::run() call + // - This is the desired behavior - the kill takes effect immediately + let generation = self.interrupt_handle.set_running_bit(); + #[cfg(not(gdb))] let debug_interrupt = false; #[cfg(gdb)] @@ -687,14 +685,16 @@ impl Hypervisor for HypervLinuxDriver { .debug_interrupt .load(Ordering::Relaxed); - // Don't run the vcpu if `cancel_requested` is true + // Don't run the vcpu if `cancel_requested` is set for our generation // - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after checking cancel_requested but before vcpu.run()): + // - kill() will stamp cancel_requested with the current generation + // - We will proceed with vcpu.run(), but signals will be sent to interrupt it + // - The vcpu will be interrupted and return EINTR (handled below) let exit_reason = if self .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed) + .is_cancel_requested_for_generation(generation) || debug_interrupt { Err(mshv_ioctls::MshvError::from(libc::EINTR)) @@ -707,11 +707,12 @@ impl Hypervisor for HypervLinuxDriver { Some(hyperlight_guest_tracing::invariant_tsc::read_tsc()); 
self.trace_info.guest_start_epoch = Some(std::time::Instant::now()); } - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then the vcpu will run, but we will keep sending signals to this thread - // to interrupt it until `running` is set to false. The `vcpu_fd::run()` call will - // return either normally with an exit reason, or from being "kicked" by out signal handler, with an EINTR error, - // both of which are fine. + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (during vcpu.run() execution): + // - kill() stamps cancel_requested with the current generation + // - kill() sends signals (SIGRTMIN+offset) to this thread repeatedly + // - The signal handler is a no-op, but it causes vcpu.run() to return EINTR + // - We check cancel_requested below and return Cancelled if generation matches #[cfg(mshv2)] { let hv_message: hv_message = Default::default(); @@ -720,27 +721,32 @@ impl Hypervisor for HypervLinuxDriver { #[cfg(mshv3)] self.vcpu_fd.run() }; - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then signals will be sent to this thread until `running` is set to false. - // This is fine since the signal handler is a no-op. 
+ // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after vcpu.run() returns but before clear_running_bit): + // - kill() continues sending signals to this thread (running bit is still set) + // - The signals are harmless (no-op handler), we just need to check cancel_requested + // - We load cancel_requested below to determine if this run was cancelled let cancel_requested = self .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed); + .is_cancel_requested_for_generation(generation); #[cfg(gdb)] let debug_interrupt = self .interrupt_handle .debug_interrupt .load(Ordering::Relaxed); - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**. - // Additionally signals will be sent to this thread until `running` is set to false. - // This is fine since the signal handler is a no-op. + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after loading cancel_requested but before clear_running_bit): + // - kill() stamps cancel_requested with the CURRENT generation (not the one we just loaded) + // - kill() continues sending signals until running bit is cleared + // - The newly stamped cancel_requested will affect the NEXT vcpu.run() call + // - Signals sent now are harmless (no-op handler) self.interrupt_handle.clear_running_bit(); - // At this point, `running` is false so no more signals will be sent to this thread, - // but we may still receive async signals that were sent before this point. - // To prevent those signals from interrupting subsequent calls to `run()`, - // we make sure to check `cancel_requested` before cancelling (see `libc::EINTR` match-arm below). + // At this point, running bit is clear so kill() will stop sending signals. + // However, we may still receive delayed signals that were sent before clear_running_bit. 
+ // These stale signals are harmless because: + // - The signal handler is a no-op + // - We check generation matching in cancel_requested before treating EINTR as cancellation + // - If generation doesn't match, we return Retry instead of Cancelled let result = match exit_reason { Ok(m) => match m.header.message_type { HALT_MESSAGE => { @@ -820,14 +826,16 @@ impl Hypervisor for HypervLinuxDriver { } }, Err(e) => match e.errno() { - // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled + // We send a signal (SIGRTMIN+offset) to interrupt the vcpu, which causes EINTR libc::EINTR => { - // If cancellation was not requested for this specific vm, the vcpu was interrupted because of debug interrupt or - // a stale signal that meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it + // Check if cancellation was requested for THIS specific generation. + // If not, the EINTR came from: + // - A debug interrupt (if GDB is enabled) + // - A stale signal from a previous guest call (generation mismatch) + // - A signal meant for a different sandbox on the same thread + // In these cases, we return Retry to continue execution. 
if cancel_requested { - self.interrupt_handle - .cancel_requested - .store(false, Ordering::Relaxed); + self.interrupt_handle.clear_cancel_requested(); HyperlightExit::Cancelled() } else { #[cfg(gdb)] diff --git a/src/hyperlight_host/src/hypervisor/kvm.rs b/src/hyperlight_host/src/hypervisor/kvm.rs index a42538b33..96b9b3e04 100644 --- a/src/hyperlight_host/src/hypervisor/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/kvm.rs @@ -343,7 +343,8 @@ impl KVMDriver { let interrupt_handle = Arc::new(LinuxInterruptHandle { running: AtomicU64::new(0), - cancel_requested: AtomicBool::new(false), + cancel_requested: AtomicU64::new(0), + call_active: AtomicBool::new(false), #[cfg(gdb)] debug_interrupt: AtomicBool::new(false), #[cfg(all( @@ -627,16 +628,13 @@ impl Hypervisor for KVMDriver { self.interrupt_handle .tid .store(unsafe { libc::pthread_self() as u64 }, Ordering::Relaxed); - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call - self.interrupt_handle - .set_running_and_increment_generation() - .map_err(|e| { - new_error!( - "Error setting running state and incrementing generation: {}", - e - ) - })?; + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after set_running_bit but before checking cancel_requested): + // - kill() will stamp cancel_requested with the current generation + // - We will check cancel_requested below and skip the VcpuFd::run() call + // - This is the desired behavior - the kill takes effect immediately + let generation = self.interrupt_handle.set_running_bit(); + #[cfg(not(gdb))] let debug_interrupt = false; #[cfg(gdb)] @@ -644,14 +642,16 @@ impl Hypervisor for KVMDriver { .interrupt_handle .debug_interrupt .load(Ordering::Relaxed); - // Don't run the vcpu if `cancel_requested` is true + // Don't run the vcpu if `cancel_requested` is set for our generation // - // Note: if a 
`InterruptHandle::kill()` called while this thread is **here** - // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after checking cancel_requested but before vcpu.run()): + // - kill() will stamp cancel_requested with the current generation + // - We will proceed with vcpu.run(), but signals will be sent to interrupt it + // - The vcpu will be interrupted and return EINTR (handled below) let exit_reason = if self .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed) + .is_cancel_requested_for_generation(generation) || debug_interrupt { Err(kvm_ioctls::Error::new(libc::EINTR)) @@ -665,34 +665,40 @@ impl Hypervisor for KVMDriver { Some(hyperlight_guest_tracing::invariant_tsc::read_tsc()); } - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then the vcpu will run, but we will keep sending signals to this thread - // to interrupt it until `running` is set to false. The `vcpu_fd::run()` call will - // return either normally with an exit reason, or from being "kicked" by out signal handler, with an EINTR error, - // both of which are fine. + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (during vcpu.run() execution): + // - kill() stamps cancel_requested with the current generation + // - kill() sends signals (SIGRTMIN+offset) to this thread repeatedly + // - The signal handler is a no-op, but it causes vcpu.run() to return EINTR + // - We check cancel_requested below and return Cancelled if generation matches self.vcpu_fd.run() }; - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then signals will be sent to this thread until `running` is set to false. - // This is fine since the signal handler is a no-op. 
+ // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after vcpu.run() returns but before clear_running_bit): + // - kill() continues sending signals to this thread (running bit is still set) + // - The signals are harmless (no-op handler), we just need to check cancel_requested + // - We load cancel_requested below to determine if this run was cancelled let cancel_requested = self .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed); + .is_cancel_requested_for_generation(generation); #[cfg(gdb)] let debug_interrupt = self .interrupt_handle .debug_interrupt .load(Ordering::Relaxed); - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**. - // Additionally signals will be sent to this thread until `running` is set to false. - // This is fine since the signal handler is a no-op. + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after loading cancel_requested but before clear_running_bit): + // - kill() stamps cancel_requested with the CURRENT generation (not the one we just loaded) + // - kill() continues sending signals until running bit is cleared + // - The newly stamped cancel_requested will affect the NEXT vcpu.run() call + // - Signals sent now are harmless (no-op handler) self.interrupt_handle.clear_running_bit(); - // At this point, `running` is false so no more signals will be sent to this thread, - // but we may still receive async signals that were sent before this point. - // To prevent those signals from interrupting subsequent calls to `run()` (on other vms!), - // we make sure to check `cancel_requested` before cancelling (see `libc::EINTR` match-arm below). + // At this point, running bit is clear so kill() will stop sending signals. + // However, we may still receive delayed signals that were sent before clear_running_bit. 
+ // These stale signals are harmless because: + // - The signal handler is a no-op + // - We check generation matching in cancel_requested before treating EINTR as cancellation + // - If generation doesn't match, we return Retry instead of Cancelled let result = match exit_reason { Ok(VcpuExit::Hlt) => { crate::debug!("KVM - Halt Details : {:#?}", &self); @@ -741,14 +747,16 @@ impl Hypervisor for KVMDriver { } }, Err(e) => match e.errno() { - // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled + // We send a signal (SIGRTMIN+offset) to interrupt the vcpu, which causes EINTR libc::EINTR => { - // If cancellation was not requested for this specific vm, the vcpu was interrupted because of debug interrupt or - // a stale signal that meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it + // Check if cancellation was requested for THIS specific generation. + // If not, the EINTR came from: + // - A debug interrupt (if GDB is enabled) + // - A stale signal from a previous guest call (generation mismatch) + // - A signal meant for a different sandbox on the same thread + // In these cases, we return Retry to continue execution. if cancel_requested { - self.interrupt_handle - .cancel_requested - .store(false, Ordering::Relaxed); + self.interrupt_handle.clear_cancel_requested(); HyperlightExit::Cancelled() } else { #[cfg(gdb)] diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 781099134..bbcbf4695 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -435,13 +435,51 @@ impl VirtualCPU { pub trait InterruptHandle: Debug + Send + Sync { /// Interrupt the corresponding sandbox from running. /// - /// - If this is called while the vcpu is running, then it will interrupt the vcpu and return `true`. 
- /// - If this is called while the vcpu is not running, (for example during a host call), the - /// vcpu will not immediately be interrupted, but will prevent the vcpu from running **the next time** - /// it's scheduled, and returns `false`. + /// This method attempts to cancel a currently executing guest function call by sending + /// a signal to the VCPU thread. It uses generation tracking and call_active flag to + /// ensure the interruption is safe and precise. + /// + /// # Behavior + /// + /// - **Guest function running**: If called while a guest function is executing (VCPU running + /// or in a host function call), this stamps the current generation into cancel_requested + /// and sends a signal to interrupt the VCPU. Returns `true`. + /// + /// - **No active call**: If called when no guest function call is in progress (call_active=false), + /// this has no effect and returns `false`. This prevents "kill-in-advance" where kill() + /// is called before a guest function starts. + /// + /// - **During host function**: If the guest call is currently executing a host function + /// (VCPU not running but call_active=true), this stamps cancel_requested. When the + /// host function returns and attempts to re-enter the guest, the cancellation will + /// be detected and the call will abort. Returns `true`. + /// + /// # Generation Tracking + /// + /// The method stamps the current generation number along with the cancellation request. 
+ /// This ensures that: + /// - Stale signals from previous calls are ignored (generation mismatch) + /// - Only the intended guest function call is affected + /// - Multiple rapid kill() calls on the same generation are idempotent + /// + /// # Blocking Behavior + /// + /// This function will block while attempting to deliver the signal to the VCPU thread, + /// retrying until either: + /// - The signal is successfully delivered (VCPU transitions from running to not running) + /// - The VCPU stops running for another reason (e.g., call completes normally) + /// - No call is active (call_active=false) + /// + /// # Returns + /// + /// - `true`: Cancellation request was stamped (kill will take effect) + /// - `false`: No active call, cancellation request was not stamped (no effect) /// /// # Note - /// This function will block for the duration of the time it takes for the vcpu thread to be interrupted. + /// + /// To reliably interrupt a guest call, ensure `kill()` is called while the guest + /// function is actually executing. Calling kill() before call_guest_function() will + /// have no effect. fn kill(&self) -> bool; /// Used by a debugger to interrupt the corresponding sandbox from running. @@ -458,50 +496,177 @@ pub trait InterruptHandle: Debug + Send + Sync { /// Returns true if the corresponding sandbox has been dropped fn dropped(&self) -> bool; + + /// Increment the generation counter for a new guest function call (Linux only). + /// + /// This must be called exactly once at the start of each guest function call, + /// before any VCPU execution begins. The returned generation number will be + /// used throughout the entire guest call, even if the VCPU is run multiple + /// times (due to host function calls, retries, etc.). + /// + /// # Returns + /// + /// The new generation number assigned to this guest function call. + /// + /// # Note + /// + /// This is only called on Linux (KVM/MSHV). Windows uses a different interrupt mechanism. 
+ #[cfg(any(kvm, mshv))] + fn increment_call_generation(&self) -> u64; + + /// Mark that a guest function call is starting (Linux only). + /// + /// Sets the call_active flag to true, indicating that a guest function call + /// is now in progress. This allows kill() to stamp cancel_requested. + /// + /// Must be called immediately after increment_call_generation() and before + /// any VCPU execution begins. + /// + /// # Note + /// + /// This is only called on Linux (KVM/MSHV). Windows uses a different interrupt mechanism. + #[cfg(any(kvm, mshv))] + fn set_call_active(&self); + + /// Mark that a guest function call has completed (Linux only). + /// + /// Clears the call_active flag, indicating that no guest function call is + /// in progress. After this, kill() will have no effect and will return false. + /// + /// Must be called at the end of call_guest_function_by_name_no_reset(), + /// after the guest call has fully completed (whether successfully or with error). + /// + /// # Note + /// + /// This is only called on Linux (KVM/MSHV). Windows uses a different interrupt mechanism. + #[cfg(any(kvm, mshv))] + fn clear_call_active(&self); } #[cfg(any(kvm, mshv))] #[derive(Debug)] pub(super) struct LinuxInterruptHandle { - /// Invariant: vcpu is running => most significant bit (63) of `running` is set. (Neither converse nor inverse is true) + /// Atomic flag combining running state and generation counter. + /// + /// **Bit 63**: VCPU running state (1 = running, 0 = not running) + /// **Bits 0-62**: Generation counter (incremented once per guest function call) + /// + /// # Generation Tracking + /// + /// The generation counter is incremented once at the start of each guest function call + /// and remains constant throughout that call, even if the VCPU is run multiple times + /// (due to host function calls, retries, etc.). This design solves the race condition + /// where a kill() from a previous call could spuriously cancel a new call. 
/// - /// Additionally, bit 0-62 tracks how many times the VCPU has been run. Incremented each time `run()` is called. + /// ## Why Generations Are Needed /// - /// This prevents an ABA problem where: - /// 1. The VCPU is running (generation N), - /// 2. It gets cancelled, - /// 3. Then quickly restarted (generation N+1), - /// before the original thread has observed that it was cancelled. + /// Consider this scenario WITHOUT generation tracking: + /// 1. Thread A starts guest call 1, VCPU runs + /// 2. Thread B calls kill(), sends signal to Thread A + /// 3. Guest call 1 completes before signal arrives + /// 4. Thread A starts guest call 2, VCPU runs again + /// 5. Stale signal from step 2 arrives and incorrectly cancels call 2 /// - /// Without this generation counter, the interrupt logic might assume the VCPU is still - /// in the *original* run (generation N), see that it's `running`, and re-send the signal. - /// But the new VCPU run (generation N+1) would treat this as a stale signal and ignore it, - /// potentially causing an infinite loop where no effective interrupt is delivered. + /// WITH generation tracking: + /// 1. Thread A starts guest call 1 (generation N), VCPU runs + /// 2. Thread B calls kill(), stamps cancel_requested with generation N + /// 3. Guest call 1 completes, signal may or may not have arrived yet + /// 4. Thread A starts guest call 2 (generation N+1), VCPU runs again + /// 5. If stale signal arrives, signal handler checks: cancel_requested.generation (N) != current generation (N+1) + /// 6. Stale signal is ignored, call 2 continues normally /// - /// Invariant: If the VCPU is running, `run_generation[bit 0-62]` matches the current run's generation. 
+ /// ## Per-Call vs Per-Run Generation + /// + /// It's critical that generation is incremented per GUEST FUNCTION CALL, not per vcpu.run(): + /// - A single guest function call may invoke vcpu.run() multiple times (host calls, retries) + /// - All run() calls within the same guest call must share the same generation + /// - This ensures kill() affects the entire guest function call atomically + /// + /// # Invariants + /// + /// - If VCPU is running: bit 63 is set (neither converse nor inverse holds) + /// - If VCPU is running: bits 0-62 match the current guest call's generation running: AtomicU64, - /// Invariant: vcpu is running => `tid` is the thread on which it is running. - /// Note: multiple vms may have the same `tid`, but at most one vm will have `running` set to true. + + /// Thread ID where the VCPU is currently running. + /// + /// # Invariants + /// + /// - If VCPU is running: tid contains the thread ID of the executing thread + /// - Multiple VMs may share the same tid, but at most one will have running=true tid: AtomicU64, - /// True when an "interruptor" has requested the VM to be cancelled. Set immediately when - /// `kill()` is called, and cleared when the vcpu is no longer running. - /// This is used to - /// 1. make sure stale signals do not interrupt the - /// the wrong vcpu (a vcpu may only be interrupted iff `cancel_requested` is true), - /// 2. ensure that if a vm is killed while a host call is running, - /// the vm will not re-enter the guest after the host call returns. - cancel_requested: AtomicBool, - /// True when the debugger has requested the VM to be interrupted. Set immediately when - /// `kill_from_debugger()` is called, and cleared when the vcpu is no longer running. - /// This is used to make sure stale signals do not interrupt the the wrong vcpu - /// (a vcpu may only be interrupted by a debugger if `debug_interrupt` is true), + + /// Generation-aware cancellation request flag. 
+ /// + /// **Bit 63**: Cancellation requested flag (1 = kill requested, 0 = no kill) + /// **Bits 0-62**: Generation number when cancellation was requested + /// + /// # Purpose + /// + /// This flag serves three critical functions: + /// + /// 1. **Prevent stale signals**: A VCPU may only be interrupted if cancel_requested + /// is set AND the generation matches the current call's generation + /// + /// 2. **Handle host function calls**: If kill() is called while a host function is + /// executing (VCPU not running but call is active), cancel_requested is stamped + /// with the current generation. When the host function returns and the VCPU + /// attempts to re-enter the guest, it will see the cancellation and abort. + /// + /// 3. **Detect stale kills**: If cancel_requested.generation doesn't match the + /// current generation, it's from a previous call and should be ignored + /// + /// # States and Transitions + /// + /// - **No cancellation**: cancel_requested = 0 (bit 63 clear) + /// - **Cancellation for generation N**: cancel_requested = (1 << 63) | N + /// - Signal handler checks: (cancel_requested & 0x7FFFFFFFFFFFFFFF) == current_generation + cancel_requested: AtomicU64, + + /// Flag indicating whether a guest function call is currently in progress. + /// + /// **true**: A guest function call is active (between call start and completion) + /// **false**: No guest function call is active + /// + /// # Purpose + /// + /// This flag prevents kill() from having any effect when called outside of a + /// guest function call. This solves the "kill-in-advance" problem where kill() + /// could be called before a guest function starts and would incorrectly cancel it. 
+ /// + /// # Behavior + /// + /// - Set to true at the start of call_guest_function_by_name_no_reset() + /// - Cleared at the end of call_guest_function_by_name_no_reset() + /// - kill() only stamps cancel_requested if call_active is true + /// - If kill() is called when call_active=false, it returns false and has no effect + /// + /// # Why AtomicBool is Safe + /// + /// Although there's a theoretical race where: + /// 1. Thread A checks call_active (false) + /// 2. Thread B sets call_active (true) and starts guest call + /// 3. Thread A's kill() returns false (no effect) + /// + /// This is acceptable because the generation tracking provides an additional + /// safety layer. Even if a stale kill somehow stamped cancel_requested, the + /// generation mismatch would cause it to be ignored. + call_active: AtomicBool, + + /// Debugger interrupt request flag (GDB only). + /// + /// Set when kill_from_debugger() is called, cleared when VCPU stops running. + /// Used to distinguish debugger interrupts from normal kill() interrupts. #[cfg(gdb)] debug_interrupt: AtomicBool, - /// Whether the corresponding vm is dropped + + /// Whether the corresponding VM has been dropped. dropped: AtomicBool, - /// Retry delay between signals sent to the vcpu thread + + /// Delay between retry attempts when sending signals to the VCPU thread. retry_delay: Duration, - /// The offset of the SIGRTMIN signal used to interrupt the vcpu thread + + /// Offset from SIGRTMIN for the signal used to interrupt the VCPU thread. 
sig_rt_min_offset: u8, } @@ -509,18 +674,51 @@ pub(super) struct LinuxInterruptHandle { impl LinuxInterruptHandle { const RUNNING_BIT: u64 = 1 << 63; const MAX_GENERATION: u64 = Self::RUNNING_BIT - 1; + const CANCEL_REQUESTED_BIT: u64 = 1 << 63; + + // Set cancel_requested to true with the given generation + fn set_cancel_requested(&self, generation: u64) { + let value = Self::CANCEL_REQUESTED_BIT | (generation & Self::MAX_GENERATION); + self.cancel_requested.store(value, Ordering::Release); + } + + // Clear cancel_requested (reset to no cancellation) + pub(crate) fn clear_cancel_requested(&self) { + self.cancel_requested.store(0, Ordering::Release); + } - // set running to true and increment the generation. Generation will wrap around at `MAX_GENERATION`. - fn set_running_and_increment_generation(&self) -> std::result::Result { + // Check if cancel_requested is set for the given generation + fn is_cancel_requested_for_generation(&self, generation: u64) -> bool { + let raw = self.cancel_requested.load(Ordering::Acquire); + let is_set = raw & Self::CANCEL_REQUESTED_BIT != 0; + let stored_generation = raw & Self::MAX_GENERATION; + is_set && stored_generation == generation + } + + // Increment the generation for a new guest function call. Generation will wrap around at `MAX_GENERATION`. 
+ pub(crate) fn increment_generation(&self) -> u64 { self.running - .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |raw| { - let generation = raw & !Self::RUNNING_BIT; - if generation == Self::MAX_GENERATION { + .fetch_update(Ordering::Release, Ordering::Acquire, |raw| { + let current_generation = raw & !Self::RUNNING_BIT; + let running_bit = raw & Self::RUNNING_BIT; + if current_generation == Self::MAX_GENERATION { // restart generation from 0 - return Some(Self::RUNNING_BIT); + return Some(running_bit); } - Some((generation + 1) | Self::RUNNING_BIT) + Some((current_generation + 1) | running_bit) }) + .map(|raw| (raw & !Self::RUNNING_BIT) + 1) // Return the NEW generation + .unwrap_or(1) // If wrapped, return 1 + } + + // set running to true without incrementing generation + fn set_running_bit(&self) -> u64 { + self.running + .fetch_update(Ordering::Release, Ordering::Acquire, |raw| { + Some(raw | Self::RUNNING_BIT) + }) + .map(|raw| raw & !Self::RUNNING_BIT) // Return the current generation + .unwrap_or(0) } // clear the running bit and return the generation @@ -536,7 +734,7 @@ impl LinuxInterruptHandle { (running, generation) } - fn send_signal(&self) -> bool { + fn send_signal(&self, stamp_generation: bool) -> bool { let signal_number = libc::SIGRTMIN() + self.sig_rt_min_offset as libc::c_int; let mut sent_signal = false; let mut target_generation: Option = None; @@ -544,6 +742,20 @@ impl LinuxInterruptHandle { loop { let (running, generation) = self.get_running_and_generation(); + // Stamp generation into cancel_requested if requested and this is the first iteration + // We stamp even when running=false to support killing during host function calls + // The generation tracking will prevent stale kills from affecting new calls + // Only stamp if a call is actually active (call_active=true) + if stamp_generation + && target_generation.is_none() + && self.call_active.load(Ordering::Acquire) + { + self.set_cancel_requested(generation); + target_generation = 
Some(generation); + } + + // If not running, we've stamped the generation (if requested), so we're done + // This handles the host function call scenario if !running { break; } @@ -570,18 +782,33 @@ impl LinuxInterruptHandle { #[cfg(any(kvm, mshv))] impl InterruptHandle for LinuxInterruptHandle { fn kill(&self) -> bool { - self.cancel_requested.store(true, Ordering::Relaxed); - - self.send_signal() + // send_signal will stamp the generation into cancel_requested + // right before sending each signal, ensuring they're always in sync + self.send_signal(true) } #[cfg(gdb)] fn kill_from_debugger(&self) -> bool { self.debug_interrupt.store(true, Ordering::Relaxed); - self.send_signal() + self.send_signal(false) } fn dropped(&self) -> bool { self.dropped.load(Ordering::Relaxed) } + + #[cfg(any(kvm, mshv))] + fn increment_call_generation(&self) -> u64 { + self.increment_generation() + } + + #[cfg(any(kvm, mshv))] + fn set_call_active(&self) { + self.call_active.store(true, Ordering::Release); + } + + #[cfg(any(kvm, mshv))] + fn clear_call_active(&self) { + self.call_active.store(false, Ordering::Release); + } } #[cfg(all(test, any(target_os = "windows", kvm)))] diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 6dc02d71f..6dcb1508b 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -390,6 +390,14 @@ impl MultiUseSandbox { return_type: ReturnType, args: Vec, ) -> Result { + // Increment generation for this guest function call (Linux only) + #[cfg(any(kvm, mshv))] + let _generation = self.vm.interrupt_handle().increment_call_generation(); + + // Mark that a guest function call is now active (Linux only) + #[cfg(any(kvm, mshv))] + self.vm.interrupt_handle().set_call_active(); + let res = (|| { let estimated_capacity = estimate_flatbuffer_capacity(function_name, &args); @@ -405,6 +413,10 @@ impl 
MultiUseSandbox { self.mem_mgr.write_guest_function_call(buffer)?; + // Increment generation for this guest function call (Linux only) + #[cfg(any(kvm, mshv))] + self.vm.interrupt_handle().increment_call_generation(); + self.vm.dispatch_call_from_host( self.dispatch_ptr.clone(), #[cfg(gdb)] @@ -440,6 +452,11 @@ impl MultiUseSandbox { if res.is_err() { self.mem_mgr.clear_io_buffers(); } + + // Mark that the guest function call has completed (Linux only) + #[cfg(any(kvm, mshv))] + self.vm.interrupt_handle().clear_call_active(); + res } diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index 0869a9d5a..ed204f712 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -68,7 +68,9 @@ fn interrupt_host_call() { } }); - let result = sandbox.call::("CallHostSpin", ()).unwrap_err(); + let result = sandbox.call::("CallHostSpin", ()); + println!("Result: {:?}", result); + let result = result.unwrap_err(); assert!(matches!(result, HyperlightError::ExecutionCanceledByHost())); thread.join().unwrap(); @@ -105,7 +107,8 @@ fn interrupt_in_progress_guest_call() { thread.join().expect("Thread should finish"); } -/// Makes sure interrupting a vm before the guest call has started also prevents the guest call from being executed +/// Makes sure interrupting a vm before the guest call has started has no effect, +/// but a second kill after the call starts will interrupt it #[test] fn interrupt_guest_call_in_advance() { let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); @@ -114,15 +117,20 @@ fn interrupt_guest_call_in_advance() { let interrupt_handle = sbox1.interrupt_handle(); assert!(!interrupt_handle.dropped()); // not yet dropped - // kill vm before the guest call has started + // First kill before the guest call has started - should have no effect + // Then kill again after a delay to interrupt the actual call let thread = thread::spawn(move 
|| { - assert!(!interrupt_handle.kill()); // should return false since vcpu is not running yet + assert!(!interrupt_handle.kill()); // should return false since no call is active barrier2.wait(); + // Wait a bit for the Spin call to actually start + thread::sleep(Duration::from_millis(100)); + assert!(interrupt_handle.kill()); // this should succeed and interrupt the Spin call barrier2.wait(); // wait here until main thread has dropped the sandbox assert!(interrupt_handle.dropped()); }); - barrier.wait(); // wait until `kill()` is called before starting the guest call + barrier.wait(); // wait until first `kill()` is called before starting the guest call + // The Spin call should be interrupted by the second kill() let res = sbox1.call::("Spin", ()).unwrap_err(); assert!(matches!(res, HyperlightError::ExecutionCanceledByHost())); @@ -857,3 +865,365 @@ fn test_if_guest_is_able_to_get_string_return_values_from_host() { "Guest Function, string added by Host Function".to_string() ); } + +/// Test that monitors CPU time usage and can interrupt a guest based on CPU time limits +/// Uses a pool of 100 sandboxes, 100 threads, and 500 iterations per thread +/// Some sandboxes are expected to complete normally, some are expected to be killed +/// This test makes sure that a reused sandbox is not killed in the case where the previous +/// execution was killed due to CPU time limit but the invocation completed normally before the cancel was processed. 
+#[test] +fn test_cpu_time_interrupt() { + use std::collections::VecDeque; + use std::mem::MaybeUninit; + use std::sync::Mutex; + use std::sync::atomic::AtomicUsize; + use std::sync::mpsc::channel; + + const POOL_SIZE: usize = 100; + const NUM_THREADS: usize = 100; + const ITERATIONS_PER_THREAD: usize = 500; + + // Create a pool of 100 sandboxes + println!("Creating pool of {} sandboxes...", POOL_SIZE); + let mut sandbox_pool: Vec = Vec::with_capacity(POOL_SIZE); + for i in 0..POOL_SIZE { + let sandbox = new_uninit_rust().unwrap().evolve().unwrap(); + if (i + 1) % 10 == 0 { + println!("Created {}/{} sandboxes", i + 1, POOL_SIZE); + } + sandbox_pool.push(sandbox); + } + + // Wrap the pool in Arc> for thread-safe access + let pool = Arc::new(Mutex::new(VecDeque::from(sandbox_pool))); + + // Counters for statistics + let total_iterations = Arc::new(AtomicUsize::new(0)); + let killed_count = Arc::new(AtomicUsize::new(0)); + let completed_count = Arc::new(AtomicUsize::new(0)); + let errors_count = Arc::new(AtomicUsize::new(0)); + + println!( + "Starting {} threads with {} iterations each...", + NUM_THREADS, ITERATIONS_PER_THREAD + ); + + // Spawn worker threads + let mut thread_handles = vec![]; + for thread_id in 0..NUM_THREADS { + let pool_clone = Arc::clone(&pool); + let total_iterations_clone = Arc::clone(&total_iterations); + let killed_count_clone = Arc::clone(&killed_count); + let completed_count_clone = Arc::clone(&completed_count); + let errors_count_clone = Arc::clone(&errors_count); + + let handle = thread::spawn(move || { + for iteration in 0..ITERATIONS_PER_THREAD { + // === START OF ITERATION === + // Get a fresh sandbox from the pool for this iteration + let mut sandbox = { + let mut pool_guard = pool_clone.lock().unwrap(); + // Wait if pool is empty (shouldn't happen with proper design) + while pool_guard.is_empty() { + drop(pool_guard); + thread::sleep(Duration::from_micros(100)); + pool_guard = pool_clone.lock().unwrap(); + } + 
pool_guard.pop_front().unwrap() + }; + + // Vary CPU time between 3ms and 7ms to ensure some get killed and some complete + // The CPU limit is 5ms, so: + // - 3-4ms should complete normally + // - 6-7ms should be killed + // - 5ms is borderline and could go either way + let cpu_time_ms = + 3 + (((thread_id * ITERATIONS_PER_THREAD + iteration) % 5) as u32); + + let interrupt_handle = sandbox.interrupt_handle(); + + // Channel to send the thread ID + let (tx, rx) = channel(); + + // Flag to signal monitoring start + let should_monitor = Arc::new(AtomicBool::new(false)); + let should_monitor_clone = should_monitor.clone(); + + // Flag to signal monitoring stop (when guest execution completes) + let stop_monitoring = Arc::new(AtomicBool::new(false)); + let stop_monitoring_clone = stop_monitoring.clone(); + + // Flag to track if we actually sent a kill signal + let was_killed = Arc::new(AtomicBool::new(false)); + let was_killed_clone = was_killed.clone(); + + // Spawn CPU time monitor thread + let monitor_thread = thread::spawn(move || { + let main_thread_id = match rx.recv() { + Ok(tid) => tid, + Err(_) => return, + }; + + while !should_monitor_clone.load(Ordering::Acquire) { + thread::sleep(Duration::from_micros(50)); + } + + #[cfg(target_os = "linux")] + unsafe { + let mut clock_id: libc::clockid_t = 0; + if libc::pthread_getcpuclockid(main_thread_id, &mut clock_id) != 0 { + return; + } + + let cpu_limit_ns = 5_000_000; // 5ms CPU time limit + let mut start_time = MaybeUninit::::uninit(); + + if libc::clock_gettime(clock_id, start_time.as_mut_ptr()) != 0 { + return; + } + let start_time = start_time.assume_init(); + + loop { + // Check if we should stop monitoring (guest completed) + if stop_monitoring_clone.load(Ordering::Acquire) { + break; + } + + let mut current_time = MaybeUninit::::uninit(); + if libc::clock_gettime(clock_id, current_time.as_mut_ptr()) != 0 { + break; + } + let current_time = current_time.assume_init(); + + let elapsed_ns = 
(current_time.tv_sec - start_time.tv_sec) + * 1_000_000_000 + + (current_time.tv_nsec - start_time.tv_nsec); + + if elapsed_ns > cpu_limit_ns { + // Double-check that monitoring should still continue before killing + // The guest might have completed between our last check and now + if stop_monitoring_clone.load(Ordering::Acquire) { + break; + } + + // Mark that we sent a kill signal BEFORE calling kill + // to avoid race conditions + was_killed_clone.store(true, Ordering::Release); + interrupt_handle.kill(); + break; + } + + thread::sleep(Duration::from_micros(50)); + } + } + + #[cfg(target_os = "windows")] + unsafe { + use std::ffi::c_void; + + // On Windows, we use GetThreadTimes to get CPU time + // main_thread_id is a HANDLE on Windows + let thread_handle = main_thread_id as *mut c_void; + + let cpu_limit_ns: i64 = 5_000_000; // 5ms CPU time limit (in nanoseconds) + + let mut creation_time = + MaybeUninit::::uninit(); + let mut exit_time = + MaybeUninit::::uninit(); + let mut kernel_time_start = + MaybeUninit::::uninit(); + let mut user_time_start = + MaybeUninit::::uninit(); + + // Get initial CPU times + if windows_sys::Win32::System::Threading::GetThreadTimes( + thread_handle, + creation_time.as_mut_ptr(), + exit_time.as_mut_ptr(), + kernel_time_start.as_mut_ptr(), + user_time_start.as_mut_ptr(), + ) == 0 + { + return; + } + + let kernel_time_start = kernel_time_start.assume_init(); + let user_time_start = user_time_start.assume_init(); + + // Convert FILETIME to u64 (100-nanosecond intervals) + let start_cpu_time = ((kernel_time_start.dwHighDateTime as u64) << 32 + | kernel_time_start.dwLowDateTime as u64) + + ((user_time_start.dwHighDateTime as u64) << 32 + | user_time_start.dwLowDateTime as u64); + + loop { + // Check if we should stop monitoring (guest completed) + if stop_monitoring_clone.load(Ordering::Acquire) { + break; + } + + let mut kernel_time_current = + MaybeUninit::::uninit(); + let mut user_time_current = + MaybeUninit::::uninit(); + + if 
windows_sys::Win32::System::Threading::GetThreadTimes( + thread_handle, + creation_time.as_mut_ptr(), + exit_time.as_mut_ptr(), + kernel_time_current.as_mut_ptr(), + user_time_current.as_mut_ptr(), + ) == 0 + { + break; + } + + let kernel_time_current = kernel_time_current.assume_init(); + let user_time_current = user_time_current.assume_init(); + + // Convert FILETIME to u64 + let current_cpu_time = ((kernel_time_current.dwHighDateTime as u64) + << 32 + | kernel_time_current.dwLowDateTime as u64) + + ((user_time_current.dwHighDateTime as u64) << 32 + | user_time_current.dwLowDateTime as u64); + + // FILETIME is in 100-nanosecond intervals, convert to nanoseconds + let elapsed_ns = ((current_cpu_time - start_cpu_time) * 100) as i64; + + if elapsed_ns > cpu_limit_ns { + // Double-check that monitoring should still continue before killing + // The guest might have completed between our last check and now + if stop_monitoring_clone.load(Ordering::Acquire) { + break; + } + + // Mark that we sent a kill signal BEFORE calling kill + // to avoid race conditions + was_killed_clone.store(true, Ordering::Release); + interrupt_handle.kill(); + break; + } + + thread::sleep(Duration::from_micros(50)); + } + } + }); + + // Send thread ID and start monitoring + #[cfg(target_os = "linux")] + unsafe { + let thread_id = libc::pthread_self(); + let _ = tx.send(thread_id); + } + + #[cfg(target_os = "windows")] + unsafe { + // On Windows, get the current thread's pseudo-handle + let thread_handle = windows_sys::Win32::System::Threading::GetCurrentThread(); + let _ = tx.send(thread_handle as usize); + } + + should_monitor.store(true, Ordering::Release); + + // Call the guest function + let result = sandbox.call::("SpinForMs", cpu_time_ms); + + // Signal the monitor to stop + stop_monitoring.store(true, Ordering::Release); + + // Wait for monitor thread to complete to ensure was_killed flag is set + let _ = monitor_thread.join(); + + // NOW check if we sent a kill signal (after monitor 
thread has completed) + let kill_was_sent = was_killed.load(Ordering::Acquire); + + // Process the result and validate correctness + match result { + Err(HyperlightError::ExecutionCanceledByHost()) => { + // We received a cancellation error + if !kill_was_sent { + // ERROR: We got a cancellation but never sent a kill! + panic!( + "Thread {} iteration {}: Got ExecutionCanceledByHost but no kill signal was sent!", + thread_id, iteration + ); + } + // This is correct - we sent kill and got the error + killed_count_clone.fetch_add(1, Ordering::Relaxed); + } + Ok(_) => { + // Execution completed normally + // This is OK whether or not we sent a kill - the guest might have + // finished just before the kill signal arrived + completed_count_clone.fetch_add(1, Ordering::Relaxed); + } + Err(e) => { + // Unexpected error + eprintln!( + "Thread {} iteration {}: Unexpected error: {:?}, kill_sent: {}", + thread_id, iteration, e, kill_was_sent + ); + errors_count_clone.fetch_add(1, Ordering::Relaxed); + } + } + + total_iterations_clone.fetch_add(1, Ordering::Relaxed); + + // Progress reporting + let current_total = total_iterations_clone.load(Ordering::Relaxed); + if current_total % 500 == 0 { + println!( + "Progress: {}/{} iterations completed", + current_total, + NUM_THREADS * ITERATIONS_PER_THREAD + ); + } + + // === END OF ITERATION === + // Return sandbox to pool for reuse by other threads/iterations + { + let mut pool_guard = pool_clone.lock().unwrap(); + pool_guard.push_back(sandbox); + } + } + }); + + thread_handles.push(handle); + } + + // Wait for all threads to complete + for handle in thread_handles { + handle.join().unwrap(); + } + + // Print statistics + let total = total_iterations.load(Ordering::Relaxed); + let killed = killed_count.load(Ordering::Relaxed); + let completed = completed_count.load(Ordering::Relaxed); + let errors = errors_count.load(Ordering::Relaxed); + + println!("\n=== Test Statistics ==="); + println!("Total iterations: {}", total); + 
println!("Killed (CPU limit exceeded): {}", killed); + println!("Completed normally: {}", completed); + println!("Errors: {}", errors); + println!("Kill rate: {:.1}%", (killed as f64 / total as f64) * 100.0); + + // Verify we had both kills and completions + assert!( + killed > 0, + "Expected some executions to be killed, but none were" + ); + assert!( + completed > 0, + "Expected some executions to complete, but none did" + ); + assert_eq!(errors, 0, "Expected no errors, but got {}", errors); + assert_eq!( + total, + NUM_THREADS * ITERATIONS_PER_THREAD, + "Not all iterations completed" + ); +} diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index bdf8d5f62..05d75a512 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -583,6 +583,37 @@ fn spin(_: &FunctionCall) -> Result> { Ok(get_flatbuffer_result(())) } +/// Spins the CPU for approximately the specified number of milliseconds +fn spin_for_ms(fc: &FunctionCall) -> Result> { + let milliseconds = if let ParameterValue::UInt(ms) = fc.parameters.clone().unwrap()[0].clone() { + ms + } else { + return Err(HyperlightGuestError::new( + ErrorCode::GuestFunctionParameterTypeMismatch, + "Expected UInt parameter".to_string(), + )); + }; + + // Simple busy-wait loop - not precise but good enough for testing + // Different iteration counts for debug vs release mode to ensure reasonable CPU usage + #[cfg(debug_assertions)] + let iterations_per_ms = 120_000; // Debug mode - less optimized, tuned for ~50% kill rate + + #[cfg(not(debug_assertions))] + let iterations_per_ms = 1_000_000; // Release mode - highly optimized + + let total_iterations = milliseconds * iterations_per_ms; + + let mut counter: u64 = 0; + for _ in 0..total_iterations { + // Prevent the compiler from optimizing away the loop + counter = counter.wrapping_add(1); + core::hint::black_box(counter); + } + + Ok(get_flatbuffer_result(counter)) +} 
+ #[hyperlight_guest_tracing::trace_function] fn test_abort(function_call: &FunctionCall) -> Result> { if let ParameterValue::Int(code) = function_call.parameters.clone().unwrap()[0].clone() { @@ -1366,6 +1397,14 @@ pub extern "C" fn hyperlight_main() { ); register_function(spin_def); + let spin_for_ms_def = GuestFunctionDefinition::new( + "SpinForMs".to_string(), + Vec::from(&[ParameterType::UInt]), + ReturnType::ULong, + spin_for_ms as usize, + ); + register_function(spin_for_ms_def); + let abort_def = GuestFunctionDefinition::new( "GuestAbortWithCode".to_string(), Vec::from(&[ParameterType::Int]), From a1c7acb4e81f2dc08424d6502fcf61dd12a9c2a9 Mon Sep 17 00:00:00 2001 From: James Sturtevant Date: Fri, 17 Oct 2025 15:14:15 -0700 Subject: [PATCH 02/16] Do not set canceled on windows if vm is not running Signed-off-by: James Sturtevant --- src/hyperlight_host/src/hypervisor/hyperv_windows.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index 490e1f72e..0eb2424da 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -963,9 +963,15 @@ pub struct WindowsInterruptHandle { impl InterruptHandle for WindowsInterruptHandle { fn kill(&self) -> bool { + // don't send the signal if the the vm isn't running + // In the case this is called before the vm is running the cancel_requested would be set + // and stay set while the vm is running. 
+ let running = self.running.load(Ordering::Relaxed); + if !running { + return false; + } self.cancel_requested.store(true, Ordering::Relaxed); - self.running.load(Ordering::Relaxed) - && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } + unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } } #[cfg(gdb)] fn kill_from_debugger(&self) -> bool { From 860bda32d7f99951409bc64ae6b1a546aba6057f Mon Sep 17 00:00:00 2001 From: James Sturtevant Date: Fri, 17 Oct 2025 16:17:45 -0700 Subject: [PATCH 03/16] Should retry if Windows platform does cancelation and it wasn't requested by us Signed-off-by: James Sturtevant --- .../src/hypervisor/hyperv_windows.rs | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index 0eb2424da..8c59ff1df 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -581,6 +581,8 @@ impl Hypervisor for HypervWindowsDriver { } self.processor.run()? 
}; + let cancel_was_requested_manually = self.interrupt_handle + .cancel_requested.load(Ordering::Relaxed); self.interrupt_handle .cancel_requested .store(false, Ordering::Relaxed); @@ -663,11 +665,31 @@ impl Hypervisor for HypervWindowsDriver { // and resume execution HyperlightExit::Debug(VcpuStopReason::Interrupt) } else { - HyperlightExit::Cancelled() + if !cancel_was_requested_manually { + // This was an internal cancellation + // The virtualization stack can use this function to return the control + // of a virtual processor back to the virtualization stack in case it + // needs to change the state of a VM or to inject an event into the processor + debug!("Internal cancellation detected, returning Retry error"); + HyperlightExit::Retry() + } else { + HyperlightExit::Cancelled() + } } #[cfg(not(gdb))] - HyperlightExit::Cancelled() + { + if !cancel_was_requested_manually { + // This was an internal cancellation + // The virtualization stack can use this function to return the control + // of a virtual processor back to the virtualization stack in case it + // needs to change the state of a VM or to inject an event into the processor + debug!("Internal cancellation detected, returning Retry error"); + HyperlightExit::Retry() + } else { + HyperlightExit::Cancelled() + } + } } #[cfg(gdb)] WHV_RUN_VP_EXIT_REASON(4098i32) => { From fdd3eb12cea355720f119b6ee0be12b3aabf9d08 Mon Sep 17 00:00:00 2001 From: James Sturtevant Date: Fri, 17 Oct 2025 17:07:40 -0700 Subject: [PATCH 04/16] Handle host function call cancelations Signed-off-by: James Sturtevant --- .../src/hypervisor/hyperv_windows.rs | 47 ++++++++++++++----- src/hyperlight_host/src/hypervisor/mod.rs | 26 ++++------ .../src/sandbox/initialized_multi_use.rs | 6 +-- src/tests/rust_guests/simpleguest/src/main.rs | 4 +- 4 files changed, 50 insertions(+), 33 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index 8c59ff1df..527d31c48 
100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -332,6 +332,7 @@ impl HypervWindowsDriver { cancel_requested: AtomicBool::new(false), #[cfg(gdb)] debug_interrupt: AtomicBool::new(false), + call_active: AtomicBool::new(false), partition_handle, dropped: AtomicBool::new(false), }); @@ -581,8 +582,10 @@ impl Hypervisor for HypervWindowsDriver { } self.processor.run()? }; - let cancel_was_requested_manually = self.interrupt_handle - .cancel_requested.load(Ordering::Relaxed); + let cancel_was_requested_manually = self + .interrupt_handle + .cancel_requested + .load(Ordering::Relaxed); self.interrupt_handle .cancel_requested .store(false, Ordering::Relaxed); @@ -668,9 +671,9 @@ impl Hypervisor for HypervWindowsDriver { if !cancel_was_requested_manually { // This was an internal cancellation // The virtualization stack can use this function to return the control - // of a virtual processor back to the virtualization stack in case it + // of a virtual processor back to the virtualization stack in case it // needs to change the state of a VM or to inject an event into the processor - debug!("Internal cancellation detected, returning Retry error"); + println!("Internal cancellation detected, returning Retry error"); HyperlightExit::Retry() } else { HyperlightExit::Cancelled() @@ -682,9 +685,9 @@ impl Hypervisor for HypervWindowsDriver { if !cancel_was_requested_manually { // This was an internal cancellation // The virtualization stack can use this function to return the control - // of a virtual processor back to the virtualization stack in case it + // of a virtual processor back to the virtualization stack in case it // needs to change the state of a VM or to inject an event into the processor - debug!("Internal cancellation detected, returning Retry error"); + println!("Internal cancellation detected, returning Retry error"); HyperlightExit::Retry() } else { HyperlightExit::Cancelled() @@ 
-979,21 +982,35 @@ pub struct WindowsInterruptHandle { // This is used to signal the GDB thread to stop the vCPU #[cfg(gdb)] debug_interrupt: AtomicBool, + /// Flag indicating whether a guest function call is currently in progress. + /// + /// **true**: A guest function call is active (between call start and completion) + /// **false**: No guest function call is active + /// + /// # Purpose + /// + /// This flag prevents kill() from having any effect when called outside of a + /// guest function call. This solves the "kill-in-advance" problem where kill() + /// could be called before a guest function starts and would incorrectly cancel it. + call_active: AtomicBool, partition_handle: WHV_PARTITION_HANDLE, dropped: AtomicBool, } impl InterruptHandle for WindowsInterruptHandle { fn kill(&self) -> bool { + // Check if a call is actually active first + if !self.call_active.load(Ordering::Acquire) { + return false; + } + // don't send the signal if the the vm isn't running // In the case this is called before the vm is running the cancel_requested would be set // and stay set while the vm is running. 
- let running = self.running.load(Ordering::Relaxed); - if !running { - return false; - } + self.cancel_requested.store(true, Ordering::Relaxed); - unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } + self.running.load(Ordering::Relaxed) + && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } } #[cfg(gdb)] fn kill_from_debugger(&self) -> bool { @@ -1005,4 +1022,12 @@ impl InterruptHandle for WindowsInterruptHandle { fn dropped(&self) -> bool { self.dropped.load(Ordering::Relaxed) } + + fn set_call_active(&self) { + self.call_active.store(true, Ordering::Release); + } + + fn clear_call_active(&self) { + self.call_active.store(false, Ordering::Release); + } } diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index bbcbf4695..73d2ab35e 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -26,7 +26,7 @@ use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; use crate::metrics::METRIC_GUEST_CANCELLATION; #[cfg(feature = "trace_guest")] use crate::sandbox::TraceInfo; -use crate::{HyperlightError, Result, log_then_return}; +use crate::{HyperlightError, Result, debug, log_then_return}; /// HyperV-on-linux functionality #[cfg(mshv)] @@ -413,7 +413,10 @@ impl VirtualCPU { log_then_return!("Unexpected VM Exit {:?}", reason); } - Ok(HyperlightExit::Retry()) => continue, + Ok(HyperlightExit::Retry()) => { + debug!("retring vm run"); + continue; + } Err(e) => { #[cfg(crashdump)] crashdump::generate_crashdump(hv)?; @@ -436,7 +439,7 @@ pub trait InterruptHandle: Debug + Send + Sync { /// Interrupt the corresponding sandbox from running. /// /// This method attempts to cancel a currently executing guest function call by sending - /// a signal to the VCPU thread. It uses generation tracking and call_active flag to + /// a signal to the VCPU thread. 
It uses generation tracking (linux only) and call_active flag to /// ensure the interruption is safe and precise. /// /// # Behavior @@ -454,7 +457,7 @@ pub trait InterruptHandle: Debug + Send + Sync { /// host function returns and attempts to re-enter the guest, the cancellation will /// be detected and the call will abort. Returns `true`. /// - /// # Generation Tracking + /// # Generation Tracking (linux only) /// /// The method stamps the current generation number along with the cancellation request. /// This ensures that: @@ -468,7 +471,6 @@ pub trait InterruptHandle: Debug + Send + Sync { /// retrying until either: /// - The signal is successfully delivered (VCPU transitions from running to not running) /// - The VCPU stops running for another reason (e.g., call completes normally) - /// - No call is active (call_active=false) /// /// # Returns /// @@ -514,32 +516,22 @@ pub trait InterruptHandle: Debug + Send + Sync { #[cfg(any(kvm, mshv))] fn increment_call_generation(&self) -> u64; - /// Mark that a guest function call is starting (Linux only). + /// Mark that a guest function call is starting. /// /// Sets the call_active flag to true, indicating that a guest function call /// is now in progress. This allows kill() to stamp cancel_requested. /// /// Must be called immediately after increment_call_generation() and before /// any VCPU execution begins. - /// - /// # Note - /// - /// This is only called on Linux (KVM/MSHV). Windows uses a different interrupt mechanism. - #[cfg(any(kvm, mshv))] fn set_call_active(&self); - /// Mark that a guest function call has completed (Linux only). + /// Mark that a guest function call has completed. /// /// Clears the call_active flag, indicating that no guest function call is /// in progress. After this, kill() will have no effect and will return false. /// /// Must be called at the end of call_guest_function_by_name_no_reset(), /// after the guest call has fully completed (whether successfully or with error). 
- /// - /// # Note - /// - /// This is only called on Linux (KVM/MSHV). Windows uses a different interrupt mechanism. - #[cfg(any(kvm, mshv))] fn clear_call_active(&self); } diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 6dcb1508b..129d4a938 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -394,8 +394,7 @@ impl MultiUseSandbox { #[cfg(any(kvm, mshv))] let _generation = self.vm.interrupt_handle().increment_call_generation(); - // Mark that a guest function call is now active (Linux only) - #[cfg(any(kvm, mshv))] + // Mark that a guest function call is now active self.vm.interrupt_handle().set_call_active(); let res = (|| { @@ -453,8 +452,7 @@ impl MultiUseSandbox { self.mem_mgr.clear_io_buffers(); } - // Mark that the guest function call has completed (Linux only) - #[cfg(any(kvm, mshv))] + // Mark that the guest function call has completed self.vm.interrupt_handle().clear_call_active(); res diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index 05d75a512..30d1b6b49 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -611,7 +611,9 @@ fn spin_for_ms(fc: &FunctionCall) -> Result> { core::hint::black_box(counter); } - Ok(get_flatbuffer_result(counter)) + // Calculate the actual number of milliseconds spun for, based on the counter and iterations per ms + let ms_spun = (counter / iterations_per_ms as u64) as u64; + Ok(get_flatbuffer_result(ms_spun)) } #[hyperlight_guest_tracing::trace_function] From dfef9b9760451311ff01658eed3a32c6e82413af Mon Sep 17 00:00:00 2001 From: James Sturtevant Date: Fri, 17 Oct 2025 17:10:58 -0700 Subject: [PATCH 05/16] Fix type and spelling Signed-off-by: James Sturtevant --- src/hyperlight_host/src/hypervisor/mod.rs | 6 ++++-- 1 file changed, 4 
insertions(+), 2 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 73d2ab35e..c9d6b996f 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -114,7 +114,9 @@ pub enum HyperlightExit { Cancelled(), /// The vCPU has exited for a reason that is not handled by Hyperlight Unknown(String), - /// The operation should be retried, for example this can happen on Linux where a call to run the CPU can return EAGAIN + /// The operation should be retried + /// On Linux this can happen where a call to run the CPU can return EAGAIN + /// On Windows the platform could cause a cancelation of the VM run Retry(), } @@ -414,7 +416,7 @@ impl VirtualCPU { log_then_return!("Unexpected VM Exit {:?}", reason); } Ok(HyperlightExit::Retry()) => { - debug!("retring vm run"); + debug!("retrying vm run"); continue; } Err(e) => { From 06cc02349f4d5d3fe47f594d8ac8ffe4fd9a5a22 Mon Sep 17 00:00:00 2001 From: James Sturtevant Date: Fri, 17 Oct 2025 17:19:05 -0700 Subject: [PATCH 06/16] Fix println that was for debugging Signed-off-by: James Sturtevant --- src/hyperlight_host/src/hypervisor/hyperv_windows.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index 527d31c48..f92852fb5 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -673,7 +673,7 @@ impl Hypervisor for HypervWindowsDriver { // The virtualization stack can use this function to return the control // of a virtual processor back to the virtualization stack in case it // needs to change the state of a VM or to inject an event into the processor - println!("Internal cancellation detected, returning Retry error"); + debug!("Internal cancellation detected, returning Retry error"); HyperlightExit::Retry() } else { 
HyperlightExit::Cancelled() @@ -687,7 +687,7 @@ impl Hypervisor for HypervWindowsDriver { // The virtualization stack can use this function to return the control // of a virtual processor back to the virtualization stack in case it // needs to change the state of a VM or to inject an event into the processor - println!("Internal cancellation detected, returning Retry error"); + debug!("Internal cancellation detected, returning Retry error"); HyperlightExit::Retry() } else { HyperlightExit::Cancelled() From f4a149acb85cea97ea0371c35d15ee7aa78e2c85 Mon Sep 17 00:00:00 2001 From: James Sturtevant Date: Fri, 17 Oct 2025 17:39:55 -0700 Subject: [PATCH 07/16] fix clippy Signed-off-by: James Sturtevant --- .../src/hypervisor/hyperv_windows.rs | 18 ++++++++---------- src/tests/rust_guests/simpleguest/src/main.rs | 2 +- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index f92852fb5..359f1a68f 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -667,17 +667,15 @@ impl Hypervisor for HypervWindowsDriver { // return a special exit reason so that the gdb thread can handle it // and resume execution HyperlightExit::Debug(VcpuStopReason::Interrupt) + } else if !cancel_was_requested_manually { + // This was an internal cancellation + // The virtualization stack can use this function to return the control + // of a virtual processor back to the virtualization stack in case it + // needs to change the state of a VM or to inject an event into the processor + debug!("Internal cancellation detected, returning Retry error"); + HyperlightExit::Retry() } else { - if !cancel_was_requested_manually { - // This was an internal cancellation - // The virtualization stack can use this function to return the control - // of a virtual processor back to the virtualization stack in case it - // needs 
to change the state of a VM or to inject an event into the processor - debug!("Internal cancellation detected, returning Retry error"); - HyperlightExit::Retry() - } else { - HyperlightExit::Cancelled() - } + HyperlightExit::Cancelled() } #[cfg(not(gdb))] diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index 30d1b6b49..758e88fea 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -612,7 +612,7 @@ fn spin_for_ms(fc: &FunctionCall) -> Result> { } // Calculate the actual number of milliseconds spun for, based on the counter and iterations per ms - let ms_spun = (counter / iterations_per_ms as u64) as u64; + let ms_spun = (counter / iterations_per_ms as u64); Ok(get_flatbuffer_result(ms_spun)) } From df448fce7e83d2720df63fe142729a9ac4c3d821 Mon Sep 17 00:00:00 2001 From: James Sturtevant Date: Fri, 17 Oct 2025 17:39:55 -0700 Subject: [PATCH 08/16] fix clippy Signed-off-by: James Sturtevant --- src/tests/rust_guests/simpleguest/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index 758e88fea..a761a1358 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -612,7 +612,7 @@ fn spin_for_ms(fc: &FunctionCall) -> Result> { } // Calculate the actual number of milliseconds spun for, based on the counter and iterations per ms - let ms_spun = (counter / iterations_per_ms as u64); + let ms_spun = counter / iterations_per_ms as u64; Ok(get_flatbuffer_result(ms_spun)) } From 4f14cf6ec16f9a1e6f50d8987d4d92fc079c00e4 Mon Sep 17 00:00:00 2001 From: James Sturtevant Date: Fri, 17 Oct 2025 19:56:34 -0700 Subject: [PATCH 09/16] temp commit: add logging in CI Signed-off-by: James Sturtevant --- src/hyperlight_host/src/hypervisor/hyperv_windows.rs | 2 +- 
src/hyperlight_host/src/hypervisor/mod.rs | 4 ++-- src/hyperlight_host/tests/integration_test.rs | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index 359f1a68f..6ca01046f 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -685,7 +685,7 @@ impl Hypervisor for HypervWindowsDriver { // The virtualization stack can use this function to return the control // of a virtual processor back to the virtualization stack in case it // needs to change the state of a VM or to inject an event into the processor - debug!("Internal cancellation detected, returning Retry error"); + println!("Internal cancellation detected, returning Retry error"); HyperlightExit::Retry() } else { HyperlightExit::Cancelled() diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index c9d6b996f..5646dcd30 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -416,8 +416,8 @@ impl VirtualCPU { log_then_return!("Unexpected VM Exit {:?}", reason); } Ok(HyperlightExit::Retry()) => { - debug!("retrying vm run"); - continue; + println!("retrying vm run"); + continue } Err(e) => { #[cfg(crashdump)] diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index ed204f712..5f89156e1 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -1101,6 +1101,7 @@ fn test_cpu_time_interrupt() { // Mark that we sent a kill signal BEFORE calling kill // to avoid race conditions + println!("Thread {} iteration {}: CPU time exceeded, sending kill signal", thread_id, iteration); was_killed_clone.store(true, Ordering::Release); interrupt_handle.kill(); break; From 21a72e5c8472f27fa68d78cdda5dacee19a184c3 Mon Sep 17 
00:00:00 2001 From: James Sturtevant Date: Fri, 17 Oct 2025 21:17:51 -0700 Subject: [PATCH 10/16] timing issue Signed-off-by: James Sturtevant --- src/hyperlight_host/src/hypervisor/hyperv_windows.rs | 9 +++------ src/hyperlight_host/src/hypervisor/mod.rs | 2 +- src/hyperlight_host/tests/integration_test.rs | 7 +++++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index 6ca01046f..1c2dec58b 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -565,6 +565,7 @@ impl Hypervisor for HypervWindowsDriver { .load(Ordering::Relaxed) || debug_interrupt { + println!("already cancelled"); WHV_RUN_VP_EXIT_CONTEXT { ExitReason: WHV_RUN_VP_EXIT_REASON(8193i32), // WHvRunVpExitReasonCanceled VpContext: Default::default(), @@ -1002,13 +1003,9 @@ impl InterruptHandle for WindowsInterruptHandle { return false; } - // don't send the signal if the the vm isn't running - // In the case this is called before the vm is running the cancel_requested would be set - // and stay set while the vm is running. 
- + let running = self.running.load(Ordering::Relaxed); self.cancel_requested.store(true, Ordering::Relaxed); - self.running.load(Ordering::Relaxed) - && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } + running && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } } #[cfg(gdb)] fn kill_from_debugger(&self) -> bool { diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 5646dcd30..8e8fea6a9 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -417,7 +417,7 @@ impl VirtualCPU { } Ok(HyperlightExit::Retry()) => { println!("retrying vm run"); - continue + continue; } Err(e) => { #[cfg(crashdump)] diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index 5f89156e1..0f65f05bf 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -1025,7 +1025,7 @@ fn test_cpu_time_interrupt() { // main_thread_id is a HANDLE on Windows let thread_handle = main_thread_id as *mut c_void; - let cpu_limit_ns: i64 = 5_000_000; // 5ms CPU time limit (in nanoseconds) + let cpu_limit_ns: i64 = 1_000_000; // 1ms CPU time limit (in nanoseconds) let mut creation_time = MaybeUninit::::uninit(); @@ -1101,7 +1101,10 @@ fn test_cpu_time_interrupt() { // Mark that we sent a kill signal BEFORE calling kill // to avoid race conditions - println!("Thread {} iteration {}: CPU time exceeded, sending kill signal", thread_id, iteration); + println!( + "Thread {} iteration {}: CPU time exceeded, sending kill signal", + thread_id, iteration + ); was_killed_clone.store(true, Ordering::Release); interrupt_handle.kill(); break; From 17200670c8e267abc5bc585ad24f2ccbd0bb5ad9 Mon Sep 17 00:00:00 2001 From: Simon Davies Date: Mon, 20 Oct 2025 14:29:48 +0100 Subject: [PATCH 11/16] fix clippy Signed-off-by: Simon Davies --- 
src/hyperlight_host/src/hypervisor/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 8e8fea6a9..f86a2c9b7 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -26,7 +26,7 @@ use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; use crate::metrics::METRIC_GUEST_CANCELLATION; #[cfg(feature = "trace_guest")] use crate::sandbox::TraceInfo; -use crate::{HyperlightError, Result, debug, log_then_return}; +use crate::{HyperlightError, Result, log_then_return}; /// HyperV-on-linux functionality #[cfg(mshv)] From cd85e762b771953e9f2109a0c336c974a34c7771 Mon Sep 17 00:00:00 2001 From: Simon Davies Date: Mon, 20 Oct 2025 15:09:53 +0100 Subject: [PATCH 12/16] fixes Signed-off-by: Simon Davies --- src/hyperlight_host/src/hypervisor/mod.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index f86a2c9b7..7d6e08bb7 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -734,6 +734,12 @@ impl LinuxInterruptHandle { let mut target_generation: Option = None; loop { + + if (!self.call_active.load(Ordering::Acquire)) { + // No active call, so no need to send signal + break; + } + let (running, generation) = self.get_running_and_generation(); // Stamp generation into cancel_requested if requested and this is the first iteration @@ -776,6 +782,13 @@ impl LinuxInterruptHandle { #[cfg(any(kvm, mshv))] impl InterruptHandle for LinuxInterruptHandle { fn kill(&self) -> bool { + + if !(self.call_active.load(Ordering::Acquire)) + { + // No active call, so no effect + return false; + } + // send_signal will stamp the generation into cancel_requested // right before sending each signal, ensuring they're always in sync self.send_signal(true) From 
f2ed3fe325dd43b0352d2d4e7724598b9acf50cc Mon Sep 17 00:00:00 2001 From: Simon Davies Date: Mon, 20 Oct 2025 18:37:11 +0100 Subject: [PATCH 13/16] Use generation approach on windows Signed-off-by: Simon Davies --- .../src/hypervisor/hyperv_windows.rs | 144 +++++++++++++++--- 1 file changed, 120 insertions(+), 24 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index 1c2dec58b..8376101e2 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -17,7 +17,7 @@ limitations under the License. use std::fmt; use std::fmt::{Debug, Formatter}; use std::string::String; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; use log::LevelFilter; @@ -328,8 +328,8 @@ impl HypervWindowsDriver { }; let interrupt_handle = Arc::new(WindowsInterruptHandle { - running: AtomicBool::new(false), - cancel_requested: AtomicBool::new(false), + running: AtomicU64::new(0), + cancel_requested: AtomicU64::new(0), #[cfg(gdb)] debug_interrupt: AtomicBool::new(false), call_active: AtomicBool::new(false), @@ -548,7 +548,8 @@ impl Hypervisor for HypervWindowsDriver { #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] fn run(&mut self) -> Result { - self.interrupt_handle.running.store(true, Ordering::Relaxed); + // Get current generation and set running bit + let generation = self.interrupt_handle.set_running_bit(); #[cfg(not(gdb))] let debug_interrupt = false; @@ -558,11 +559,10 @@ impl Hypervisor for HypervWindowsDriver { .debug_interrupt .load(Ordering::Relaxed); - // Don't run the vcpu if `cancel_requested` is true + // Check if cancellation was requested for THIS generation let exit_context = if self .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed) + .is_cancel_requested_for_generation(generation) || debug_interrupt { 
println!("already cancelled"); @@ -583,16 +583,20 @@ impl Hypervisor for HypervWindowsDriver { } self.processor.run()? }; - let cancel_was_requested_manually = self - .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed); - self.interrupt_handle - .cancel_requested - .store(false, Ordering::Relaxed); - self.interrupt_handle - .running - .store(false, Ordering::Relaxed); + + // Clear running bit + self.interrupt_handle.clear_running_bit(); + + let exit_reason = exit_context.ExitReason; + let is_canceled = exit_reason.0 == 8193; // WHvRunVpExitReasonCanceled + + // Check if this was a manual cancellation (vs internal Windows cancellation) + let cancel_was_requested_manually = self.interrupt_handle.is_cancel_requested_for_generation(generation); + + // Only clear cancel_requested if we're actually processing a cancellation for this generation + if is_canceled && cancel_was_requested_manually { + self.interrupt_handle.clear_cancel_requested(); + } #[cfg(gdb)] let debug_interrupt = self @@ -975,9 +979,23 @@ impl Drop for HypervWindowsDriver { #[derive(Debug)] pub struct WindowsInterruptHandle { - // `WHvCancelRunVirtualProcessor()` will return Ok even if the vcpu is not running, which is the reason we need this flag. - running: AtomicBool, - cancel_requested: AtomicBool, + /// Combined running flag (bit 63) and generation counter (bits 0-62). + /// + /// The generation increments with each guest function call to prevent + /// stale cancellations from affecting new calls (ABA problem). + /// + /// Layout: `[running:1 bit][generation:63 bits]` + running: AtomicU64, + + /// Combined cancel_requested flag (bit 63) and generation counter (bits 0-62). + /// + /// When kill() is called, this stores the current generation along with + /// the cancellation flag. The VCPU only honors the cancellation if the + /// generation matches its current generation. 
+ /// + /// Layout: `[cancel_requested:1 bit][generation:63 bits]` + cancel_requested: AtomicU64, + // This is used to signal the GDB thread to stop the vCPU #[cfg(gdb)] debug_interrupt: AtomicBool, @@ -996,6 +1014,78 @@ pub struct WindowsInterruptHandle { dropped: AtomicBool, } +impl WindowsInterruptHandle { + const RUNNING_BIT: u64 = 1 << 63; + const MAX_GENERATION: u64 = Self::RUNNING_BIT - 1; + const CANCEL_REQUESTED_BIT: u64 = 1 << 63; + + /// Set cancel_requested to true with the given generation + fn set_cancel_requested(&self, generation: u64) { + let value = Self::CANCEL_REQUESTED_BIT | (generation & Self::MAX_GENERATION); + self.cancel_requested.store(value, Ordering::Release); + } + + /// Clear cancel_requested (reset to no cancellation) + pub(crate) fn clear_cancel_requested(&self) { + self.cancel_requested.store(0, Ordering::Release); + } + + /// Check if cancel_requested is set for the given generation + fn is_cancel_requested_for_generation(&self, generation: u64) -> bool { + let raw = self.cancel_requested.load(Ordering::Acquire); + let is_set = raw & Self::CANCEL_REQUESTED_BIT != 0; + let stored_generation = raw & Self::MAX_GENERATION; + is_set && stored_generation == generation + } + + /// Increment the generation for a new guest function call + pub(crate) fn increment_generation(&self) -> u64 { + self.running + .fetch_update(Ordering::Release, Ordering::Acquire, |raw| { + let current_generation = raw & !Self::RUNNING_BIT; + let running_bit = raw & Self::RUNNING_BIT; + if current_generation == Self::MAX_GENERATION { + // Wrap around to 0 + return Some(running_bit); + } + Some((current_generation + 1) | running_bit) + }) + .map(|raw| { + let old_gen = raw & !Self::RUNNING_BIT; + if old_gen == Self::MAX_GENERATION { + 0 + } else { + old_gen + 1 + } + }) + .unwrap_or(0) + } + + /// Set running bit to true, return current generation + fn set_running_bit(&self) -> u64 { + self.running + .fetch_update(Ordering::Release, Ordering::Acquire, |raw| { + 
Some(raw | Self::RUNNING_BIT) + }) + .map(|raw| raw & !Self::RUNNING_BIT) + .unwrap_or(0) + } + + /// Clear running bit, return current generation + fn clear_running_bit(&self) -> u64 { + self.running + .fetch_and(!Self::RUNNING_BIT, Ordering::Relaxed) + & !Self::RUNNING_BIT + } + + fn get_running_and_generation(&self) -> (bool, u64) { + let raw = self.running.load(Ordering::Relaxed); + let running = raw & Self::RUNNING_BIT != 0; + let generation = raw & !Self::RUNNING_BIT; + (running, generation) + } +} + impl InterruptHandle for WindowsInterruptHandle { fn kill(&self) -> bool { // Check if a call is actually active first @@ -1003,15 +1093,20 @@ impl InterruptHandle for WindowsInterruptHandle { return false; } - let running = self.running.load(Ordering::Relaxed); - self.cancel_requested.store(true, Ordering::Relaxed); + // Get the current running state and generation + let (running, generation) = self.get_running_and_generation(); + + // Set cancel_requested with the current generation + self.set_cancel_requested(generation); + + // Only call WHvCancelRunVirtualProcessor if VCPU is actually running in guest mode running && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } } #[cfg(gdb)] fn kill_from_debugger(&self) -> bool { self.debug_interrupt.store(true, Ordering::Relaxed); - self.running.load(Ordering::Relaxed) - && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } + let (running, _) = self.get_running_and_generation(); + running && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } } fn dropped(&self) -> bool { @@ -1019,6 +1114,7 @@ impl InterruptHandle for WindowsInterruptHandle { } fn set_call_active(&self) { + self.increment_generation(); self.call_active.store(true, Ordering::Release); } From 570d621e984d3712c586a0e042437599ea569afb Mon Sep 17 00:00:00 2001 From: Simon Davies Date: Mon, 20 Oct 2025 18:46:08 +0100 Subject: [PATCH 14/16] fix fmt Signed-off-by: Simon Davies --- 
.../src/hypervisor/hyperv_windows.rs | 20 ++++++++++--------- src/hyperlight_host/src/hypervisor/mod.rs | 5 +---- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index 8376101e2..92bee47d9 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -591,7 +591,9 @@ impl Hypervisor for HypervWindowsDriver { let is_canceled = exit_reason.0 == 8193; // WHvRunVpExitReasonCanceled // Check if this was a manual cancellation (vs internal Windows cancellation) - let cancel_was_requested_manually = self.interrupt_handle.is_cancel_requested_for_generation(generation); + let cancel_was_requested_manually = self + .interrupt_handle + .is_cancel_requested_for_generation(generation); // Only clear cancel_requested if we're actually processing a cancellation for this generation if is_canceled && cancel_was_requested_manually { @@ -980,22 +982,22 @@ impl Drop for HypervWindowsDriver { #[derive(Debug)] pub struct WindowsInterruptHandle { /// Combined running flag (bit 63) and generation counter (bits 0-62). - /// + /// /// The generation increments with each guest function call to prevent /// stale cancellations from affecting new calls (ABA problem). - /// + /// /// Layout: `[running:1 bit][generation:63 bits]` running: AtomicU64, - + /// Combined cancel_requested flag (bit 63) and generation counter (bits 0-62). - /// + /// /// When kill() is called, this stores the current generation along with /// the cancellation flag. The VCPU only honors the cancellation if the /// generation matches its current generation. 
- /// + /// /// Layout: `[cancel_requested:1 bit][generation:63 bits]` cancel_requested: AtomicU64, - + // This is used to signal the GDB thread to stop the vCPU #[cfg(gdb)] debug_interrupt: AtomicBool, @@ -1095,10 +1097,10 @@ impl InterruptHandle for WindowsInterruptHandle { // Get the current running state and generation let (running, generation) = self.get_running_and_generation(); - + // Set cancel_requested with the current generation self.set_cancel_requested(generation); - + // Only call WHvCancelRunVirtualProcessor if VCPU is actually running in guest mode running && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } } diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 7d6e08bb7..b0adf59d6 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -734,7 +734,6 @@ impl LinuxInterruptHandle { let mut target_generation: Option = None; loop { - if (!self.call_active.load(Ordering::Acquire)) { // No active call, so no need to send signal break; @@ -782,9 +781,7 @@ impl LinuxInterruptHandle { #[cfg(any(kvm, mshv))] impl InterruptHandle for LinuxInterruptHandle { fn kill(&self) -> bool { - - if !(self.call_active.load(Ordering::Acquire)) - { + if !(self.call_active.load(Ordering::Acquire)) { // No active call, so no effect return false; } From e31a9f75835fcb7e0e0549aa63531308269f7db5 Mon Sep 17 00:00:00 2001 From: Simon Davies Date: Mon, 20 Oct 2025 19:46:28 +0100 Subject: [PATCH 15/16] fix clippy Signed-off-by: Simon Davies --- src/hyperlight_host/src/hypervisor/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index b0adf59d6..1868c7042 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -734,7 +734,7 @@ impl LinuxInterruptHandle { let mut target_generation: Option = None; 
loop { - if (!self.call_active.load(Ordering::Acquire)) { + if !self.call_active.load(Ordering::Acquire) { // No active call, so no need to send signal break; } From 4e211f23d9cdc03f95e381031c08eea6ac5a7374 Mon Sep 17 00:00:00 2001 From: Simon Davies Date: Mon, 20 Oct 2025 20:44:07 +0100 Subject: [PATCH 16/16] fix tests Signed-off-by: Simon Davies --- .../src/hypervisor/hyperv_windows.rs | 1 - src/hyperlight_host/tests/integration_test.rs | 70 ++++++++++++++----- 2 files changed, 54 insertions(+), 17 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index 92bee47d9..b46d6e6e4 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -565,7 +565,6 @@ impl Hypervisor for HypervWindowsDriver { .is_cancel_requested_for_generation(generation) || debug_interrupt { - println!("already cancelled"); WHV_RUN_VP_EXIT_CONTEXT { ExitReason: WHV_RUN_VP_EXIT_REASON(8193i32), // WHvRunVpExitReasonCanceled VpContext: Default::default(), diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index 0f65f05bf..6ae6fb8fb 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -166,29 +166,47 @@ fn interrupt_same_thread() { const NUM_ITERS: usize = 500; - // kill vm after 1 second + // Spawn killer thread that attempts to interrupt sbox2 let thread = thread::spawn(move || { for _ in 0..NUM_ITERS { - barrier2.wait(); interrupt_handle.kill(); + // Sync at END of iteration to ensure main thread completes its work + barrier2.wait(); } }); - for _ in 0..NUM_ITERS { - barrier.wait(); + for i in 0..NUM_ITERS { + // Call sbox1 - should never be interrupted sbox1 .call::("Echo", "hello".to_string()) - .expect("Only sandbox 2 is allowed to be interrupted"); + .unwrap_or_else(|e| { + panic!( + "Iteration {}: sbox1 should not be 
interrupted, got: {:?}", + i, e + ) + }); + + // Call sbox2 - may be interrupted match sbox2.call::("Echo", "hello".to_string()) { Ok(_) | Err(HyperlightError::ExecutionCanceledByHost()) => { // Only allow successful calls or interrupted. - // The call can be successful in case the call is finished before kill() is called. + // The call can be successful if it completes before kill() is called. } - _ => panic!("Unexpected return"), + Err(e) => panic!("Iteration {}: sbox2 unexpected error: {:?}", i, e), }; + + // Call sbox3 - should never be interrupted sbox3 .call::("Echo", "hello".to_string()) - .expect("Only sandbox 2 is allowed to be interrupted"); + .unwrap_or_else(|e| { + panic!( + "Iteration {}: sbox3 should not be interrupted, got: {:?}", + i, e + ) + }); + + // Sync at END of iteration to ensure killer thread waits for work to complete + barrier.wait(); } thread.join().expect("Thread should finish"); } @@ -210,31 +228,51 @@ fn interrupt_same_thread_no_barrier() { const NUM_ITERS: usize = 500; - // kill vm after 1 second + // Spawn killer thread that continuously attempts to interrupt sbox2 let thread = thread::spawn(move || { barrier2.wait(); - while !workload_done2.load(Ordering::Relaxed) { + // Use Acquire ordering to ensure we see the updated workload_done flag + while !workload_done2.load(Ordering::Acquire) { interrupt_handle.kill(); + // Small yield to prevent tight spinning + thread::yield_now(); } }); barrier.wait(); - for _ in 0..NUM_ITERS { + for i in 0..NUM_ITERS { + // Call sbox1 - should never be interrupted sbox1 .call::("Echo", "hello".to_string()) - .expect("Only sandbox 2 is allowed to be interrupted"); + .unwrap_or_else(|e| { + panic!( + "Iteration {}: sbox1 should not be interrupted, got: {:?}", + i, e + ) + }); + + // Call sbox2 - may be interrupted match sbox2.call::("Echo", "hello".to_string()) { Ok(_) | Err(HyperlightError::ExecutionCanceledByHost()) => { // Only allow successful calls or interrupted. 
- // The call can be successful in case the call is finished before kill() is called. + // The call can be successful if it completes before kill() is called. } - _ => panic!("Unexpected return"), + Err(e) => panic!("Iteration {}: sbox2 unexpected error: {:?}", i, e), }; + + // Call sbox3 - should never be interrupted sbox3 .call::("Echo", "hello".to_string()) - .expect("Only sandbox 2 is allowed to be interrupted"); + .unwrap_or_else(|e| { + panic!( + "Iteration {}: sbox3 should not be interrupted, got: {:?}", + i, e + ) + }); } - workload_done.store(true, Ordering::Relaxed); + + // Use Release ordering to ensure killer thread sees the update + workload_done.store(true, Ordering::Release); thread.join().expect("Thread should finish"); }