diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index 0b003b20d..acde6b9d6 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -65,7 +65,7 @@ windows = { version = "0.62", features = [ "Win32_System_JobObjects", "Win32_System_SystemServices", ] } -windows-sys = { version = "0.61", features = ["Win32"] } +windows-sys = { version = "0.61", features = ["Win32", "Win32_System", "Win32_System_Threading"] } windows-result = "0.4" rust-embed = { version = "8.7.2", features = ["debug-embed", "include-exclude", "interpolate-folder-path"] } sha256 = "1.6.0" diff --git a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs index 80ee0b16b..42835c16f 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs @@ -403,7 +403,8 @@ impl HypervLinuxDriver { let interrupt_handle = Arc::new(LinuxInterruptHandle { running: AtomicU64::new(0), - cancel_requested: AtomicBool::new(false), + cancel_requested: AtomicU64::new(0), + call_active: AtomicBool::new(false), #[cfg(gdb)] debug_interrupt: AtomicBool::new(false), #[cfg(all( @@ -669,16 +670,13 @@ impl Hypervisor for HypervLinuxDriver { self.interrupt_handle .tid .store(unsafe { libc::pthread_self() as u64 }, Ordering::Relaxed); - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call - self.interrupt_handle - .set_running_and_increment_generation() - .map_err(|e| { - new_error!( - "Error setting running state and incrementing generation: {}", - e - ) - })?; + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after set_running_bit but before checking cancel_requested): + // - kill() will stamp cancel_requested with the current generation + // - We will check cancel_requested below and skip the VcpuFd::run() call + // - This is the desired behavior - the kill takes effect immediately + let generation = self.interrupt_handle.set_running_bit(); + #[cfg(not(gdb))] let debug_interrupt = false; #[cfg(gdb)] @@ -687,14 +685,16 @@ impl Hypervisor for HypervLinuxDriver { .debug_interrupt .load(Ordering::Relaxed); - // Don't run the vcpu if `cancel_requested` is true + // Don't run the vcpu if `cancel_requested` is set for our generation // - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after checking cancel_requested but before vcpu.run()): + // - kill() will stamp cancel_requested with the current generation + // - We will proceed with vcpu.run(), but signals will be sent to interrupt it + // - The vcpu will be interrupted and return EINTR (handled below) let exit_reason = if self .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed) + .is_cancel_requested_for_generation(generation) || debug_interrupt { Err(mshv_ioctls::MshvError::from(libc::EINTR)) @@ -707,11 +707,12 @@ impl Hypervisor for HypervLinuxDriver { Some(hyperlight_guest_tracing::invariant_tsc::read_tsc()); self.trace_info.guest_start_epoch = Some(std::time::Instant::now()); } - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then the vcpu will run, but we will keep sending signals to this thread - // to interrupt it 
until `running` is set to false. The `vcpu_fd::run()` call will - // return either normally with an exit reason, or from being "kicked" by out signal handler, with an EINTR error, - // both of which are fine. + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (during vcpu.run() execution): + // - kill() stamps cancel_requested with the current generation + // - kill() sends signals (SIGRTMIN+offset) to this thread repeatedly + // - The signal handler is a no-op, but it causes vcpu.run() to return EINTR + // - We check cancel_requested below and return Cancelled if generation matches #[cfg(mshv2)] { let hv_message: hv_message = Default::default(); @@ -720,27 +721,32 @@ impl Hypervisor for HypervLinuxDriver { #[cfg(mshv3)] self.vcpu_fd.run() }; - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then signals will be sent to this thread until `running` is set to false. - // This is fine since the signal handler is a no-op. + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after vcpu.run() returns but before clear_running_bit): + // - kill() continues sending signals to this thread (running bit is still set) + // - The signals are harmless (no-op handler), we just need to check cancel_requested + // - We load cancel_requested below to determine if this run was cancelled let cancel_requested = self .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed); + .is_cancel_requested_for_generation(generation); #[cfg(gdb)] let debug_interrupt = self .interrupt_handle .debug_interrupt .load(Ordering::Relaxed); - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**. - // Additionally signals will be sent to this thread until `running` is set to false. - // This is fine since the signal handler is a no-op. + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after loading cancel_requested but before clear_running_bit): + // - kill() stamps cancel_requested with the CURRENT generation (not the one we just loaded) + // - kill() continues sending signals until running bit is cleared + // - The newly stamped cancel_requested will affect the NEXT vcpu.run() call + // - Signals sent now are harmless (no-op handler) self.interrupt_handle.clear_running_bit(); - // At this point, `running` is false so no more signals will be sent to this thread, - // but we may still receive async signals that were sent before this point. - // To prevent those signals from interrupting subsequent calls to `run()`, - // we make sure to check `cancel_requested` before cancelling (see `libc::EINTR` match-arm below). + // At this point, running bit is clear so kill() will stop sending signals. + // However, we may still receive delayed signals that were sent before clear_running_bit. 
+ // These stale signals are harmless because: + // - The signal handler is a no-op + // - We check generation matching in cancel_requested before treating EINTR as cancellation + // - If generation doesn't match, we return Retry instead of Cancelled let result = match exit_reason { Ok(m) => match m.header.message_type { HALT_MESSAGE => { @@ -820,14 +826,16 @@ impl Hypervisor for HypervLinuxDriver { } }, Err(e) => match e.errno() { - // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled + // We send a signal (SIGRTMIN+offset) to interrupt the vcpu, which causes EINTR libc::EINTR => { - // If cancellation was not requested for this specific vm, the vcpu was interrupted because of debug interrupt or - // a stale signal that meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it + // Check if cancellation was requested for THIS specific generation. + // If not, the EINTR came from: + // - A debug interrupt (if GDB is enabled) + // - A stale signal from a previous guest call (generation mismatch) + // - A signal meant for a different sandbox on the same thread + // In these cases, we return Retry to continue execution. if cancel_requested { - self.interrupt_handle - .cancel_requested - .store(false, Ordering::Relaxed); + self.interrupt_handle.clear_cancel_requested(); HyperlightExit::Cancelled() } else { #[cfg(gdb)] diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index 490e1f72e..8376101e2 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -17,7 +17,7 @@ limitations under the License. use std::fmt; use std::fmt::{Debug, Formatter}; use std::string::String; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; use log::LevelFilter; @@ -328,10 +328,11 @@ impl HypervWindowsDriver { }; let interrupt_handle = Arc::new(WindowsInterruptHandle { - running: AtomicBool::new(false), - cancel_requested: AtomicBool::new(false), + running: AtomicU64::new(0), + cancel_requested: AtomicU64::new(0), #[cfg(gdb)] debug_interrupt: AtomicBool::new(false), + call_active: AtomicBool::new(false), partition_handle, dropped: AtomicBool::new(false), }); @@ -547,7 +548,8 @@ impl Hypervisor for HypervWindowsDriver { #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] fn run(&mut self) -> Result { - self.interrupt_handle.running.store(true, Ordering::Relaxed); + // Get current generation and set running bit + let generation = self.interrupt_handle.set_running_bit(); #[cfg(not(gdb))] let debug_interrupt = false; @@ -557,13 +559,13 @@ impl Hypervisor for HypervWindowsDriver { .debug_interrupt .load(Ordering::Relaxed); - // Don't run the vcpu if `cancel_requested` is true + // Check if cancellation was requested for THIS generation let exit_context = if self .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed) + .is_cancel_requested_for_generation(generation) || debug_interrupt { + println!("already cancelled"); WHV_RUN_VP_EXIT_CONTEXT { ExitReason: WHV_RUN_VP_EXIT_REASON(8193i32), // WHvRunVpExitReasonCanceled VpContext: Default::default(), @@ -581,12 +583,20 @@ impl Hypervisor for HypervWindowsDriver { } self.processor.run()? 
}; - self.interrupt_handle - .cancel_requested - .store(false, Ordering::Relaxed); - self.interrupt_handle - .running - .store(false, Ordering::Relaxed); + + // Clear running bit + self.interrupt_handle.clear_running_bit(); + + let exit_reason = exit_context.ExitReason; + let is_canceled = exit_reason.0 == 8193; // WHvRunVpExitReasonCanceled + + // Check if this was a manual cancellation (vs internal Windows cancellation) + let cancel_was_requested_manually = self.interrupt_handle.is_cancel_requested_for_generation(generation); + + // Only clear cancel_requested if we're actually processing a cancellation for this generation + if is_canceled && cancel_was_requested_manually { + self.interrupt_handle.clear_cancel_requested(); + } #[cfg(gdb)] let debug_interrupt = self @@ -662,12 +672,30 @@ impl Hypervisor for HypervWindowsDriver { // return a special exit reason so that the gdb thread can handle it // and resume execution HyperlightExit::Debug(VcpuStopReason::Interrupt) + } else if !cancel_was_requested_manually { + // This was an internal cancellation + // The virtualization stack can use this function to return the control + // of a virtual processor back to the virtualization stack in case it + // needs to change the state of a VM or to inject an event into the processor + debug!("Internal cancellation detected, returning Retry error"); + HyperlightExit::Retry() } else { HyperlightExit::Cancelled() } #[cfg(not(gdb))] - HyperlightExit::Cancelled() + { + if !cancel_was_requested_manually { + // This was an internal cancellation + // The virtualization stack can use this function to return the control + // of a virtual processor back to the virtualization stack in case it + // needs to change the state of a VM or to inject an event into the processor + println!("Internal cancellation detected, returning Retry error"); + HyperlightExit::Retry() + } else { + HyperlightExit::Cancelled() + } + } } #[cfg(gdb)] WHV_RUN_VP_EXIT_REASON(4098i32) => { @@ -951,30 +979,146 @@ impl Drop for HypervWindowsDriver { #[derive(Debug)] pub struct WindowsInterruptHandle { - // `WHvCancelRunVirtualProcessor()` will return Ok even if the vcpu is not running, which is the reason we need this flag. - running: AtomicBool, - cancel_requested: AtomicBool, + /// Combined running flag (bit 63) and generation counter (bits 0-62). + /// + /// The generation increments with each guest function call to prevent + /// stale cancellations from affecting new calls (ABA problem). + /// + /// Layout: `[running:1 bit][generation:63 bits]` + running: AtomicU64, + + /// Combined cancel_requested flag (bit 63) and generation counter (bits 0-62). + /// + /// When kill() is called, this stores the current generation along with + /// the cancellation flag. The VCPU only honors the cancellation if the + /// generation matches its current generation. + /// + /// Layout: `[cancel_requested:1 bit][generation:63 bits]` + cancel_requested: AtomicU64, + // This is used to signal the GDB thread to stop the vCPU #[cfg(gdb)] debug_interrupt: AtomicBool, + /// Flag indicating whether a guest function call is currently in progress. + /// + /// **true**: A guest function call is active (between call start and completion) + /// **false**: No guest function call is active + /// + /// # Purpose + /// + /// This flag prevents kill() from having any effect when called outside of a + /// guest function call. This solves the "kill-in-advance" problem where kill() + /// could be called before a guest function starts and would incorrectly cancel it. 
+ call_active: AtomicBool, partition_handle: WHV_PARTITION_HANDLE, dropped: AtomicBool, } +impl WindowsInterruptHandle { + const RUNNING_BIT: u64 = 1 << 63; + const MAX_GENERATION: u64 = Self::RUNNING_BIT - 1; + const CANCEL_REQUESTED_BIT: u64 = 1 << 63; + + /// Set cancel_requested to true with the given generation + fn set_cancel_requested(&self, generation: u64) { + let value = Self::CANCEL_REQUESTED_BIT | (generation & Self::MAX_GENERATION); + self.cancel_requested.store(value, Ordering::Release); + } + + /// Clear cancel_requested (reset to no cancellation) + pub(crate) fn clear_cancel_requested(&self) { + self.cancel_requested.store(0, Ordering::Release); + } + + /// Check if cancel_requested is set for the given generation + fn is_cancel_requested_for_generation(&self, generation: u64) -> bool { + let raw = self.cancel_requested.load(Ordering::Acquire); + let is_set = raw & Self::CANCEL_REQUESTED_BIT != 0; + let stored_generation = raw & Self::MAX_GENERATION; + is_set && stored_generation == generation + } + + /// Increment the generation for a new guest function call + pub(crate) fn increment_generation(&self) -> u64 { + self.running + .fetch_update(Ordering::Release, Ordering::Acquire, |raw| { + let current_generation = raw & !Self::RUNNING_BIT; + let running_bit = raw & Self::RUNNING_BIT; + if current_generation == Self::MAX_GENERATION { + // Wrap around to 0 + return Some(running_bit); + } + Some((current_generation + 1) | running_bit) + }) + .map(|raw| { + let old_gen = raw & !Self::RUNNING_BIT; + if old_gen == Self::MAX_GENERATION { + 0 + } else { + old_gen + 1 + } + }) + .unwrap_or(0) + } + + /// Set running bit to true, return current generation + fn set_running_bit(&self) -> u64 { + self.running + .fetch_update(Ordering::Release, Ordering::Acquire, |raw| { + Some(raw | Self::RUNNING_BIT) + }) + .map(|raw| raw & !Self::RUNNING_BIT) + .unwrap_or(0) + } + + /// Clear running bit, return current generation + fn clear_running_bit(&self) -> u64 { + self.running + .fetch_and(!Self::RUNNING_BIT, Ordering::Relaxed) + & !Self::RUNNING_BIT + } + + fn get_running_and_generation(&self) -> (bool, u64) { + let raw = self.running.load(Ordering::Relaxed); + let running = raw & Self::RUNNING_BIT != 0; + let generation = raw & !Self::RUNNING_BIT; + (running, generation) + } +} + impl InterruptHandle for WindowsInterruptHandle { fn kill(&self) -> bool { - self.cancel_requested.store(true, Ordering::Relaxed); - self.running.load(Ordering::Relaxed) - && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } + // Check if a call is actually active first + if !self.call_active.load(Ordering::Acquire) { + return false; + } + + // Get the current running state and generation + let (running, generation) = self.get_running_and_generation(); + + // Set cancel_requested with the current generation + self.set_cancel_requested(generation); + + // Only call WHvCancelRunVirtualProcessor if VCPU is actually running in guest mode + running && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } } #[cfg(gdb)] fn kill_from_debugger(&self) -> bool { self.debug_interrupt.store(true, Ordering::Relaxed); - self.running.load(Ordering::Relaxed) - && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } + let (running, _) = self.get_running_and_generation(); + running && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } } fn dropped(&self) -> bool { self.dropped.load(Ordering::Relaxed) } + + fn set_call_active(&self) { + 
self.increment_generation(); + self.call_active.store(true, Ordering::Release); + } + + fn clear_call_active(&self) { + self.call_active.store(false, Ordering::Release); + } } diff --git a/src/hyperlight_host/src/hypervisor/kvm.rs b/src/hyperlight_host/src/hypervisor/kvm.rs index a42538b33..96b9b3e04 100644 --- a/src/hyperlight_host/src/hypervisor/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/kvm.rs @@ -343,7 +343,8 @@ impl KVMDriver { let interrupt_handle = Arc::new(LinuxInterruptHandle { running: AtomicU64::new(0), - cancel_requested: AtomicBool::new(false), + cancel_requested: AtomicU64::new(0), + call_active: AtomicBool::new(false), #[cfg(gdb)] debug_interrupt: AtomicBool::new(false), #[cfg(all( @@ -627,16 +628,13 @@ impl Hypervisor for KVMDriver { self.interrupt_handle .tid .store(unsafe { libc::pthread_self() as u64 }, Ordering::Relaxed); - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call - self.interrupt_handle - .set_running_and_increment_generation() - .map_err(|e| { - new_error!( - "Error setting running state and incrementing generation: {}", - e - ) - })?; + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after set_running_bit but before checking cancel_requested): + // - kill() will stamp cancel_requested with the current generation + // - We will check cancel_requested below and skip the VcpuFd::run() call + // - This is the desired behavior - the kill takes effect immediately + let generation = self.interrupt_handle.set_running_bit(); + #[cfg(not(gdb))] let debug_interrupt = false; #[cfg(gdb)] @@ -644,14 +642,16 @@ impl Hypervisor for KVMDriver { .interrupt_handle .debug_interrupt .load(Ordering::Relaxed); - // Don't run the vcpu if `cancel_requested` is true + // Don't run the vcpu if `cancel_requested` is set for our generation // - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after checking cancel_requested but before vcpu.run()): + // - kill() will stamp cancel_requested with the current generation + // - We will proceed with vcpu.run(), but signals will be sent to interrupt it + // - The vcpu will be interrupted and return EINTR (handled below) let exit_reason = if self .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed) + .is_cancel_requested_for_generation(generation) || debug_interrupt { Err(kvm_ioctls::Error::new(libc::EINTR)) @@ -665,34 +665,40 @@ impl Hypervisor for KVMDriver { Some(hyperlight_guest_tracing::invariant_tsc::read_tsc()); } - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then the vcpu will run, but we will keep sending signals to this thread - // to interrupt it until `running` is set to false. The `vcpu_fd::run()` call will - // return either normally with an exit reason, or from being "kicked" by out signal handler, with an EINTR error, - // both of which are fine. 
+ // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (during vcpu.run() execution): + // - kill() stamps cancel_requested with the current generation + // - kill() sends signals (SIGRTMIN+offset) to this thread repeatedly + // - The signal handler is a no-op, but it causes vcpu.run() to return EINTR + // - We check cancel_requested below and return Cancelled if generation matches self.vcpu_fd.run() }; - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then signals will be sent to this thread until `running` is set to false. - // This is fine since the signal handler is a no-op. + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after vcpu.run() returns but before clear_running_bit): + // - kill() continues sending signals to this thread (running bit is still set) + // - The signals are harmless (no-op handler), we just need to check cancel_requested + // - We load cancel_requested below to determine if this run was cancelled let cancel_requested = self .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed); + .is_cancel_requested_for_generation(generation); #[cfg(gdb)] let debug_interrupt = self .interrupt_handle .debug_interrupt .load(Ordering::Relaxed); - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**. - // Additionally signals will be sent to this thread until `running` is set to false. - // This is fine since the signal handler is a no-op. + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after loading cancel_requested but before clear_running_bit): + // - kill() stamps cancel_requested with the CURRENT generation (not the one we just loaded) + // - kill() continues sending signals until running bit is cleared + // - The newly stamped cancel_requested will affect the NEXT vcpu.run() call + // - Signals sent now are harmless (no-op handler) self.interrupt_handle.clear_running_bit(); - // At this point, `running` is false so no more signals will be sent to this thread, - // but we may still receive async signals that were sent before this point. - // To prevent those signals from interrupting subsequent calls to `run()` (on other vms!), - // we make sure to check `cancel_requested` before cancelling (see `libc::EINTR` match-arm below). + // At this point, running bit is clear so kill() will stop sending signals. + // However, we may still receive delayed signals that were sent before clear_running_bit. 
+ // These stale signals are harmless because: + // - The signal handler is a no-op + // - We check generation matching in cancel_requested before treating EINTR as cancellation + // - If generation doesn't match, we return Retry instead of Cancelled let result = match exit_reason { Ok(VcpuExit::Hlt) => { crate::debug!("KVM - Halt Details : {:#?}", &self); @@ -741,14 +747,16 @@ impl Hypervisor for KVMDriver { } }, Err(e) => match e.errno() { - // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled + // We send a signal (SIGRTMIN+offset) to interrupt the vcpu, which causes EINTR libc::EINTR => { - // If cancellation was not requested for this specific vm, the vcpu was interrupted because of debug interrupt or - // a stale signal that meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it + // Check if cancellation was requested for THIS specific generation. + // If not, the EINTR came from: + // - A debug interrupt (if GDB is enabled) + // - A stale signal from a previous guest call (generation mismatch) + // - A signal meant for a different sandbox on the same thread + // In these cases, we return Retry to continue execution. if cancel_requested { - self.interrupt_handle - .cancel_requested - .store(false, Ordering::Relaxed); + self.interrupt_handle.clear_cancel_requested(); HyperlightExit::Cancelled() } else { #[cfg(gdb)] diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 781099134..7d6e08bb7 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -114,7 +114,9 @@ pub enum HyperlightExit { Cancelled(), /// The vCPU has exited for a reason that is not handled by Hyperlight Unknown(String), - /// The operation should be retried, for example this can happen on Linux where a call to run the CPU can return EAGAIN + /// The operation should be retried + /// On Linux this can happen where a call to run the CPU can return EAGAIN + /// On Windows the platform could cause a cancelation of the VM run Retry(), } @@ -413,7 +415,10 @@ impl VirtualCPU { log_then_return!("Unexpected VM Exit {:?}", reason); } - Ok(HyperlightExit::Retry()) => continue, + Ok(HyperlightExit::Retry()) => { + println!("retrying vm run"); + continue; + } Err(e) => { #[cfg(crashdump)] crashdump::generate_crashdump(hv)?; @@ -435,13 +440,50 @@ impl VirtualCPU { pub trait InterruptHandle: Debug + Send + Sync { /// Interrupt the corresponding sandbox from running. /// - /// - If this is called while the vcpu is running, then it will interrupt the vcpu and return `true`. - /// - If this is called while the vcpu is not running, (for example during a host call), the - /// vcpu will not immediately be interrupted, but will prevent the vcpu from running **the next time** - /// it's scheduled, and returns `false`. + /// This method attempts to cancel a currently executing guest function call by sending + /// a signal to the VCPU thread. It uses generation tracking (linux only) and call_active flag to + /// ensure the interruption is safe and precise. + /// + /// # Behavior + /// + /// - **Guest function running**: If called while a guest function is executing (VCPU running + /// or in a host function call), this stamps the current generation into cancel_requested + /// and sends a signal to interrupt the VCPU. Returns `true`. 
+ /// + /// - **No active call**: If called when no guest function call is in progress (call_active=false), + /// this has no effect and returns `false`. This prevents "kill-in-advance" where kill() + /// is called before a guest function starts. + /// + /// - **During host function**: If the guest call is currently executing a host function + /// (VCPU not running but call_active=true), this stamps cancel_requested. When the + /// host function returns and attempts to re-enter the guest, the cancellation will + /// be detected and the call will abort. Returns `true`. + /// + /// # Generation Tracking (linux only) + /// + /// The method stamps the current generation number along with the cancellation request. + /// This ensures that: + /// - Stale signals from previous calls are ignored (generation mismatch) + /// - Only the intended guest function call is affected + /// - Multiple rapid kill() calls on the same generation are idempotent + /// + /// # Blocking Behavior + /// + /// This function will block while attempting to deliver the signal to the VCPU thread, + /// retrying until either: + /// - The signal is successfully delivered (VCPU transitions from running to not running) + /// - The VCPU stops running for another reason (e.g., call completes normally) + /// + /// # Returns + /// + /// - `true`: Cancellation request was stamped (kill will take effect) + /// - `false`: No active call, cancellation request was not stamped (no effect) /// /// # Note - /// This function will block for the duration of the time it takes for the vcpu thread to be interrupted. + /// + /// To reliably interrupt a guest call, ensure `kill()` is called while the guest + /// function is actually executing. Calling kill() before call_guest_function() will + /// have no effect. fn kill(&self) -> bool; /// Used by a debugger to interrupt the corresponding sandbox from running. @@ -458,50 +500,167 @@ pub trait InterruptHandle: Debug + Send + Sync { /// Returns true if the corresponding sandbox has been dropped fn dropped(&self) -> bool; + + /// Increment the generation counter for a new guest function call (Linux only). + /// + /// This must be called exactly once at the start of each guest function call, + /// before any VCPU execution begins. The returned generation number will be + /// used throughout the entire guest call, even if the VCPU is run multiple + /// times (due to host function calls, retries, etc.). + /// + /// # Returns + /// + /// The new generation number assigned to this guest function call. + /// + /// # Note + /// + /// This is only called on Linux (KVM/MSHV). Windows uses a different interrupt mechanism. + #[cfg(any(kvm, mshv))] + fn increment_call_generation(&self) -> u64; + + /// Mark that a guest function call is starting. + /// + /// Sets the call_active flag to true, indicating that a guest function call + /// is now in progress. This allows kill() to stamp cancel_requested. + /// + /// Must be called immediately after increment_call_generation() and before + /// any VCPU execution begins. + fn set_call_active(&self); + + /// Mark that a guest function call has completed. + /// + /// Clears the call_active flag, indicating that no guest function call is + /// in progress. After this, kill() will have no effect and will return false. + /// + /// Must be called at the end of call_guest_function_by_name_no_reset(), + /// after the guest call has fully completed (whether successfully or with error). 
+ fn clear_call_active(&self); } #[cfg(any(kvm, mshv))] #[derive(Debug)] pub(super) struct LinuxInterruptHandle { - /// Invariant: vcpu is running => most significant bit (63) of `running` is set. (Neither converse nor inverse is true) + /// Atomic flag combining running state and generation counter. + /// + /// **Bit 63**: VCPU running state (1 = running, 0 = not running) + /// **Bits 0-62**: Generation counter (incremented once per guest function call) + /// + /// # Generation Tracking + /// + /// The generation counter is incremented once at the start of each guest function call + /// and remains constant throughout that call, even if the VCPU is run multiple times + /// (due to host function calls, retries, etc.). This design solves the race condition + /// where a kill() from a previous call could spuriously cancel a new call. /// - /// Additionally, bit 0-62 tracks how many times the VCPU has been run. Incremented each time `run()` is called. + /// ## Why Generations Are Needed /// - /// This prevents an ABA problem where: - /// 1. The VCPU is running (generation N), - /// 2. It gets cancelled, - /// 3. Then quickly restarted (generation N+1), - /// before the original thread has observed that it was cancelled. + /// Consider this scenario WITHOUT generation tracking: + /// 1. Thread A starts guest call 1, VCPU runs + /// 2. Thread B calls kill(), sends signal to Thread A + /// 3. Guest call 1 completes before signal arrives + /// 4. Thread A starts guest call 2, VCPU runs again + /// 5. Stale signal from step 2 arrives and incorrectly cancels call 2 /// - /// Without this generation counter, the interrupt logic might assume the VCPU is still - /// in the *original* run (generation N), see that it's `running`, and re-send the signal. - /// But the new VCPU run (generation N+1) would treat this as a stale signal and ignore it, - /// potentially causing an infinite loop where no effective interrupt is delivered. + /// WITH generation tracking: + /// 1. Thread A starts guest call 1 (generation N), VCPU runs + /// 2. Thread B calls kill(), stamps cancel_requested with generation N + /// 3. Guest call 1 completes, signal may or may not have arrived yet + /// 4. Thread A starts guest call 2 (generation N+1), VCPU runs again + /// 5. If stale signal arrives, signal handler checks: cancel_requested.generation (N) != current generation (N+1) + /// 6. Stale signal is ignored, call 2 continues normally /// - /// Invariant: If the VCPU is running, `run_generation[bit 0-62]` matches the current run's generation. + /// ## Per-Call vs Per-Run Generation + /// + /// It's critical that generation is incremented per GUEST FUNCTION CALL, not per vcpu.run(): + /// - A single guest function call may invoke vcpu.run() multiple times (host calls, retries) + /// - All run() calls within the same guest call must share the same generation + /// - This ensures kill() affects the entire guest function call atomically + /// + /// # Invariants + /// + /// - If VCPU is running: bit 63 is set (neither converse nor inverse holds) + /// - If VCPU is running: bits 0-62 match the current guest call's generation running: AtomicU64, - /// Invariant: vcpu is running => `tid` is the thread on which it is running. - /// Note: multiple vms may have the same `tid`, but at most one vm will have `running` set to true. + + /// Thread ID where the VCPU is currently running. 
+ /// + /// # Invariants + /// + /// - If VCPU is running: tid contains the thread ID of the executing thread + /// - Multiple VMs may share the same tid, but at most one will have running=true tid: AtomicU64, - /// True when an "interruptor" has requested the VM to be cancelled. Set immediately when - /// `kill()` is called, and cleared when the vcpu is no longer running. - /// This is used to - /// 1. make sure stale signals do not interrupt the - /// the wrong vcpu (a vcpu may only be interrupted iff `cancel_requested` is true), - /// 2. ensure that if a vm is killed while a host call is running, - /// the vm will not re-enter the guest after the host call returns. - cancel_requested: AtomicBool, - /// True when the debugger has requested the VM to be interrupted. Set immediately when - /// `kill_from_debugger()` is called, and cleared when the vcpu is no longer running. - /// This is used to make sure stale signals do not interrupt the the wrong vcpu - /// (a vcpu may only be interrupted by a debugger if `debug_interrupt` is true), + + /// Generation-aware cancellation request flag. + /// + /// **Bit 63**: Cancellation requested flag (1 = kill requested, 0 = no kill) + /// **Bits 0-62**: Generation number when cancellation was requested + /// + /// # Purpose + /// + /// This flag serves three critical functions: + /// + /// 1. **Prevent stale signals**: A VCPU may only be interrupted if cancel_requested + /// is set AND the generation matches the current call's generation + /// + /// 2. **Handle host function calls**: If kill() is called while a host function is + /// executing (VCPU not running but call is active), cancel_requested is stamped + /// with the current generation. When the host function returns and the VCPU + /// attempts to re-enter the guest, it will see the cancellation and abort. + /// + /// 3. **Detect stale kills**: If cancel_requested.generation doesn't match the + /// current generation, it's from a previous call and should be ignored + /// + /// # States and Transitions + /// + /// - **No cancellation**: cancel_requested = 0 (bit 63 clear) + /// - **Cancellation for generation N**: cancel_requested = (1 << 63) | N + /// - Signal handler checks: (cancel_requested & 0x7FFFFFFFFFFFFFFF) == current_generation + cancel_requested: AtomicU64, + + /// Flag indicating whether a guest function call is currently in progress. + /// + /// **true**: A guest function call is active (between call start and completion) + /// **false**: No guest function call is active + /// + /// # Purpose + /// + /// This flag prevents kill() from having any effect when called outside of a + /// guest function call. This solves the "kill-in-advance" problem where kill() + /// could be called before a guest function starts and would incorrectly cancel it. + /// + /// # Behavior + /// + /// - Set to true at the start of call_guest_function_by_name_no_reset() + /// - Cleared at the end of call_guest_function_by_name_no_reset() + /// - kill() only stamps cancel_requested if call_active is true + /// - If kill() is called when call_active=false, it returns false and has no effect + /// + /// # Why AtomicBool is Safe + /// + /// Although there's a theoretical race where: + /// 1. Thread A checks call_active (false) + /// 2. Thread B sets call_active (true) and starts guest call + /// 3. Thread A's kill() returns false (no effect) + /// + /// This is acceptable because the generation tracking provides an additional + /// safety layer. 
Even if a stale kill somehow stamped cancel_requested, the + /// generation mismatch would cause it to be ignored. + call_active: AtomicBool, + + /// Debugger interrupt request flag (GDB only). + /// + /// Set when kill_from_debugger() is called, cleared when VCPU stops running. + /// Used to distinguish debugger interrupts from normal kill() interrupts. #[cfg(gdb)] debug_interrupt: AtomicBool, - /// Whether the corresponding vm is dropped + + /// Whether the corresponding VM has been dropped. dropped: AtomicBool, - /// Retry delay between signals sent to the vcpu thread + + /// Delay between retry attempts when sending signals to the VCPU thread. retry_delay: Duration, - /// The offset of the SIGRTMIN signal used to interrupt the vcpu thread + + /// Offset from SIGRTMIN for the signal used to interrupt the VCPU thread. sig_rt_min_offset: u8, } @@ -509,18 +668,51 @@ pub(super) struct LinuxInterruptHandle { impl LinuxInterruptHandle { const RUNNING_BIT: u64 = 1 << 63; const MAX_GENERATION: u64 = Self::RUNNING_BIT - 1; + const CANCEL_REQUESTED_BIT: u64 = 1 << 63; - // set running to true and increment the generation. Generation will wrap around at `MAX_GENERATION`. - fn set_running_and_increment_generation(&self) -> std::result::Result { + // Set cancel_requested to true with the given generation + fn set_cancel_requested(&self, generation: u64) { + let value = Self::CANCEL_REQUESTED_BIT | (generation & Self::MAX_GENERATION); + self.cancel_requested.store(value, Ordering::Release); + } + + // Clear cancel_requested (reset to no cancellation) + pub(crate) fn clear_cancel_requested(&self) { + self.cancel_requested.store(0, Ordering::Release); + } + + // Check if cancel_requested is set for the given generation + fn is_cancel_requested_for_generation(&self, generation: u64) -> bool { + let raw = self.cancel_requested.load(Ordering::Acquire); + let is_set = raw & Self::CANCEL_REQUESTED_BIT != 0; + let stored_generation = raw & Self::MAX_GENERATION; + is_set && stored_generation == generation + } + + // Increment the generation for a new guest function call. Generation will wrap around at `MAX_GENERATION`. 
+ pub(crate) fn increment_generation(&self) -> u64 { self.running - .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |raw| { - let generation = raw & !Self::RUNNING_BIT; - if generation == Self::MAX_GENERATION { + .fetch_update(Ordering::Release, Ordering::Acquire, |raw| { + let current_generation = raw & !Self::RUNNING_BIT; + let running_bit = raw & Self::RUNNING_BIT; + if current_generation == Self::MAX_GENERATION { // restart generation from 0 - return Some(Self::RUNNING_BIT); + return Some(running_bit); } - Some((generation + 1) | Self::RUNNING_BIT) + Some((current_generation + 1) | running_bit) }) + .map(|raw| (raw & !Self::RUNNING_BIT) + 1) // Return the NEW generation + .unwrap_or(1) // If wrapped, return 1 + } + + // set running to true without incrementing generation + fn set_running_bit(&self) -> u64 { + self.running + .fetch_update(Ordering::Release, Ordering::Acquire, |raw| { + Some(raw | Self::RUNNING_BIT) + }) + .map(|raw| raw & !Self::RUNNING_BIT) // Return the current generation + .unwrap_or(0) } // clear the running bit and return the generation @@ -536,14 +728,34 @@ impl LinuxInterruptHandle { (running, generation) } - fn send_signal(&self) -> bool { + fn send_signal(&self, stamp_generation: bool) -> bool { let signal_number = libc::SIGRTMIN() + self.sig_rt_min_offset as libc::c_int; let mut sent_signal = false; let mut target_generation: Option = None; loop { + + if (!self.call_active.load(Ordering::Acquire)) { + // No active call, so no need to send signal + break; + } + let (running, generation) = self.get_running_and_generation(); + // Stamp generation into cancel_requested if requested and this is the first iteration + // We stamp even when running=false to support killing during host function calls + // The generation tracking will prevent stale kills from affecting new calls + // Only stamp if a call is actually active (call_active=true) + if stamp_generation + && target_generation.is_none() + && self.call_active.load(Ordering::Acquire) + { + self.set_cancel_requested(generation); + target_generation = Some(generation); + } + + // If not running, we've stamped the generation (if requested), so we're done + // This handles the host function call scenario if !running { break; } @@ -570,18 +782,40 @@ impl LinuxInterruptHandle { #[cfg(any(kvm, mshv))] impl InterruptHandle for LinuxInterruptHandle { fn kill(&self) -> bool { - self.cancel_requested.store(true, Ordering::Relaxed); - self.send_signal() + if !(self.call_active.load(Ordering::Acquire)) + { + // No active call, so no effect + return false; + } + + // send_signal will stamp the generation into cancel_requested + // right before sending each signal, ensuring they're always in sync + self.send_signal(true) } #[cfg(gdb)] fn kill_from_debugger(&self) -> bool { self.debug_interrupt.store(true, Ordering::Relaxed); - self.send_signal() + self.send_signal(false) } fn dropped(&self) -> bool { self.dropped.load(Ordering::Relaxed) } + + #[cfg(any(kvm, mshv))] + fn increment_call_generation(&self) -> u64 { + self.increment_generation() + } + + #[cfg(any(kvm, mshv))] + fn set_call_active(&self) { + self.call_active.store(true, Ordering::Release); + } + + #[cfg(any(kvm, mshv))] + fn clear_call_active(&self) { + self.call_active.store(false, Ordering::Release); + } } #[cfg(all(test, any(target_os = "windows", kvm)))] diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 6dc02d71f..129d4a938 100644 --- 
a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -390,6 +390,13 @@ impl MultiUseSandbox { return_type: ReturnType, args: Vec, ) -> Result { + // Increment generation for this guest function call (Linux only) + #[cfg(any(kvm, mshv))] + let _generation = self.vm.interrupt_handle().increment_call_generation(); + + // Mark that a guest function call is now active + self.vm.interrupt_handle().set_call_active(); + let res = (|| { let estimated_capacity = estimate_flatbuffer_capacity(function_name, &args); @@ -405,6 +412,10 @@ impl MultiUseSandbox { self.mem_mgr.write_guest_function_call(buffer)?; + // Increment generation for this guest function call (Linux only) + #[cfg(any(kvm, mshv))] + self.vm.interrupt_handle().increment_call_generation(); + self.vm.dispatch_call_from_host( self.dispatch_ptr.clone(), #[cfg(gdb)] @@ -440,6 +451,10 @@ impl MultiUseSandbox { if res.is_err() { self.mem_mgr.clear_io_buffers(); } + + // Mark that the guest function call has completed + self.vm.interrupt_handle().clear_call_active(); + res } diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index 0869a9d5a..0f65f05bf 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -68,7 +68,9 @@ fn interrupt_host_call() { } }); - let result = sandbox.call::("CallHostSpin", ()).unwrap_err(); + let result = sandbox.call::("CallHostSpin", ()); + println!("Result: {:?}", result); + let result = result.unwrap_err(); assert!(matches!(result, HyperlightError::ExecutionCanceledByHost())); thread.join().unwrap(); @@ -105,7 +107,8 @@ fn interrupt_in_progress_guest_call() { thread.join().expect("Thread should finish"); } -/// Makes sure interrupting a vm before the guest call has started also prevents the guest call from being executed +/// Makes sure interrupting a vm before the guest call has started has no effect, +/// but a second kill after the call starts will interrupt it #[test] fn interrupt_guest_call_in_advance() { let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); @@ -114,15 +117,20 @@ fn interrupt_guest_call_in_advance() { let interrupt_handle = sbox1.interrupt_handle(); assert!(!interrupt_handle.dropped()); // not yet dropped - // kill vm before the guest call has started + // First kill before the guest call has started - should have no effect + // Then kill again after a delay to interrupt the actual call let thread = thread::spawn(move || { - assert!(!interrupt_handle.kill()); // should return false since vcpu is not running yet + assert!(!interrupt_handle.kill()); // should return false since no call is active barrier2.wait(); + // Wait a bit for the Spin call to actually start + thread::sleep(Duration::from_millis(100)); + assert!(interrupt_handle.kill()); // this should succeed and interrupt the Spin call barrier2.wait(); // wait here until main thread has dropped the sandbox assert!(interrupt_handle.dropped()); }); - barrier.wait(); // wait until `kill()` is called before starting the guest call + barrier.wait(); // wait until first `kill()` is called before starting the guest call + // The Spin call should be interrupted by the second kill() let res = sbox1.call::("Spin", ()).unwrap_err(); assert!(matches!(res, HyperlightError::ExecutionCanceledByHost())); @@ -857,3 +865,369 @@ fn test_if_guest_is_able_to_get_string_return_values_from_host() { "Guest Function, string added by Host 
Function".to_string() ); } + +/// Test that monitors CPU time usage and can interrupt a guest based on CPU time limits +/// Uses a pool of 100 sandboxes, 100 threads, and 500 iterations per thread +/// Some sandboxes are expected to complete normally, some are expected to be killed +/// This test makes sure that a reused sandbox is not killed in the case where the previous +/// execution was killed due to CPU time limit but the invocation completed normally before the cancel was processed. +#[test] +fn test_cpu_time_interrupt() { + use std::collections::VecDeque; + use std::mem::MaybeUninit; + use std::sync::Mutex; + use std::sync::atomic::AtomicUsize; + use std::sync::mpsc::channel; + + const POOL_SIZE: usize = 100; + const NUM_THREADS: usize = 100; + const ITERATIONS_PER_THREAD: usize = 500; + + // Create a pool of 100 sandboxes + println!("Creating pool of {} sandboxes...", POOL_SIZE); + let mut sandbox_pool: Vec = Vec::with_capacity(POOL_SIZE); + for i in 0..POOL_SIZE { + let sandbox = new_uninit_rust().unwrap().evolve().unwrap(); + if (i + 1) % 10 == 0 { + println!("Created {}/{} sandboxes", i + 1, POOL_SIZE); + } + sandbox_pool.push(sandbox); + } + + // Wrap the pool in Arc> for thread-safe access + let pool = Arc::new(Mutex::new(VecDeque::from(sandbox_pool))); + + // Counters for statistics + let total_iterations = Arc::new(AtomicUsize::new(0)); + let killed_count = Arc::new(AtomicUsize::new(0)); + let completed_count = Arc::new(AtomicUsize::new(0)); + let errors_count = Arc::new(AtomicUsize::new(0)); + + println!( + "Starting {} threads with {} iterations each...", + NUM_THREADS, ITERATIONS_PER_THREAD + ); + + // Spawn worker threads + let mut thread_handles = vec![]; + for thread_id in 0..NUM_THREADS { + let pool_clone = Arc::clone(&pool); + let total_iterations_clone = Arc::clone(&total_iterations); + let killed_count_clone = Arc::clone(&killed_count); + let completed_count_clone = Arc::clone(&completed_count); + let errors_count_clone = Arc::clone(&errors_count); + + let handle = thread::spawn(move || { + for iteration in 0..ITERATIONS_PER_THREAD { + // === START OF ITERATION === + // Get a fresh sandbox from the pool for this iteration + let mut sandbox = { + let mut pool_guard = pool_clone.lock().unwrap(); + // Wait if pool is empty (shouldn't happen with proper design) + while pool_guard.is_empty() { + drop(pool_guard); + thread::sleep(Duration::from_micros(100)); + pool_guard = pool_clone.lock().unwrap(); + } + pool_guard.pop_front().unwrap() + }; + + // Vary CPU time between 3ms and 7ms to ensure some get killed and some complete + // The CPU limit is 5ms, so: + // - 3-4ms should complete normally + // - 6-7ms should be killed + // - 5ms is borderline and could go either way + let cpu_time_ms = + 3 + (((thread_id * ITERATIONS_PER_THREAD + iteration) % 5) as u32); + + let interrupt_handle = sandbox.interrupt_handle(); + + // Channel to send the thread ID + let (tx, rx) = channel(); + + // Flag to signal monitoring start + let should_monitor = Arc::new(AtomicBool::new(false)); + let should_monitor_clone = should_monitor.clone(); + + // Flag to signal monitoring stop (when guest execution completes) + let stop_monitoring = Arc::new(AtomicBool::new(false)); + let stop_monitoring_clone = stop_monitoring.clone(); + + // Flag to track if we actually sent a kill signal + let was_killed = Arc::new(AtomicBool::new(false)); + let was_killed_clone = was_killed.clone(); + + // Spawn CPU time monitor thread + let monitor_thread = thread::spawn(move || { + let main_thread_id = match 
rx.recv() { + Ok(tid) => tid, + Err(_) => return, + }; + + while !should_monitor_clone.load(Ordering::Acquire) { + thread::sleep(Duration::from_micros(50)); + } + + #[cfg(target_os = "linux")] + unsafe { + let mut clock_id: libc::clockid_t = 0; + if libc::pthread_getcpuclockid(main_thread_id, &mut clock_id) != 0 { + return; + } + + let cpu_limit_ns = 5_000_000; // 5ms CPU time limit + let mut start_time = MaybeUninit::::uninit(); + + if libc::clock_gettime(clock_id, start_time.as_mut_ptr()) != 0 { + return; + } + let start_time = start_time.assume_init(); + + loop { + // Check if we should stop monitoring (guest completed) + if stop_monitoring_clone.load(Ordering::Acquire) { + break; + } + + let mut current_time = MaybeUninit::::uninit(); + if libc::clock_gettime(clock_id, current_time.as_mut_ptr()) != 0 { + break; + } + let current_time = current_time.assume_init(); + + let elapsed_ns = (current_time.tv_sec - start_time.tv_sec) + * 1_000_000_000 + + (current_time.tv_nsec - start_time.tv_nsec); + + if elapsed_ns > cpu_limit_ns { + // Double-check that monitoring should still continue before killing + // The guest might have completed between our last check and now + if stop_monitoring_clone.load(Ordering::Acquire) { + break; + } + + // Mark that we sent a kill signal BEFORE calling kill + // to avoid race conditions + was_killed_clone.store(true, Ordering::Release); + interrupt_handle.kill(); + break; + } + + thread::sleep(Duration::from_micros(50)); + } + } + + #[cfg(target_os = "windows")] + unsafe { + use std::ffi::c_void; + + // On Windows, we use GetThreadTimes to get CPU time + // main_thread_id is a HANDLE on Windows + let thread_handle = main_thread_id as *mut c_void; + + let cpu_limit_ns: i64 = 1_000_000; // 5ms CPU time limit (in nanoseconds) + + let mut creation_time = + MaybeUninit::::uninit(); + let mut exit_time = + MaybeUninit::::uninit(); + let mut kernel_time_start = + MaybeUninit::::uninit(); + let mut user_time_start = + MaybeUninit::::uninit(); + + // Get initial CPU times + if windows_sys::Win32::System::Threading::GetThreadTimes( + thread_handle, + creation_time.as_mut_ptr(), + exit_time.as_mut_ptr(), + kernel_time_start.as_mut_ptr(), + user_time_start.as_mut_ptr(), + ) == 0 + { + return; + } + + let kernel_time_start = kernel_time_start.assume_init(); + let user_time_start = user_time_start.assume_init(); + + // Convert FILETIME to u64 (100-nanosecond intervals) + let start_cpu_time = ((kernel_time_start.dwHighDateTime as u64) << 32 + | kernel_time_start.dwLowDateTime as u64) + + ((user_time_start.dwHighDateTime as u64) << 32 + | user_time_start.dwLowDateTime as u64); + + loop { + // Check if we should stop monitoring (guest completed) + if stop_monitoring_clone.load(Ordering::Acquire) { + break; + } + + let mut kernel_time_current = + MaybeUninit::::uninit(); + let mut user_time_current = + MaybeUninit::::uninit(); + + if windows_sys::Win32::System::Threading::GetThreadTimes( + thread_handle, + creation_time.as_mut_ptr(), + exit_time.as_mut_ptr(), + kernel_time_current.as_mut_ptr(), + user_time_current.as_mut_ptr(), + ) == 0 + { + break; + } + + let kernel_time_current = kernel_time_current.assume_init(); + let user_time_current = user_time_current.assume_init(); + + // Convert FILETIME to u64 + let current_cpu_time = ((kernel_time_current.dwHighDateTime as u64) + << 32 + | kernel_time_current.dwLowDateTime as u64) + + ((user_time_current.dwHighDateTime as u64) << 32 + | user_time_current.dwLowDateTime as u64); + + // FILETIME is in 100-nanosecond intervals, convert 
to nanoseconds + let elapsed_ns = ((current_cpu_time - start_cpu_time) * 100) as i64; + + if elapsed_ns > cpu_limit_ns { + // Double-check that monitoring should still continue before killing + // The guest might have completed between our last check and now + if stop_monitoring_clone.load(Ordering::Acquire) { + break; + } + + // Mark that we sent a kill signal BEFORE calling kill + // to avoid race conditions + println!( + "Thread {} iteration {}: CPU time exceeded, sending kill signal", + thread_id, iteration + ); + was_killed_clone.store(true, Ordering::Release); + interrupt_handle.kill(); + break; + } + + thread::sleep(Duration::from_micros(50)); + } + } + }); + + // Send thread ID and start monitoring + #[cfg(target_os = "linux")] + unsafe { + let thread_id = libc::pthread_self(); + let _ = tx.send(thread_id); + } + + #[cfg(target_os = "windows")] + unsafe { + // On Windows, get the current thread's pseudo-handle + let thread_handle = windows_sys::Win32::System::Threading::GetCurrentThread(); + let _ = tx.send(thread_handle as usize); + } + + should_monitor.store(true, Ordering::Release); + + // Call the guest function + let result = sandbox.call::("SpinForMs", cpu_time_ms); + + // Signal the monitor to stop + stop_monitoring.store(true, Ordering::Release); + + // Wait for monitor thread to complete to ensure was_killed flag is set + let _ = monitor_thread.join(); + + // NOW check if we sent a kill signal (after monitor thread has completed) + let kill_was_sent = was_killed.load(Ordering::Acquire); + + // Process the result and validate correctness + match result { + Err(HyperlightError::ExecutionCanceledByHost()) => { + // We received a cancellation error + if !kill_was_sent { + // ERROR: We got a cancellation but never sent a kill! + panic!( + "Thread {} iteration {}: Got ExecutionCanceledByHost but no kill signal was sent!", + thread_id, iteration + ); + } + // This is correct - we sent kill and got the error + killed_count_clone.fetch_add(1, Ordering::Relaxed); + } + Ok(_) => { + // Execution completed normally + // This is OK whether or not we sent a kill - the guest might have + // finished just before the kill signal arrived + completed_count_clone.fetch_add(1, Ordering::Relaxed); + } + Err(e) => { + // Unexpected error + eprintln!( + "Thread {} iteration {}: Unexpected error: {:?}, kill_sent: {}", + thread_id, iteration, e, kill_was_sent + ); + errors_count_clone.fetch_add(1, Ordering::Relaxed); + } + } + + total_iterations_clone.fetch_add(1, Ordering::Relaxed); + + // Progress reporting + let current_total = total_iterations_clone.load(Ordering::Relaxed); + if current_total % 500 == 0 { + println!( + "Progress: {}/{} iterations completed", + current_total, + NUM_THREADS * ITERATIONS_PER_THREAD + ); + } + + // === END OF ITERATION === + // Return sandbox to pool for reuse by other threads/iterations + { + let mut pool_guard = pool_clone.lock().unwrap(); + pool_guard.push_back(sandbox); + } + } + }); + + thread_handles.push(handle); + } + + // Wait for all threads to complete + for handle in thread_handles { + handle.join().unwrap(); + } + + // Print statistics + let total = total_iterations.load(Ordering::Relaxed); + let killed = killed_count.load(Ordering::Relaxed); + let completed = completed_count.load(Ordering::Relaxed); + let errors = errors_count.load(Ordering::Relaxed); + + println!("\n=== Test Statistics ==="); + println!("Total iterations: {}", total); + println!("Killed (CPU limit exceeded): {}", killed); + println!("Completed normally: {}", completed); + 
println!("Errors: {}", errors); + println!("Kill rate: {:.1}%", (killed as f64 / total as f64) * 100.0); + + // Verify we had both kills and completions + assert!( + killed > 0, + "Expected some executions to be killed, but none were" + ); + assert!( + completed > 0, + "Expected some executions to complete, but none did" + ); + assert_eq!(errors, 0, "Expected no errors, but got {}", errors); + assert_eq!( + total, + NUM_THREADS * ITERATIONS_PER_THREAD, + "Not all iterations completed" + ); +} diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index bdf8d5f62..a761a1358 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -583,6 +583,39 @@ fn spin(_: &FunctionCall) -> Result> { Ok(get_flatbuffer_result(())) } +/// Spins the CPU for approximately the specified number of milliseconds +fn spin_for_ms(fc: &FunctionCall) -> Result> { + let milliseconds = if let ParameterValue::UInt(ms) = fc.parameters.clone().unwrap()[0].clone() { + ms + } else { + return Err(HyperlightGuestError::new( + ErrorCode::GuestFunctionParameterTypeMismatch, + "Expected UInt parameter".to_string(), + )); + }; + + // Simple busy-wait loop - not precise but good enough for testing + // Different iteration counts for debug vs release mode to ensure reasonable CPU usage + #[cfg(debug_assertions)] + let iterations_per_ms = 120_000; // Debug mode - less optimized, tuned for ~50% kill rate + + #[cfg(not(debug_assertions))] + let iterations_per_ms = 1_000_000; // Release mode - highly optimized + + let total_iterations = milliseconds * iterations_per_ms; + + let mut counter: u64 = 0; + for _ in 0..total_iterations { + // Prevent the compiler from optimizing away the loop + counter = counter.wrapping_add(1); + core::hint::black_box(counter); + } + + // Calculate the actual number of milliseconds spun for, based on the counter and iterations per ms + let ms_spun = counter / iterations_per_ms as u64; + Ok(get_flatbuffer_result(ms_spun)) +} + #[hyperlight_guest_tracing::trace_function] fn test_abort(function_call: &FunctionCall) -> Result> { if let ParameterValue::Int(code) = function_call.parameters.clone().unwrap()[0].clone() { @@ -1366,6 +1399,14 @@ pub extern "C" fn hyperlight_main() { ); register_function(spin_def); + let spin_for_ms_def = GuestFunctionDefinition::new( + "SpinForMs".to_string(), + Vec::from(&[ParameterType::UInt]), + ReturnType::ULong, + spin_for_ms as usize, + ); + register_function(spin_for_ms_def); + let abort_def = GuestFunctionDefinition::new( "GuestAbortWithCode".to_string(), Vec::from(&[ParameterType::Int]),