@@ -395,6 +395,7 @@ impl HypervLinuxDriver {
395395 orig_rsp : rsp_ptr,
396396 interrupt_handle : Arc :: new ( LinuxInterruptHandle {
397397 running : AtomicBool :: new ( false ) ,
398+ cancel_requested : AtomicBool :: new ( false ) ,
398399 tid : AtomicU64 :: new ( unsafe { libc:: pthread_self ( ) } ) ,
399400 dropped : AtomicBool :: new ( false ) ,
400401 } ) ,
@@ -584,37 +585,52 @@ impl Hypervisor for HypervLinuxDriver {
584585 self . interrupt_handle
585586 . tid
586587 . store ( unsafe { libc:: pthread_self ( ) as u64 } , Ordering :: Relaxed ) ;
587- // Note: if a `InterruptHandle::kill()` signal is delivered to this thread **here**
588- // - before we've set the running to true,
589- // Then the signal does not have any effect, because the signal handler is a no-op.
588+ // Note: if `InterruptHandle::kill()` is called while this thread is **here**
589+ // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call
590590 self . interrupt_handle . running . store ( true , Ordering :: Relaxed ) ;
591- // Note: if a `InterruptHandle::kill()` signal is delivered to this thread **here**
592- // - after we've set the running to true,
593- // - before we've called `VcpuFd::run()`
594- // Then the individual signal is lost, because the signal is only processed after we've left userspace.
595- // However, for this reason, we keep sending the signal again and again until we see that the atomic `running` is set to false.
596- #[ cfg( mshv2) ]
597- let run_result = {
598- let hv_message: hv_message = Default :: default ( ) ;
599- self . vcpu_fd . run ( hv_message)
591+ // Don't run the vcpu if `cancel_requested` is true
592+ //
593+ // Note: if `InterruptHandle::kill()` is called while this thread is **here**
594+ // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call
595+ let exit_reason = if self
596+ . interrupt_handle
597+ . cancel_requested
598+ . swap ( false , Ordering :: Relaxed )
599+ {
600+ return Ok ( HyperlightExit :: Cancelled ( ) ) ;
601+ } else {
602+ // Note: if `InterruptHandle::kill()` is called while this thread is **here**
603+ // Then the vcpu will run, but we will keep sending signals to this thread
604+ // to interrupt it until `running` is set to false. The `vcpu_fd::run()` call will
605+ // return either normally with an exit reason, or from being "kicked" by our signal handler, with an EINTR error,
606+ // both of which are fine.
607+ #[ cfg( mshv2) ]
608+ {
609+ let hv_message: hv_message = Default :: default ( ) ;
610+ self . vcpu_fd . run ( hv_message)
611+ }
612+ #[ cfg( mshv3) ]
613+ self . vcpu_fd . run ( )
600614 } ;
601- #[ cfg( mshv3) ]
602- let run_result = self . vcpu_fd . run ( ) ;
603- // Note: if a `InterruptHandle::kill()` signal is delivered to this thread **here**
604- // - after we've called `VcpuFd::run()`
605- // - before we've set the running to false
606- // Then this is fine because the call to `VcpuFd::run()` is already finished,
607- // the signal handler itself is a no-op, and the signals will stop being sent
608- // once we've set the `running` to false.
615+ // Note: if `InterruptHandle::kill()` is called while this thread is **here**
616+ // Then signals will be sent to this thread until `running` is set to false.
617+ // This is fine since the signal handler is a no-op.
618+ let cancel_requested = self
619+ . interrupt_handle
620+ . cancel_requested
621+ . swap ( false , Ordering :: Relaxed ) ;
622+ // Note: if `InterruptHandle::kill()` is called while this thread is **here**
623+ // Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**.
624+ // Additionally signals will be sent to this thread until `running` is set to false.
625+ // This is fine since the signal handler is a no-op.
609626 self . interrupt_handle
610627 . running
611628 . store ( false , Ordering :: Relaxed ) ;
612- // Note: if a `InterruptHandle::kill()` signal is delivered to this thread **here**
613- // - after we've set the running to false,
614- // Then the signal does not have any effect, because the signal handler is a no-op.
615- // This is fine since we are already done with the `VcpuFd::run()` call.
616-
617- let result = match run_result {
629+ // At this point, `running` is false so no more signals will be sent to this thread,
630+ // but we may still receive async signals that were sent before this point.
631+ // To prevent those signals from interrupting subsequent calls to `run()`,
632+ // we make sure to check `cancel_requested` before cancelling (see `libc::EINTR` match-arm below).
633+ let result = match exit_reason {
618634 Ok ( m) => match m. header . message_type {
619635 HALT_MESSAGE => {
620636 crate :: debug!( "mshv - Halt Details : {:#?}" , & self ) ;
@@ -691,7 +707,15 @@ impl Hypervisor for HypervLinuxDriver {
691707 } ,
692708 Err ( e) => match e. errno ( ) {
693709 // we send a signal to the thread to cancel execution; this results in EINTR being returned by the hypervisor, so we return Cancelled (if cancellation was actually requested)
694- libc:: EINTR => HyperlightExit :: Cancelled ( ) ,
710+ libc:: EINTR => {
711+ // If cancellation was not requested for this specific vm, the vcpu was interrupted because of a stale signal
712+ // that was meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it
713+ if !cancel_requested {
714+ HyperlightExit :: Retry ( )
715+ } else {
716+ HyperlightExit :: Cancelled ( )
717+ }
718+ }
695719 libc:: EAGAIN => HyperlightExit :: Retry ( ) ,
696720 _ => {
697721 crate :: debug!( "mshv Error - Details: Error: {} \n {:#?}" , e, & self ) ;
0 commit comments