@@ -338,8 +338,23 @@ pub trait InterruptHandle: Send + Sync {
338338#[ cfg( any( kvm,  mshv) ) ]  
339339#[ derive( Debug ) ]  
340340pub ( super )  struct  LinuxInterruptHandle  { 
341-     /// Invariant: vcpu is running => `running` is true. (Neither converse nor inverse is true) 
342-      running :  AtomicBool , 
341+     /// Invariant: vcpu is running => most significant bit (63) of `running` is set. (Neither converse nor inverse is true) 
342+      /// 
343+      /// Additionally, bit 0-62 tracks how many times the VCPU has been run. Incremented each time `run()` is called. 
344+      /// 
345+      /// This prevents an ABA problem where: 
346+      /// 1. The VCPU is running (generation N), 
347+      /// 2. It gets cancelled, 
348+      /// 3. Then quickly restarted (generation N+1), 
349+      ///     before the original thread has observed that it was cancelled. 
350+      /// 
351+      /// Without this generation counter, the interrupt logic might assume the VCPU is still 
352+      /// in the *original* run (generation N), see that it's `running`, and re-send the signal. 
353+      /// But the new VCPU run (generation N+1) would treat this as a stale signal and ignore it, 
354+      /// potentially causing an infinite loop where no effective interrupt is delivered. 
355+      /// 
356+      /// Invariant: If the VCPU is running, `running[bit 0-62]` matches the current run's generation.
357+      running :  AtomicU64 , 
343358    /// Invariant: vcpu is running => `tid` is the thread on which it is running. 
344359     /// Note: multiple vms may have the same `tid`, but at most one vm will have `running` set to true. 
345360     tid :  AtomicU64 , 
@@ -359,15 +374,61 @@ pub(super) struct LinuxInterruptHandle {
359374     sig_rt_min_offset :  u8 , 
360375} 
361376
#[cfg(any(kvm, mshv))]
impl LinuxInterruptHandle {
    /// Bit 63 of `running`: the "vcpu is running" flag.
    const RUNNING_BIT: u64 = 1 << 63;
    /// Largest generation representable in bits 0-62 of `running`; also the mask for those bits.
    const MAX_GENERATION: u64 = Self::RUNNING_BIT - 1;

    /// Sets the running bit and advances the generation by one in a single atomic update.
    ///
    /// The generation wraps back to 0 once it has reached `MAX_GENERATION`. The update
    /// closure always produces a new value, so the result is `Ok` carrying the raw value
    /// `running` held *before* the update (old generation plus the old running bit).
    fn set_running_and_increment_generation(&self) -> std::result::Result<u64, u64> {
        let advance = |previous: u64| {
            let current_generation = previous & Self::MAX_GENERATION;
            let next_generation = if current_generation == Self::MAX_GENERATION {
                // generation space exhausted: start over from 0
                0
            } else {
                current_generation + 1
            };
            Some(next_generation | Self::RUNNING_BIT)
        };

        self.running
            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, advance)
    }

    /// Clears the running bit while leaving the generation bits untouched.
    ///
    /// Returns the raw value `running` held before the bit was cleared. The value is not
    /// masked, so it still carries the running bit if that bit was set.
    fn clear_running_bit(&self) -> u64 {
        let generation_mask = !Self::RUNNING_BIT;
        self.running.fetch_and(generation_mask, Ordering::Relaxed)
    }

    /// Takes one atomic snapshot of `running` and splits it into `(running, generation)`.
    fn get_running_and_generation(&self) -> (bool, u64) {
        let snapshot = self.running.load(Ordering::Relaxed);
        let is_running = (snapshot & Self::RUNNING_BIT) != 0;
        (is_running, snapshot & Self::MAX_GENERATION)
    }
}
408+ 
362409#[ cfg( any( kvm,  mshv) ) ]  
363410impl  InterruptHandle  for  LinuxInterruptHandle  { 
364411    fn  kill ( & self )  -> bool  { 
365412        self . cancel_requested . store ( true ,  Ordering :: Relaxed ) ; 
366413
367414        let  signal_number = libc:: SIGRTMIN ( )  + self . sig_rt_min_offset  as  libc:: c_int ; 
368415        let  mut  sent_signal = false ; 
416+         let  mut  target_generation:  Option < u64 >  = None ; 
417+ 
418+         loop  { 
419+             let  ( running,  generation)  = self . get_running_and_generation ( ) ; 
420+ 
421+             if  !running { 
422+                 break ; 
423+             } 
424+ 
425+             match  target_generation { 
426+                 None  => target_generation = Some ( generation) , 
427+                 // prevent ABA problem 
428+                 Some ( expected)  if  expected != generation => break , 
429+                 _ => { } 
430+             } 
369431
370-         while  self . running . load ( Ordering :: Relaxed )  { 
371432            log:: info!( "Sending signal to kill vcpu thread..." ) ; 
372433            sent_signal = true ; 
373434            unsafe  { 
0 commit comments