Skip to content

Commit 169e34b

Browse files
committed
Fix calling kill in advance
Signed-off-by: Ludvig Liljenberg <[email protected]>
1 parent 757eb87 commit 169e34b

File tree

2 files changed

+49
-58
lines changed

2 files changed

+49
-58
lines changed

src/hyperlight_host/src/hypervisor/mod.rs

Lines changed: 40 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -426,77 +426,61 @@ impl InterruptHandle for WindowsInterruptHandle {
426426
}
427427
}
428428

429-
/// Get the logging level to pass to the guest entrypoint
430-
fn get_max_log_level() -> u32 {
431-
// Check to see if the RUST_LOG environment variable is set
432-
// and if so, parse it to get the log_level for hyperlight_guest
433-
// if that is not set get the log level for the hyperlight_host
434-
435-
// This is done as the guest will produce logs based on the log level returned here
436-
// producing those logs is expensive and we don't want to do it if the host is not
437-
// going to process them
438-
439-
let val = std::env::var("RUST_LOG").unwrap_or_default();
440-
441-
let level = if val.contains("hyperlight_guest") {
442-
val.split(',')
443-
.find(|s| s.contains("hyperlight_guest"))
444-
.unwrap_or("")
445-
.split('=')
446-
.nth(1)
447-
.unwrap_or("")
448-
} else if val.contains("hyperlight_host") {
449-
val.split(',')
450-
.find(|s| s.contains("hyperlight_host"))
451-
.unwrap_or("")
452-
.split('=')
453-
.nth(1)
454-
.unwrap_or("")
455-
} else {
456-
// look for a value string that does not contain "="
457-
val.split(',').find(|s| !s.contains("=")).unwrap_or("")
458-
};
459-
460-
log::info!("Determined guest log level: {}", level);
461-
// Convert the log level string to a LevelFilter
462-
// If no value is found, default to Error
463-
LevelFilter::from_str(level).unwrap_or(LevelFilter::Error) as u32
464-
}
465-
466429
#[cfg(target_os = "windows")]
467430
#[derive(Debug)]
468431
pub(super) struct WindowsInterruptHandle {
469-
// `WHvCancelRunVirtualProcessor()` will return Ok even if the vcpu is not running, which is the reason we need this flag.
470-
running: AtomicBool,
471-
cancel_requested: AtomicBool,
432+
/// Atomic value packing vcpu execution state.
433+
///
434+
/// Bit layout:
435+
/// - Bit 1: RUNNING_BIT - set when vcpu is actively running
436+
/// - Bit 0: CANCEL_BIT - set when cancellation has been requested
437+
///
438+
/// `WHvCancelRunVirtualProcessor()` will return Ok even if the vcpu is not running,
439+
/// which is why we need the RUNNING_BIT.
440+
///
441+
/// CANCEL_BIT persists across vcpu exits/re-entries within a single `HyperlightVm::run()` call
442+
/// (e.g., during host function calls), but is cleared at the start of each new `HyperlightVm::run()` call.
443+
state: AtomicU64,
444+
472445
// This is used to signal the GDB thread to stop the vCPU
473446
#[cfg(gdb)]
474447
debug_interrupt: AtomicBool,
475448
partition_handle: windows::Win32::System::Hypervisor::WHV_PARTITION_HANDLE,
476449
dropped: AtomicBool,
477450
}
478451

452+
#[cfg(target_os = "windows")]
453+
impl WindowsInterruptHandle {
454+
const RUNNING_BIT: u64 = 1 << 1;
455+
const CANCEL_BIT: u64 = 1 << 0;
456+
}
457+
479458
#[cfg(target_os = "windows")]
480459
impl InterruptHandleImpl for WindowsInterruptHandle {
481460
fn set_tid(&self) {
482461
// No-op on Windows - we don't need to track thread ID
483462
}
484463

485464
fn set_running(&self) {
486-
self.running.store(true, Ordering::Relaxed);
465+
// Release ordering to ensure prior memory operations are visible when another thread observes running=true
466+
self.state.fetch_or(Self::RUNNING_BIT, Ordering::Release);
487467
}
488468

489469
fn is_cancelled(&self) -> bool {
490-
self.cancel_requested.load(Ordering::Relaxed)
470+
// Acquire ordering to synchronize with the Release in kill()
471+
// This ensures we see the CANCEL_BIT set by the interrupt thread
472+
self.state.load(Ordering::Acquire) & Self::CANCEL_BIT != 0
491473
}
492474

493475
fn clear_cancel(&self) {
494-
self.cancel_requested.store(false, Ordering::Relaxed);
476+
// Relaxed is sufficient here - we're the only thread that clears this bit
477+
// at the start of run(), and there's no data race on the clear operation itself
478+
self.state.fetch_and(!Self::CANCEL_BIT, Ordering::Relaxed);
495479
}
496480

497481
fn clear_running(&self) {
498-
// On Windows, clear running, cancel_requested, and debug_interrupt together
499-
self.running.store(false, Ordering::Relaxed);
482+
// Release ordering to ensure all vcpu operations are visible before clearing running
483+
self.state.fetch_and(!Self::RUNNING_BIT, Ordering::Release);
500484
#[cfg(gdb)]
501485
self.debug_interrupt.store(false, Ordering::Relaxed);
502486
}
@@ -528,16 +512,24 @@ impl InterruptHandle for WindowsInterruptHandle {
528512
fn kill(&self) -> bool {
529513
use windows::Win32::System::Hypervisor::WHvCancelRunVirtualProcessor;
530514

531-
self.cancel_requested.store(true, Ordering::Relaxed);
532-
self.running.load(Ordering::Relaxed)
515+
// Release ordering ensures that any writes before kill() are visible to the vcpu thread
516+
// when it checks is_cancelled() with Acquire ordering
517+
self.state.fetch_or(Self::CANCEL_BIT, Ordering::Release);
518+
519+
// Acquire ordering to synchronize with the Release in set_running()
520+
// This ensures we see the running state set by the vcpu thread
521+
let state = self.state.load(Ordering::Acquire);
522+
(state & Self::RUNNING_BIT != 0)
533523
&& unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() }
534524
}
535525
#[cfg(gdb)]
536526
fn kill_from_debugger(&self) -> bool {
537527
use windows::Win32::System::Hypervisor::WHvCancelRunVirtualProcessor;
538528

539529
self.debug_interrupt.store(true, Ordering::Relaxed);
540-
self.running.load(Ordering::Relaxed)
530+
// Acquire ordering to synchronize with the Release in set_running()
531+
let state = self.state.load(Ordering::Acquire);
532+
(state & Self::RUNNING_BIT != 0)
541533
&& unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() }
542534
}
543535

src/hyperlight_host/tests/integration_test.rs

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ fn interrupt_host_call() {
6363
}
6464
});
6565

66-
let result = sandbox.call::<i32>("CallHostSpin", ()).unwrap_err();
66+
let result = sandbox.call::<()>("CallHostSpin", ()).unwrap_err();
6767
assert!(matches!(result, HyperlightError::ExecutionCanceledByHost()));
6868
assert!(sandbox.poisoned());
6969

@@ -140,6 +140,9 @@ fn interrupt_guest_call_in_advance() {
140140
// Make sure we can still call guest functions after the VM was interrupted early
141141
// i.e. make sure we dont kill the next iteration.
142142
sbox1.call::<String>("Echo", "hello".to_string()).unwrap();
143+
assert!(!sbox1.poisoned());
144+
sbox1.call::<String>("Echo", "hello".to_string()).unwrap();
145+
assert!(!sbox1.poisoned());
143146

144147
// drop vm to make sure other thread can detect it
145148
drop(sbox1);
@@ -160,7 +163,7 @@ fn interrupt_guest_call_in_advance() {
160163
fn interrupt_same_thread() {
161164
let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap();
162165
let mut sbox2: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap();
163-
let snapshot2 = sbox2.snapshot().unwrap();
166+
let snapshot = sbox2.snapshot().unwrap();
164167
let mut sbox3: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap();
165168

166169
let barrier = Arc::new(Barrier::new(2));
@@ -191,7 +194,7 @@ fn interrupt_same_thread() {
191194
_ => panic!("Unexpected return"),
192195
};
193196
if sbox2.poisoned() {
194-
sbox2.restore(&snapshot2).unwrap();
197+
sbox2.restore(&snapshot).unwrap();
195198
}
196199
sbox3
197200
.call::<String>("Echo", "hello".to_string())
@@ -205,7 +208,7 @@ fn interrupt_same_thread() {
205208
fn interrupt_same_thread_no_barrier() {
206209
let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap();
207210
let mut sbox2: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap();
208-
let snapshot2 = sbox2.snapshot().unwrap();
211+
let snapshot = sbox2.snapshot().unwrap();
209212
let mut sbox3: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap();
210213

211214
let barrier = Arc::new(Barrier::new(2));
@@ -238,7 +241,7 @@ fn interrupt_same_thread_no_barrier() {
238241
_ => panic!("Unexpected return"),
239242
};
240243
if sbox2.poisoned() {
241-
sbox2.restore(&snapshot2).unwrap();
244+
sbox2.restore(&snapshot).unwrap();
242245
}
243246
sbox3
244247
.call::<String>("Echo", "hello".to_string())
@@ -365,11 +368,7 @@ fn interrupt_spamming_host_call() {
365368
.call::<i32>("HostCallLoop", "HostFunc1".to_string())
366369
.unwrap_err();
367370

368-
assert!(
369-
matches!(res, HyperlightError::ExecutionCanceledByHost()),
370-
"Expected ExecutionCanceledByHost error but got: {:?}",
371-
res
372-
);
371+
assert!(matches!(res, HyperlightError::ExecutionCanceledByHost()));
373372

374373
thread.join().expect("Thread should finish");
375374
}

0 commit comments

Comments
 (0)