Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 41 additions & 7 deletions src/hyperlight_host/src/hypervisor/hyperv_linux.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use std::fmt::{Debug, Formatter};
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::{Arc, Mutex};

use log::{LevelFilter, error};
use log::{LevelFilter, error, info};
#[cfg(mshv2)]
use mshv_bindings::hv_message;
use mshv_bindings::{
Expand Down Expand Up @@ -86,7 +86,6 @@ use crate::sandbox::outb::handle_outb;
#[cfg(crashdump)]
use crate::sandbox::uninitialized::SandboxRuntimeConfig;
use crate::{Result, log_then_return, new_error};

#[cfg(gdb)]
mod debug {
use std::sync::{Arc, Mutex};
Expand Down Expand Up @@ -419,6 +418,7 @@ impl HypervLinuxDriver {
let interrupt_handle = Arc::new(LinuxInterruptHandle {
running: AtomicU64::new(0),
cancel_requested: AtomicBool::new(false),
cancelled: AtomicBool::new(false),
#[cfg(gdb)]
debug_interrupt: AtomicBool::new(false),
#[cfg(all(
Expand Down Expand Up @@ -824,11 +824,43 @@ impl Hypervisor for HypervLinuxDriver {
.interrupt_handle
.debug_interrupt
.load(Ordering::Relaxed);

// Note: if a `InterruptHandle::kill()` called while this thread is **here**
// Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**.
// Then `cancel_requested` will be set to true again
// Additionally signals will be sent to this thread until `running` is set to false.
// This is fine since the signal handler is a no-op.
// This is fine since the signal handler is a no-op. However we need to stop the cancel_requested flag being acted on the next time this vCPU is run
// So we check to see if we really cancelled the vCPU or if it was just a normal exit and set the `cancelled` flag accordingly
// The order of setting the flags is important here, we need to set `cancelled` before clearing `running` as the `InterruptHandle::send_signal()` checks `running` to know when to stop sending signals
// and then checks to see if the vCPU was cancelled or not and clears the `cancel_requested` flag if it was.
// This prevents the case where we receive a cancel request after the vCPU has as below we only return cancelled if the vCPU was actually cancelled

if let Err(e) = &exit_reason
&& e.errno() == libc::EINTR
&& cancel_requested
{
log::debug!("VCPU run() was interrupted with EINTR because it was cancelled.");
self.interrupt_handle
.cancelled
.store(true, Ordering::Relaxed);
}

// This wont catch every single case as the cancel_requested flag could be set between this statement and the clearing of running (which will reset the flag) or the reset of the flag below
if let Ok(_) = &exit_reason
&& cancel_requested
{
log::warn!(
"VCPU was requested to cancel, but run() returned Ok. This can happen if the vCPU was already exiting when the cancel was requested."
);
}

self.interrupt_handle.clear_running_bit();

// Reset the cancel_requested flag if it was set for this run as we no longer need to know if the cancellation was requested or not since we set the cancelled flag above if it was

self.interrupt_handle
.cancel_requested
.store(false, Ordering::Relaxed);

// At this point, `running` is false so no more signals will be sent to this thread,
// but we may still receive async signals that were sent before this point.
// To prevent those signals from interrupting subsequent calls to `run()`,
Expand Down Expand Up @@ -914,11 +946,13 @@ impl Hypervisor for HypervLinuxDriver {
Err(e) => match e.errno() {
// we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled
libc::EINTR => {
// If cancellation was not requested for this specific vm, the vcpu was interrupted because of debug interrupt or
// If this specific VM was not cancelled, the vcpu was interrupted because of debug interrupt or
// a stale signal that meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it
if cancel_requested {
let cancelled = self.interrupt_handle.cancelled.load(Ordering::Relaxed);
if cancelled {
info!("VCPU run() was interrupted with EINTR because it was cancelled.");
self.interrupt_handle
.cancel_requested
.cancelled
.store(false, Ordering::Relaxed);
HyperlightExit::Cancelled()
} else {
Expand Down
47 changes: 40 additions & 7 deletions src/hyperlight_host/src/hypervisor/kvm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use std::sync::{Arc, Mutex};
use kvm_bindings::{kvm_fpu, kvm_regs, kvm_userspace_memory_region};
use kvm_ioctls::Cap::UserMemory;
use kvm_ioctls::{Kvm, VcpuExit, VcpuFd, VmFd};
use log::LevelFilter;
use log::{LevelFilter, info};
use tracing::{Span, instrument};
#[cfg(crashdump)]
use {super::crashdump, std::path::Path};
Expand Down Expand Up @@ -356,6 +356,7 @@ impl KVMDriver {
let interrupt_handle = Arc::new(LinuxInterruptHandle {
running: AtomicU64::new(0),
cancel_requested: AtomicBool::new(false),
cancelled: AtomicBool::new(false),
#[cfg(gdb)]
debug_interrupt: AtomicBool::new(false),
#[cfg(all(
Expand Down Expand Up @@ -685,7 +686,6 @@ impl Hypervisor for KVMDriver {
.debug_interrupt
.load(Ordering::Relaxed);
// Don't run the vcpu if `cancel_requested` is true
//
// Note: if a `InterruptHandle::kill()` called while this thread is **here**
// Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call
let exit_reason = if self
Expand Down Expand Up @@ -724,11 +724,42 @@ impl Hypervisor for KVMDriver {
.interrupt_handle
.debug_interrupt
.load(Ordering::Relaxed);

// Note: if a `InterruptHandle::kill()` called while this thread is **here**
// Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**.
// Then `cancel_requested` will be set to true again
// Additionally signals will be sent to this thread until `running` is set to false.
// This is fine since the signal handler is a no-op.
// This is fine since the signal handler is a no-op. However we need to stop the cancel_requested flag being acted on the next time this vCPU is run
// So we check to see if we really cancelled the vCPU or if it was just a normal exit and set the `cancelled` flag accordingly
// The order of setting the flags is important here, we need to set `cancelled` before clearing `running` as the `InterruptHandle::send_signal()` checks `running` to know when to stop sending signals
// and then checks to see if the vCPU was cancelled or not and clears the `cancel_requested` flag if it was.
// This prevents the case where we receive a cancel request after the vCPU has as below we only return cancelled if the vCPU was actually cancelled
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This sentence is a bit awkward


if let Err(e) = &exit_reason
&& e.errno() == libc::EINTR
&& cancel_requested
{
log::debug!("VCPU run() was interrupted with EINTR because it was cancelled.");
self.interrupt_handle
.cancelled
.store(true, Ordering::Relaxed);
}

// This wont catch every single case as the cancel_requested flag could be set between this statement and the clearing of running (which will reset the flag) or the reset of the flag below
if let Ok(_) = &exit_reason
&& cancel_requested
{
log::warn!(
"VCPU was requested to cancel, but run() returned Ok. This can happen if the vCPU was already exiting when the cancel was requested."
);
}

self.interrupt_handle.clear_running_bit();

// Reset the cancel_requested flag if it was set for this run as we no longer need to know if the cancellation was requested or not since we set the cancelled flag above if it was

self.interrupt_handle
.cancel_requested
.store(false, Ordering::Relaxed);
// At this point, `running` is false so no more signals will be sent to this thread,
// but we may still receive async signals that were sent before this point.
// To prevent those signals from interrupting subsequent calls to `run()` (on other vms!),
Expand Down Expand Up @@ -783,11 +814,13 @@ impl Hypervisor for KVMDriver {
Err(e) => match e.errno() {
// we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled
libc::EINTR => {
// If cancellation was not requested for this specific vm, the vcpu was interrupted because of debug interrupt or
// If this specific VM was not cancelled, the vcpu was interrupted because of debug interrupt or
// a stale signal that meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it
if cancel_requested {
let cancelled = self.interrupt_handle.cancelled.load(Ordering::Relaxed);
if cancelled {
info!("VCPU run() was interrupted with EINTR because it was cancelled.");
self.interrupt_handle
.cancel_requested
.cancelled
.store(false, Ordering::Relaxed);
HyperlightExit::Cancelled()
} else {
Expand Down
10 changes: 9 additions & 1 deletion src/hyperlight_host/src/hypervisor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -420,13 +420,17 @@ pub(super) struct LinuxInterruptHandle {
/// Note: multiple vms may have the same `tid`, but at most one vm will have `running` set to true.
tid: AtomicU64,
/// True when an "interruptor" has requested the VM to be cancelled. Set immediately when
/// `kill()` is called, and cleared when the vcpu is no longer running.
/// `kill()` is called, and cleared when the vcpu is no longer running or if the vcpu run returned OK (as we tried to cancel it but it exited cleanly in the meantime).
/// This is used to
/// 1. make sure stale signals do not interrupt the
/// the wrong vcpu (a vcpu may only be interrupted iff `cancel_requested` is true),
/// 2. ensure that if a vm is killed while a host call is running,
/// the vm will not re-enter the guest after the host call returns.
cancel_requested: AtomicBool,
/// True when the vcpu has been cancelled. Set after the VM has exited and a cancellation was requested.
/// Cleared when the cancellation is processed (i.e. before the cancelled error is returned to the user).
/// This is used to make sure that if a vm is killed but completes its run before the signal is delivered, we dont kill the next run.
cancelled: AtomicBool,
/// True when the debugger has requested the VM to be interrupted. Set immediately when
/// `kill_from_debugger()` is called, and cleared when the vcpu is no longer running.
/// This is used to make sure stale signals do not interrupt the the wrong vcpu
Expand Down Expand Up @@ -481,6 +485,10 @@ impl LinuxInterruptHandle {
let (running, generation) = self.get_running_and_generation();

if !running {
if self.cancelled.load(Ordering::Relaxed) {
log::debug!("VCPU run() was interrupted with EINTR because it was cancelled.");
self.cancel_requested.store(false, Ordering::Relaxed);
}
break;
}

Expand Down
Loading