Skip to content

Commit 5480e97

Browse files
committed
vmm: fix kicking vCPU out of KVM_RUN from signal handler
In the common scenario to break the KVM_RUN [0] call to interrupt the vCPU, KVM enforces for correct behavior that the `immediate_exit` flag [1] is set by the signal handler on each invocation. As the signal handler runs in the context of the vCPU thread, we can not use a Mutex for the vCPU structure. We therefore must write to the structure mutably from the signal handler, IGNORING ANY USERSPACE LOCKS. Otherwise, we have a deadlock. This introduces a (very hacky but good enough for a PoC) solution to this problem. [0] https://docs.kernel.org/virt/kvm/api.html#kvm-run [1] https://docs.kernel.org/virt/kvm/api.html#the-kvm-run-structure
1 parent e3b88fe commit 5480e97

File tree

6 files changed

+110
-3
lines changed

6 files changed

+110
-3
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

hypervisor/src/cpu.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
//
1111
//
1212

13+
#[cfg(feature = "kvm")]
14+
use std::os::fd::RawFd;
1315
#[cfg(target_arch = "aarch64")]
1416
use std::sync::Arc;
1517

@@ -602,4 +604,6 @@ pub trait Vcpu: Send + Sync {
602604
/// Trigger NMI interrupt
603605
///
604606
fn nmi(&self) -> Result<()>;
607+
#[cfg(feature = "kvm")]
608+
unsafe fn get_kvm_vcpu_raw_fd(&self) -> RawFd;
605609
}

hypervisor/src/kvm/mod.rs

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use std::collections::HashMap;
1616
use std::fs::File;
1717
#[cfg(target_arch = "x86_64")]
1818
use std::os::unix::io::AsRawFd;
19-
#[cfg(feature = "tdx")]
19+
#[cfg(any(feature = "tdx", feature = "kvm"))]
2020
use std::os::unix::io::RawFd;
2121
use std::result;
2222
#[cfg(target_arch = "x86_64")]
@@ -1987,7 +1987,8 @@ impl cpu::Vcpu for KvmVcpu {
19871987
/// Triggers the running of the current virtual CPU returning an exit reason.
19881988
///
19891989
fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
1990-
match self.fd.lock().unwrap().run() {
1990+
let mut lock = self.fd.lock().unwrap();
1991+
match lock.run() {
19911992
Ok(run) => match run {
19921993
#[cfg(target_arch = "x86_64")]
19931994
VcpuExit::IoIn(addr, data) => {
@@ -2066,7 +2067,11 @@ impl cpu::Vcpu for KvmVcpu {
20662067
},
20672068

20682069
Err(ref e) => match e.errno() {
2069-
libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
2070+
libc::EINTR => {
2071+
lock.set_kvm_immediate_exit(0);
2072+
Ok(cpu::VmExit::Ignore)
2073+
}
2074+
libc::EAGAIN => Ok(cpu::VmExit::Ignore),
20702075
_ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
20712076
"VCPU error {:?}",
20722077
e
@@ -2769,6 +2774,14 @@ impl cpu::Vcpu for KvmVcpu {
27692774
self.fd.lock().unwrap().set_kvm_immediate_exit(exit.into());
27702775
}
27712776

2777+
#[cfg(feature = "kvm")]
2778+
unsafe fn get_kvm_vcpu_raw_fd(&self) -> RawFd {
2779+
let kvm_vcpu = self.fd.lock().unwrap();
2780+
let kvm_vcpu = &*kvm_vcpu;
2781+
let kvm_vcpu_raw_fd = kvm_vcpu.as_raw_fd();
2782+
kvm_vcpu_raw_fd
2783+
}
2784+
27722785
///
27732786
/// Returns the details about TDX exit reason
27742787
///

vmm/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ linux-loader = { workspace = true, features = ["bzimage", "elf", "pe"] }
5252
log = "0.4.22"
5353
# Special fork of micro_http that combines HTTP traffic over a UNIX domain
5454
# socket with UNIX' SCM_RIGHTS mechanism for transferring file descriptors.
55+
kvm-bindings = "0.10.0"
5556
micro_http = { git = "https://github.com/firecracker-microvm/micro-http", branch = "main" }
5657
mshv-bindings = { workspace = true, features = [
5758
"fam-wrappers",

vmm/src/cpu.rs

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ use vm_migration::{
7373
use vmm_sys_util::eventfd::EventFd;
7474
use vmm_sys_util::signal::{register_signal_handler, SIGRTMIN};
7575
use zerocopy::{FromBytes, Immutable, IntoBytes};
76+
#[cfg(feature = "kvm")]
77+
use {kvm_bindings::kvm_run, std::os::fd::RawFd, std::sync::RwLock};
7678

7779
#[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
7880
use crate::coredump::{
@@ -90,6 +92,18 @@ use crate::vm::physical_bits;
9092
use crate::vm_config::CpusConfig;
9193
use crate::{GuestMemoryMmap, CPU_MANAGER_SNAPSHOT_ID};
9294

95+
struct SafeWrapper<T>(T);
96+
unsafe impl<T> Send for SafeWrapper<T> {}
97+
unsafe impl<T> Sync for SafeWrapper<T> {}
98+
99+
#[cfg(feature = "kvm")]
100+
static STATIC_VCPUS: RwLock<Vec<(u8 /* ID*/, SafeWrapper<*mut kvm_run>)>> = RwLock::new(vec![]);
101+
#[cfg(feature = "kvm")]
102+
/// Tell signal handler to not access certain stuff anymore during shutdown.
103+
/// Otherwise => panics.
104+
/// Better alternative would be to prevent signals.
105+
pub static IS_IN_SHUTDOWN: AtomicBool = AtomicBool::new(false);
106+
93107
#[cfg(all(target_arch = "aarch64", feature = "guest_debug"))]
94108
/// Extract the specified bits of a 64-bit integer.
95109
/// For example, to extrace 2 bits from offset 1 (zero based) of `6u64`,
@@ -487,6 +501,11 @@ impl Vcpu {
487501
.map_err(Error::VcpuSetGicrBaseAddr)?;
488502
Ok(())
489503
}
504+
505+
#[cfg(feature = "kvm")]
506+
pub fn get_kvm_vcpu_raw_fd(&self) -> RawFd {
507+
unsafe { self.vcpu.get_kvm_vcpu_raw_fd() }
508+
}
490509
}
491510

492511
impl Pausable for Vcpu {}
@@ -983,6 +1002,31 @@ impl CpuManager {
9831002
vcpu_thread_barrier: Arc<Barrier>,
9841003
inserting: bool,
9851004
) -> Result<()> {
1005+
{
1006+
#[cfg(feature = "kvm")]
1007+
{
1008+
let raw_kvm_fd = vcpu.lock().unwrap().get_kvm_vcpu_raw_fd();
1009+
1010+
let buffer = unsafe {
1011+
libc::mmap(
1012+
core::ptr::null_mut(),
1013+
4096,
1014+
libc::PROT_WRITE | libc::PROT_READ,
1015+
libc::MAP_SHARED,
1016+
raw_kvm_fd,
1017+
0,
1018+
)
1019+
};
1020+
assert_ne!(buffer, libc::MAP_FAILED);
1021+
1022+
let mut vcpus = STATIC_VCPUS.write().unwrap();
1023+
vcpus.push((
1024+
vcpu.lock().unwrap().id,
1025+
SafeWrapper(buffer.cast::<kvm_run>()),
1026+
));
1027+
}
1028+
}
1029+
9861030
let reset_evt = self.reset_evt.try_clone().unwrap();
9871031
let exit_evt = self.exit_evt.try_clone().unwrap();
9881032
#[cfg(feature = "kvm")]
@@ -1061,7 +1105,44 @@ impl CpuManager {
10611105
return;
10621106
}
10631107
}
1108+
1109+
#[cfg(not(feature = "kvm"))]
10641110
extern "C" fn handle_signal(_: i32, _: *mut siginfo_t, _: *mut c_void) {}
1111+
#[cfg(feature = "kvm")]
1112+
extern "C" fn handle_signal(_: i32, _: *mut siginfo_t, _: *mut c_void) {
1113+
if IS_IN_SHUTDOWN.load(Ordering::SeqCst) {
1114+
let mut lock = STATIC_VCPUS.write().unwrap();
1115+
// TODO We should also revert the mmap()
1116+
lock.drain(..);
1117+
1118+
// thread::current() panics in shutdown phase, when threadlocal data was
1119+
// already destroyed. We should instead prevent delivering signals in
1120+
// that step.
1121+
return;
1122+
}
1123+
1124+
let thread = self::thread::current();
1125+
let name = thread.name().unwrap();
1126+
let prefix = "vcpu";
1127+
let id = &name[prefix.len()..];
1128+
let vcpu_id = id.parse::<u8>().unwrap();
1129+
let vcpus = STATIC_VCPUS.read().unwrap();
1130+
for (vcpu_id_, kvm_run) in &*vcpus {
1131+
let kvm_run = kvm_run.0;
1132+
if *vcpu_id_ == vcpu_id {
1133+
/*let kvm_run = unsafe { kvm_run.as_mut() }.unwrap();
1134+
kvm_run.immediate_exit = std::hint::black_box(1);*/
1135+
1136+
// Write the second field: immediate exit
1137+
let ptr_kvm_run_immediate_exit = unsafe { kvm_run.cast::<u8>().add(1) };
1138+
unsafe {
1139+
core::ptr::write_volatile(ptr_kvm_run_immediate_exit, 1);
1140+
}
1141+
}
1142+
}
1143+
// TODO ::Release?
1144+
std::sync::atomic::fence(Ordering::SeqCst);
1145+
}
10651146
// This uses an async signal safe handler to kill the vcpu handles.
10661147
register_signal_handler(SIGRTMIN(), handle_signal)
10671148
.expect("Failed to register vcpu signal handler");

vmm/src/lib.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottabl
4646
use vmm_sys_util::eventfd::EventFd;
4747
use vmm_sys_util::signal::unblock_signal;
4848
use vmm_sys_util::sock_ctrl_msg::ScmSocket;
49+
#[cfg(feature = "kvm")]
50+
use {crate::cpu::IS_IN_SHUTDOWN, std::sync::atomic::Ordering};
4951

5052
use crate::api::{
5153
ApiRequest, ApiResponse, RequestHandler, VmInfoResponse, VmReceiveMigrationData,
@@ -1373,6 +1375,11 @@ impl Vmm {
13731375
vm.release_disk_locks()
13741376
.map_err(|e| MigratableError::UnlockError(anyhow!("{e}")))?;
13751377

1378+
#[cfg(feature = "kvm")]
1379+
// Prevent signal handler to access thread local storage when signals are received
1380+
// close to the end when thread-local storage is already destroyed.
1381+
IS_IN_SHUTDOWN.store(true, Ordering::SeqCst);
1382+
13761383
// Capture snapshot and send it
13771384
let vm_snapshot = vm.snapshot()?;
13781385
let snapshot_data = serde_json::to_vec(&vm_snapshot).unwrap();

0 commit comments

Comments
 (0)