Skip to content

Commit 668aaaf

Browse files
committed
vmm/linux/tee: Handle KVM_EXIT_HYPERCALL exits
SEV-SNP guests use KVM_EXIT_HYPERCALL exits to signal to the hypervisor that they would like some pages set to private or shared. This commit implements a handler that manages guest memory and can set regions to private or shared. vCPUs can send "memory properties" messages to the handler indicating: (1) the guest physical address (GPA), (2) the size of the memory region, and (3) whether the region should be set to private or shared. The handler reads these messages and configures the memory regions accordingly. Signed-off-by: Tyler Fanelli <[email protected]>
1 parent d70169f commit 668aaaf

File tree

6 files changed

+198
-7
lines changed

6 files changed

+198
-7
lines changed

Cargo.lock

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/libkrun/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ vmm = { path = "../vmm" }
3131
[target.'cfg(target_os = "macos")'.dependencies]
3232
hvf = { path = "../hvf" }
3333

34+
[target.'cfg(target_os = "linux")'.dependencies]
35+
kvm-bindings = ">=0.11"
36+
vm-memory = ">=0.13"
37+
3438
[lib]
3539
name = "krun"
3640
crate-type = ["cdylib"]

src/libkrun/src/lib.rs

Lines changed: 115 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ use std::collections::hash_map::Entry;
55
use std::collections::HashMap;
66
use std::convert::TryInto;
77
use std::env;
8+
#[cfg(feature = "tee")]
9+
use std::ffi::c_void;
810
use std::ffi::CStr;
911
#[cfg(target_os = "linux")]
1012
use std::ffi::CString;
@@ -20,7 +22,7 @@ use std::sync::atomic::{AtomicI32, Ordering};
2022
use std::sync::LazyLock;
2123
use std::sync::Mutex;
2224

23-
#[cfg(target_os = "macos")]
25+
#[cfg(any(target_os = "macos", feature = "tee"))]
2426
use crossbeam_channel::unbounded;
2527
#[cfg(feature = "blk")]
2628
use devices::virtio::block::ImageType;
@@ -54,6 +56,17 @@ use vmm::vmm_config::machine_config::VmConfig;
5456
use vmm::vmm_config::net::NetworkInterfaceConfig;
5557
use vmm::vmm_config::vsock::VsockDeviceConfig;
5658

59+
#[cfg(feature = "tee")]
60+
use kvm_bindings::{kvm_memory_attributes, KVM_MEMORY_ATTRIBUTE_PRIVATE};
61+
62+
#[cfg(feature = "tee")]
63+
use vm_memory::{guest_memory::GuestMemory, GuestAddress, GuestMemoryRegion, MemoryRegionAddress};
64+
65+
#[cfg(feature = "tee")]
66+
use libc::{
67+
fallocate, madvise, EFD_SEMAPHORE, FALLOC_FL_KEEP_SIZE, FALLOC_FL_PUNCH_HOLE, MADV_DONTNEED,
68+
};
69+
5770
// Value returned on success. We use libc's errors otherwise.
5871
const KRUN_SUCCESS: i32 = 0;
5972
// Maximum number of arguments/environment variables we allow
@@ -1454,12 +1467,25 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 {
14541467
#[cfg(target_os = "macos")]
14551468
let (sender, receiver) = unbounded();
14561469

1470+
#[cfg(feature = "tee")]
1471+
let (pm_sender, pm_receiver) = unbounded();
1472+
#[cfg(feature = "tee")]
1473+
let pm_efd =
1474+
EventFd::new(EFD_SEMAPHORE).expect("unable to create TEE memory properties eventfd");
1475+
14571476
let _vmm = match vmm::builder::build_microvm(
14581477
&ctx_cfg.vmr,
14591478
&mut event_manager,
14601479
ctx_cfg.shutdown_efd,
14611480
#[cfg(target_os = "macos")]
14621481
sender,
1482+
#[cfg(feature = "tee")]
1483+
(
1484+
pm_sender,
1485+
pm_efd
1486+
.try_clone()
1487+
.expect("unable to clone TEE memory properties eventfd"),
1488+
),
14631489
) {
14641490
Ok(vmm) => vmm,
14651491
Err(e) => {
@@ -1468,7 +1494,7 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 {
14681494
}
14691495
};
14701496

1471-
#[cfg(target_os = "macos")]
1497+
#[cfg(any(target_os = "macos", feature = "tee"))]
14721498
let mapper_vmm = _vmm.clone();
14731499

14741500
#[cfg(target_os = "macos")]
@@ -1491,6 +1517,93 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 {
14911517
.unwrap();
14921518
}
14931519

1520+
#[cfg(feature = "tee")]
1521+
let guest_mem = _vmm.lock().unwrap().guest_memory().clone();
1522+
1523+
#[cfg(feature = "tee")]
1524+
std::thread::Builder::new()
1525+
.name("TEE memory properties worker".into())
1526+
.spawn(move || loop {
1527+
match pm_receiver.recv() {
1528+
Err(e) => error!("Error in pm receiver: {:?}", e),
1529+
Ok(m) => {
1530+
let (guest_memfd, region_start) = mapper_vmm
1531+
.lock()
1532+
.unwrap()
1533+
.kvm_vm()
1534+
.guest_memfd_get(m.gpa)
1535+
.unwrap_or_else(|| panic!("unable to find KVM guest_memfd for memory region corresponding to GPA 0x{:x}", m.gpa));
1536+
1537+
let attributes: u64 = if m.private {
1538+
KVM_MEMORY_ATTRIBUTE_PRIVATE as u64
1539+
} else {
1540+
0
1541+
};
1542+
1543+
let attr = kvm_memory_attributes {
1544+
address: m.gpa,
1545+
size: m.size,
1546+
attributes,
1547+
flags: 0,
1548+
};
1549+
1550+
mapper_vmm
1551+
.lock()
1552+
.unwrap()
1553+
.kvm_vm()
1554+
.fd()
1555+
.set_memory_attributes(attr)
1556+
.unwrap_or_else(|_| panic!("unable to set memory attributes for memory region corresponding to guest address 0x{:x}", m.gpa));
1557+
1558+
let region = guest_mem.find_region(GuestAddress(m.gpa));
1559+
if region.is_none() {
1560+
error!("{}", format!("guest memory region corresponding to GPA 0x{:x} not found", m.gpa));
1561+
pm_efd.write(1).unwrap();
1562+
continue;
1563+
}
1564+
1565+
let offset = m.gpa - region_start;
1566+
1567+
if m.private {
1568+
let region_addr = MemoryRegionAddress(offset);
1569+
1570+
let host_startaddr = region
1571+
.unwrap()
1572+
.get_host_address(region_addr)
1573+
.expect("host address corresponding to memory region address 0x{:x} not found");
1574+
1575+
let ret = unsafe {
1576+
madvise(
1577+
host_startaddr as *mut c_void,
1578+
m.size.try_into().unwrap(),
1579+
MADV_DONTNEED,
1580+
)
1581+
};
1582+
1583+
if ret < 0 {
1584+
error!("{}", format!("unable to advise kernel that memory region corresponding to GPA 0x{:x} will likely not be needed (madvise)", m.gpa));
1585+
}
1586+
} else {
1587+
let ret = unsafe {
1588+
fallocate(
1589+
guest_memfd,
1590+
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
1591+
offset as i64,
1592+
m.size as i64,
1593+
)
1594+
};
1595+
1596+
if ret < 0 {
1597+
error!("{}", format!("unable to allocate space in guest_memfd for shared memory (fallocate)"));
1598+
}
1599+
}
1600+
1601+
pm_efd.write(1).unwrap();
1602+
}
1603+
}
1604+
})
1605+
.unwrap();
1606+
14941607
loop {
14951608
match event_manager.run() {
14961609
Ok(_) => {}

src/vmm/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ libc = ">=0.2.39"
2121
linux-loader = { version = "0.13.0", features = ["bzimage", "elf", "pe"] }
2222
log = "0.4.0"
2323
vm-memory = { version = ">=0.13", features = ["backend-mmap"] }
24+
rangemap = "1.5.1"
2425

2526
arch = { path = "../arch" }
2627
devices = { path = "../devices" }

src/vmm/src/builder.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@
44
//! Enables pre-boot setup, instantiation and booting of a Firecracker VMM.
55
66
#[cfg(target_os = "macos")]
7-
use crossbeam_channel::{unbounded, Sender};
7+
use crossbeam_channel::unbounded;
8+
9+
#[cfg(any(target_os = "macos", feature = "tee"))]
10+
use crossbeam_channel::Sender;
11+
812
use kernel::cmdline::Cmdline;
913
#[cfg(target_os = "macos")]
1014
use std::collections::HashMap;
@@ -21,6 +25,8 @@ use super::{Error, Vmm};
2125
#[cfg(target_arch = "x86_64")]
2226
use crate::device_manager::legacy::PortIODeviceManager;
2327
use crate::device_manager::mmio::MMIODeviceManager;
28+
#[cfg(feature = "tee")]
29+
use crate::linux::vstate::MemoryProperties;
2430
use crate::resources::VmResources;
2531
use crate::vmm_config::external_kernel::{ExternalKernel, KernelFormat};
2632
#[cfg(all(target_os = "linux", target_arch = "aarch64"))]
@@ -507,6 +513,7 @@ pub fn build_microvm(
507513
event_manager: &mut EventManager,
508514
_shutdown_efd: Option<EventFd>,
509515
#[cfg(target_os = "macos")] _map_sender: Sender<MemoryMapping>,
516+
#[cfg(feature = "tee")] pm_sender: (Sender<MemoryProperties>, EventFd),
510517
) -> std::result::Result<Arc<Mutex<Vmm>>, StartMicrovmError> {
511518
let payload = choose_payload(vm_resources)?;
512519

@@ -672,6 +679,8 @@ pub fn build_microvm(
672679
payload_config.entry_addr,
673680
&pio_device_manager.io_bus,
674681
&exit_evt,
682+
#[cfg(feature = "tee")]
683+
pm_sender,
675684
)
676685
.map_err(StartMicrovmError::Internal)?;
677686
}
@@ -1419,6 +1428,7 @@ fn create_vcpus_x86_64(
14191428
entry_addr: GuestAddress,
14201429
io_bus: &devices::Bus,
14211430
exit_evt: &EventFd,
1431+
#[cfg(feature = "tee")] pm_sender: (Sender<MemoryProperties>, EventFd),
14221432
) -> super::Result<Vec<Vcpu>> {
14231433
let mut vcpus = Vec::with_capacity(vcpu_config.vcpu_count as usize);
14241434
for cpu_index in 0..vcpu_config.vcpu_count {
@@ -1429,6 +1439,8 @@ fn create_vcpus_x86_64(
14291439
vm.supported_msrs().clone(),
14301440
io_bus.clone(),
14311441
exit_evt.try_clone().map_err(Error::EventFd)?,
1442+
#[cfg(feature = "tee")]
1443+
(pm_sender.0.clone(), pm_sender.1.try_clone().unwrap()),
14321444
)
14331445
.map_err(Error::Vcpu)?;
14341446

0 commit comments

Comments
 (0)