Skip to content

Commit cb0a2f7

Browse files
committed
vmm/linux/tee: Handle KVM_EXIT_HYPERCALL exits
SEV-SNP guests use KVM_EXIT_HYPERCALL exits to signal to the hypervisor that they would like some pages set to private or shared. Implement a handler that manages guest memory and can set regions to private or shared. vCPUs can send "memory properties" messages to the handler indicating: - Guest physical address (GPA) - Size of the memory region - Whether the region should be set to private or shared. The handler reads these messages and configures the memory regions accordingly. Signed-off-by: Tyler Fanelli <[email protected]>
1 parent 49fe2d2 commit cb0a2f7

File tree

6 files changed

+207
-8
lines changed

6 files changed

+207
-8
lines changed

Cargo.lock

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/libkrun/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ vmm = { path = "../vmm" }
3131
[target.'cfg(target_os = "macos")'.dependencies]
3232
hvf = { path = "../hvf" }
3333

34+
[target.'cfg(target_os = "linux")'.dependencies]
35+
kvm-bindings = ">=0.11"
36+
vm-memory = ">=0.13"
37+
3438
[lib]
3539
name = "krun"
3640
crate-type = ["cdylib"]

src/libkrun/src/lib.rs

Lines changed: 115 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ use std::collections::hash_map::Entry;
55
use std::collections::HashMap;
66
use std::convert::TryInto;
77
use std::env;
8+
#[cfg(feature = "tee")]
9+
use std::ffi::c_void;
810
use std::ffi::CStr;
911
#[cfg(target_os = "linux")]
1012
use std::ffi::CString;
@@ -20,7 +22,7 @@ use std::sync::atomic::{AtomicI32, Ordering};
2022
use std::sync::LazyLock;
2123
use std::sync::Mutex;
2224

23-
#[cfg(target_os = "macos")]
25+
#[cfg(any(target_os = "macos", feature = "tee"))]
2426
use crossbeam_channel::unbounded;
2527
#[cfg(feature = "blk")]
2628
use devices::virtio::block::ImageType;
@@ -54,6 +56,17 @@ use vmm::vmm_config::machine_config::VmConfig;
5456
use vmm::vmm_config::net::NetworkInterfaceConfig;
5557
use vmm::vmm_config::vsock::VsockDeviceConfig;
5658

59+
#[cfg(feature = "tee")]
60+
use kvm_bindings::{kvm_memory_attributes, KVM_MEMORY_ATTRIBUTE_PRIVATE};
61+
62+
#[cfg(feature = "tee")]
63+
use vm_memory::{guest_memory::GuestMemory, GuestAddress, GuestMemoryRegion, MemoryRegionAddress};
64+
65+
#[cfg(feature = "tee")]
66+
use libc::{
67+
fallocate, madvise, EFD_SEMAPHORE, FALLOC_FL_KEEP_SIZE, FALLOC_FL_PUNCH_HOLE, MADV_DONTNEED,
68+
};
69+
5770
// Value returned on success. We use libc's errors otherwise.
5871
const KRUN_SUCCESS: i32 = 0;
5972
// Maximum number of arguments/environment variables we allow
@@ -1468,12 +1481,25 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 {
14681481
#[cfg(target_os = "macos")]
14691482
let (sender, receiver) = unbounded();
14701483

1484+
#[cfg(feature = "tee")]
1485+
let (pm_sender, pm_receiver) = unbounded();
1486+
#[cfg(feature = "tee")]
1487+
let pm_efd =
1488+
EventFd::new(EFD_SEMAPHORE).expect("unable to create TEE memory properties eventfd");
1489+
14711490
let _vmm = match vmm::builder::build_microvm(
14721491
&ctx_cfg.vmr,
14731492
&mut event_manager,
14741493
ctx_cfg.shutdown_efd,
14751494
#[cfg(target_os = "macos")]
14761495
sender,
1496+
#[cfg(feature = "tee")]
1497+
(
1498+
pm_sender,
1499+
pm_efd
1500+
.try_clone()
1501+
.expect("unable to clone TEE memory properties eventfd"),
1502+
),
14771503
) {
14781504
Ok(vmm) => vmm,
14791505
Err(e) => {
@@ -1482,7 +1508,7 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 {
14821508
}
14831509
};
14841510

1485-
#[cfg(target_os = "macos")]
1511+
#[cfg(any(target_os = "macos", feature = "tee"))]
14861512
let mapper_vmm = _vmm.clone();
14871513

14881514
#[cfg(target_os = "macos")]
@@ -1505,6 +1531,93 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 {
15051531
.unwrap();
15061532
}
15071533

1534+
#[cfg(feature = "tee")]
1535+
let guest_mem = _vmm.lock().unwrap().guest_memory().clone();
1536+
1537+
#[cfg(feature = "tee")]
1538+
std::thread::Builder::new()
1539+
.name("TEE memory properties worker".into())
1540+
.spawn(move || loop {
1541+
match pm_receiver.recv() {
1542+
Err(e) => error!("Error in pm receiver: {:?}", e),
1543+
Ok(m) => {
1544+
let (guest_memfd, region_start) = mapper_vmm
1545+
.lock()
1546+
.unwrap()
1547+
.kvm_vm()
1548+
.guest_memfd_get(m.gpa)
1549+
.unwrap_or_else(|| panic!("unable to find KVM guest_memfd for memory region corresponding to GPA 0x{:x}", m.gpa));
1550+
1551+
let attributes: u64 = if m.private {
1552+
KVM_MEMORY_ATTRIBUTE_PRIVATE as u64
1553+
} else {
1554+
0
1555+
};
1556+
1557+
let attr = kvm_memory_attributes {
1558+
address: m.gpa,
1559+
size: m.size,
1560+
attributes,
1561+
flags: 0,
1562+
};
1563+
1564+
mapper_vmm
1565+
.lock()
1566+
.unwrap()
1567+
.kvm_vm()
1568+
.fd()
1569+
.set_memory_attributes(attr)
1570+
.unwrap_or_else(|_| panic!("unable to set memory attributes for memory region corresponding to guest address 0x{:x}", m.gpa));
1571+
1572+
let region = guest_mem.find_region(GuestAddress(m.gpa));
1573+
if region.is_none() {
1574+
error!("{}", format!("guest memory region corresponding to GPA 0x{:x} not found", m.gpa));
1575+
pm_efd.write(1).unwrap();
1576+
continue;
1577+
}
1578+
1579+
let offset = m.gpa - region_start;
1580+
1581+
if m.private {
1582+
let region_addr = MemoryRegionAddress(offset);
1583+
1584+
let host_startaddr = region
1585+
.unwrap()
1586+
.get_host_address(region_addr)
1587+
.expect("host address corresponding to memory region address 0x{:x} not found");
1588+
1589+
let ret = unsafe {
1590+
madvise(
1591+
host_startaddr as *mut c_void,
1592+
m.size.try_into().unwrap(),
1593+
MADV_DONTNEED,
1594+
)
1595+
};
1596+
1597+
if ret < 0 {
1598+
error!("{}", format!("unable to advise kernel that memory region corresponding to GPA 0x{:x} will likely not be needed (madvise)", m.gpa));
1599+
}
1600+
} else {
1601+
let ret = unsafe {
1602+
fallocate(
1603+
guest_memfd,
1604+
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
1605+
offset as i64,
1606+
m.size as i64,
1607+
)
1608+
};
1609+
1610+
if ret < 0 {
1611+
error!("{}", format!("unable to allocate space in guest_memfd for shared memory (fallocate)"));
1612+
}
1613+
}
1614+
1615+
pm_efd.write(1).unwrap();
1616+
}
1617+
}
1618+
})
1619+
.unwrap();
1620+
15081621
loop {
15091622
match event_manager.run() {
15101623
Ok(_) => {}

src/vmm/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ libc = ">=0.2.39"
2121
linux-loader = { version = "0.13.0", features = ["bzimage", "elf", "pe"] }
2222
log = "0.4.0"
2323
vm-memory = { version = ">=0.13", features = ["backend-mmap"] }
24+
rangemap = "1.5.1"
2425

2526
arch = { path = "../arch" }
2627
devices = { path = "../devices" }

src/vmm/src/builder.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@
44
//! Enables pre-boot setup, instantiation and booting of a Firecracker VMM.
55
66
#[cfg(target_os = "macos")]
7-
use crossbeam_channel::{unbounded, Sender};
7+
use crossbeam_channel::unbounded;
8+
9+
#[cfg(any(target_os = "macos", feature = "tee"))]
10+
use crossbeam_channel::Sender;
11+
812
use kernel::cmdline::Cmdline;
913
#[cfg(target_os = "macos")]
1014
use std::collections::HashMap;
@@ -21,6 +25,8 @@ use super::{Error, Vmm};
2125
#[cfg(target_arch = "x86_64")]
2226
use crate::device_manager::legacy::PortIODeviceManager;
2327
use crate::device_manager::mmio::MMIODeviceManager;
28+
#[cfg(feature = "tee")]
29+
use crate::linux::vstate::MemoryProperties;
2430
use crate::resources::VmResources;
2531
use crate::vmm_config::external_kernel::{ExternalKernel, KernelFormat};
2632
#[cfg(all(target_os = "linux", target_arch = "aarch64"))]
@@ -507,6 +513,7 @@ pub fn build_microvm(
507513
event_manager: &mut EventManager,
508514
_shutdown_efd: Option<EventFd>,
509515
#[cfg(target_os = "macos")] _map_sender: Sender<MemoryMapping>,
516+
#[cfg(feature = "tee")] pm_sender: (Sender<MemoryProperties>, EventFd),
510517
) -> std::result::Result<Arc<Mutex<Vmm>>, StartMicrovmError> {
511518
let payload = choose_payload(vm_resources)?;
512519

@@ -672,6 +679,8 @@ pub fn build_microvm(
672679
payload_config.entry_addr,
673680
&pio_device_manager.io_bus,
674681
&exit_evt,
682+
#[cfg(feature = "tee")]
683+
pm_sender,
675684
)
676685
.map_err(StartMicrovmError::Internal)?;
677686
}
@@ -1422,6 +1431,7 @@ fn create_vcpus_x86_64(
14221431
entry_addr: GuestAddress,
14231432
io_bus: &devices::Bus,
14241433
exit_evt: &EventFd,
1434+
#[cfg(feature = "tee")] pm_sender: (Sender<MemoryProperties>, EventFd),
14251435
) -> super::Result<Vec<Vcpu>> {
14261436
let mut vcpus = Vec::with_capacity(vcpu_config.vcpu_count as usize);
14271437
for cpu_index in 0..vcpu_config.vcpu_count {
@@ -1432,6 +1442,8 @@ fn create_vcpus_x86_64(
14321442
vm.supported_msrs().clone(),
14331443
io_bus.clone(),
14341444
exit_evt.try_clone().map_err(Error::EventFd)?,
1445+
#[cfg(feature = "tee")]
1446+
(pm_sender.0.clone(), pm_sender.1.try_clone().unwrap()),
14351447
)
14361448
.map_err(Error::Vcpu)?;
14371449

0 commit comments

Comments
 (0)