Skip to content

Commit c941efd

Browse files
committed
vmm/linux/tee: Handle KVM_EXIT_HYPERCALL exits
SEV-SNP guests use KVM_EXIT_HYPERCALL exits to signal to the hypervisor that they would like some pages set to private or shared. This commit implements a handler that manages guest memory and can set regions to private or shared. vCPUs can send "memory properties" messages to the handler indicating the guest GPA, the size of the memory region, and whether the region should be set to private or shared. The handler reads these messages and configures the memory regions accordingly. Signed-off-by: Tyler Fanelli <[email protected]>
1 parent 0456c90 commit c941efd

File tree

6 files changed

+198
-7
lines changed

6 files changed

+198
-7
lines changed

Cargo.lock

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/libkrun/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ vmm = { path = "../vmm" }
3131
[target.'cfg(target_os = "macos")'.dependencies]
3232
hvf = { path = "../hvf" }
3333

34+
[target.'cfg(target_os = "linux")'.dependencies]
35+
kvm-bindings = ">=0.11"
36+
vm-memory = ">=0.13"
37+
3438
[lib]
3539
name = "krun"
3640
crate-type = ["cdylib"]

src/libkrun/src/lib.rs

Lines changed: 115 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ use std::collections::hash_map::Entry;
55
use std::collections::HashMap;
66
use std::convert::TryInto;
77
use std::env;
8+
#[cfg(feature = "tee")]
9+
use std::ffi::c_void;
810
use std::ffi::CStr;
911
#[cfg(target_os = "linux")]
1012
use std::ffi::CString;
@@ -20,7 +22,7 @@ use std::sync::atomic::{AtomicI32, Ordering};
2022
use std::sync::LazyLock;
2123
use std::sync::Mutex;
2224

23-
#[cfg(target_os = "macos")]
25+
#[cfg(any(target_os = "macos", feature = "tee"))]
2426
use crossbeam_channel::unbounded;
2527
#[cfg(feature = "blk")]
2628
use devices::virtio::block::ImageType;
@@ -54,6 +56,17 @@ use vmm::vmm_config::machine_config::VmConfig;
5456
use vmm::vmm_config::net::NetworkInterfaceConfig;
5557
use vmm::vmm_config::vsock::VsockDeviceConfig;
5658

59+
#[cfg(feature = "tee")]
60+
use kvm_bindings::{kvm_memory_attributes, KVM_MEMORY_ATTRIBUTE_PRIVATE};
61+
62+
#[cfg(feature = "tee")]
63+
use vm_memory::{guest_memory::GuestMemory, GuestAddress, GuestMemoryRegion, MemoryRegionAddress};
64+
65+
#[cfg(feature = "tee")]
66+
use libc::{
67+
fallocate, madvise, EFD_SEMAPHORE, FALLOC_FL_KEEP_SIZE, FALLOC_FL_PUNCH_HOLE, MADV_DONTNEED,
68+
};
69+
5770
// Value returned on success. We use libc's errors otherwise.
5871
const KRUN_SUCCESS: i32 = 0;
5972
// Maximum number of arguments/environment variables we allow
@@ -1471,12 +1484,25 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 {
14711484
#[cfg(target_os = "macos")]
14721485
let (sender, receiver) = unbounded();
14731486

1487+
#[cfg(feature = "tee")]
1488+
let (pm_sender, pm_receiver) = unbounded();
1489+
#[cfg(feature = "tee")]
1490+
let pm_efd =
1491+
EventFd::new(EFD_SEMAPHORE).expect("unable to create TEE memory properties eventfd");
1492+
14741493
let _vmm = match vmm::builder::build_microvm(
14751494
&ctx_cfg.vmr,
14761495
&mut event_manager,
14771496
ctx_cfg.shutdown_efd,
14781497
#[cfg(target_os = "macos")]
14791498
sender,
1499+
#[cfg(feature = "tee")]
1500+
(
1501+
pm_sender,
1502+
pm_efd
1503+
.try_clone()
1504+
.expect("unable to clone TEE memory properties eventfd"),
1505+
),
14801506
) {
14811507
Ok(vmm) => vmm,
14821508
Err(e) => {
@@ -1485,7 +1511,7 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 {
14851511
}
14861512
};
14871513

1488-
#[cfg(target_os = "macos")]
1514+
#[cfg(any(target_os = "macos", feature = "tee"))]
14891515
let mapper_vmm = _vmm.clone();
14901516

14911517
#[cfg(target_os = "macos")]
@@ -1508,6 +1534,93 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 {
15081534
.unwrap();
15091535
}
15101536

1537+
#[cfg(feature = "tee")]
1538+
let guest_mem = _vmm.lock().unwrap().guest_memory().clone();
1539+
1540+
#[cfg(feature = "tee")]
1541+
std::thread::Builder::new()
1542+
.name("TEE memory properties worker".into())
1543+
.spawn(move || loop {
1544+
match pm_receiver.recv() {
1545+
Err(e) => error!("Error in pm receiver: {:?}", e),
1546+
Ok(m) => {
1547+
let (guest_memfd, region_start) = mapper_vmm
1548+
.lock()
1549+
.unwrap()
1550+
.kvm_vm()
1551+
.guest_memfd_get(m.gpa)
1552+
.unwrap_or_else(|| panic!("unable to find KVM guest_memfd for memory region corresponding to GPA 0x{:x}", m.gpa));
1553+
1554+
let attributes: u64 = if m.private {
1555+
KVM_MEMORY_ATTRIBUTE_PRIVATE as u64
1556+
} else {
1557+
0
1558+
};
1559+
1560+
let attr = kvm_memory_attributes {
1561+
address: m.gpa,
1562+
size: m.size,
1563+
attributes,
1564+
flags: 0,
1565+
};
1566+
1567+
mapper_vmm
1568+
.lock()
1569+
.unwrap()
1570+
.kvm_vm()
1571+
.fd()
1572+
.set_memory_attributes(attr)
1573+
.unwrap_or_else(|_| panic!("unable to set memory attributes for memory region corresponding to guest address 0x{:x}", m.gpa));
1574+
1575+
let region = guest_mem.find_region(GuestAddress(m.gpa));
1576+
if region.is_none() {
1577+
error!("{}", format!("guest memory region corresponding to GPA 0x{:x} not found", m.gpa));
1578+
pm_efd.write(1).unwrap();
1579+
continue;
1580+
}
1581+
1582+
let offset = m.gpa - region_start;
1583+
1584+
if m.private {
1585+
let region_addr = MemoryRegionAddress(offset);
1586+
1587+
let host_startaddr = region
1588+
.unwrap()
1589+
.get_host_address(region_addr)
1590+
.expect("host address corresponding to memory region address 0x{:x} not found");
1591+
1592+
let ret = unsafe {
1593+
madvise(
1594+
host_startaddr as *mut c_void,
1595+
m.size.try_into().unwrap(),
1596+
MADV_DONTNEED,
1597+
)
1598+
};
1599+
1600+
if ret < 0 {
1601+
error!("{}", format!("unable to advise kernel that memory region corresponding to GPA 0x{:x} will likely not be needed (madvise)", m.gpa));
1602+
}
1603+
} else {
1604+
let ret = unsafe {
1605+
fallocate(
1606+
guest_memfd,
1607+
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
1608+
offset as i64,
1609+
m.size as i64,
1610+
)
1611+
};
1612+
1613+
if ret < 0 {
1614+
error!("{}", format!("unable to allocate space in guest_memfd for shared memory (fallocate)"));
1615+
}
1616+
}
1617+
1618+
pm_efd.write(1).unwrap();
1619+
}
1620+
}
1621+
})
1622+
.unwrap();
1623+
15111624
loop {
15121625
match event_manager.run() {
15131626
Ok(_) => {}

src/vmm/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ libc = ">=0.2.39"
2121
linux-loader = { version = "0.13.0", features = ["bzimage", "elf", "pe"] }
2222
log = "0.4.0"
2323
vm-memory = { version = ">=0.13", features = ["backend-mmap"] }
24+
rangemap = "1.5.1"
2425

2526
arch = { path = "../arch" }
2627
devices = { path = "../devices" }

src/vmm/src/builder.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@
44
//! Enables pre-boot setup, instantiation and booting of a Firecracker VMM.
55
66
#[cfg(target_os = "macos")]
7-
use crossbeam_channel::{unbounded, Sender};
7+
use crossbeam_channel::unbounded;
8+
9+
#[cfg(any(target_os = "macos", feature = "tee"))]
10+
use crossbeam_channel::Sender;
11+
812
use kernel::cmdline::Cmdline;
913
#[cfg(target_os = "macos")]
1014
use std::collections::HashMap;
@@ -21,6 +25,8 @@ use super::{Error, Vmm};
2125
#[cfg(target_arch = "x86_64")]
2226
use crate::device_manager::legacy::PortIODeviceManager;
2327
use crate::device_manager::mmio::MMIODeviceManager;
28+
#[cfg(feature = "tee")]
29+
use crate::linux::vstate::MemoryProperties;
2430
use crate::resources::VmResources;
2531
use crate::vmm_config::external_kernel::{ExternalKernel, KernelFormat};
2632
#[cfg(all(target_os = "linux", target_arch = "aarch64"))]
@@ -507,6 +513,7 @@ pub fn build_microvm(
507513
event_manager: &mut EventManager,
508514
_shutdown_efd: Option<EventFd>,
509515
#[cfg(target_os = "macos")] _map_sender: Sender<MemoryMapping>,
516+
#[cfg(feature = "tee")] pm_sender: (Sender<MemoryProperties>, EventFd),
510517
) -> std::result::Result<Arc<Mutex<Vmm>>, StartMicrovmError> {
511518
let payload = choose_payload(vm_resources)?;
512519

@@ -672,6 +679,8 @@ pub fn build_microvm(
672679
payload_config.entry_addr,
673680
&pio_device_manager.io_bus,
674681
&exit_evt,
682+
#[cfg(feature = "tee")]
683+
pm_sender,
675684
)
676685
.map_err(StartMicrovmError::Internal)?;
677686
}
@@ -1422,6 +1431,7 @@ fn create_vcpus_x86_64(
14221431
entry_addr: GuestAddress,
14231432
io_bus: &devices::Bus,
14241433
exit_evt: &EventFd,
1434+
#[cfg(feature = "tee")] pm_sender: (Sender<MemoryProperties>, EventFd),
14251435
) -> super::Result<Vec<Vcpu>> {
14261436
let mut vcpus = Vec::with_capacity(vcpu_config.vcpu_count as usize);
14271437
for cpu_index in 0..vcpu_config.vcpu_count {
@@ -1432,6 +1442,8 @@ fn create_vcpus_x86_64(
14321442
vm.supported_msrs().clone(),
14331443
io_bus.clone(),
14341444
exit_evt.try_clone().map_err(Error::EventFd)?,
1445+
#[cfg(feature = "tee")]
1446+
(pm_sender.0.clone(), pm_sender.1.try_clone().unwrap()),
14351447
)
14361448
.map_err(Error::Vcpu)?;
14371449

0 commit comments

Comments
 (0)