Skip to content

Commit 5c3f283

Browse files
committed
Add support for KVM_EXIT_MEMORY_FAULT
The KVM_EXIT_MEMORY_FAULT vmexit is triggered when the guest wants to switch a region of memory from private to shared or vice versa. To support this when tee is enabled, add an extra thread named sender_io that gets the parameters from the vcpu thread and triggers set_memory_attributes(). The vcpu fd is owned only by this thread. Signed-off-by: Matias Ezequiel Vara Larsen <[email protected]>
1 parent 7dedbf8 commit 5c3f283

File tree

3 files changed

+159
-7
lines changed

3 files changed

+159
-7
lines changed

src/libkrun/src/lib.rs

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
#[macro_use]
22
extern crate log;
33

4+
#[cfg(feature = "tee")]
5+
use crossbeam_channel::unbounded;
6+
#[cfg(feature = "tee")]
7+
use kvm_bindings::kvm_memory_attributes;
8+
#[cfg(feature = "tee")]
9+
use libc::{fallocate, MADV_DONTNEED, madvise, FALLOC_FL_KEEP_SIZE, FALLOC_FL_PUNCH_HOLE};
410
use std::collections::hash_map::Entry;
511
use std::collections::HashMap;
612
use std::convert::TryInto;
@@ -11,10 +17,14 @@ use std::ffi::CString;
1117
#[cfg(target_os = "linux")]
1218
use std::os::fd::AsRawFd;
1319
use std::os::fd::RawFd;
20+
#[cfg(feature = "tee")]
21+
use std::os::raw::c_void;
1422
use std::path::PathBuf;
1523
use std::slice;
1624
use std::sync::atomic::{AtomicI32, Ordering};
1725
use std::sync::Mutex;
26+
#[cfg(feature = "tee")]
27+
use vm_memory::{Address, GuestAddress, GuestMemory, GuestMemoryRegion, MemoryRegionAddress};
1828

1929
#[cfg(target_os = "macos")]
2030
use crossbeam_channel::unbounded;
@@ -1225,12 +1235,17 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 {
12251235
#[cfg(target_os = "macos")]
12261236
let (sender, receiver) = unbounded();
12271237

1238+
#[cfg(feature = "tee")]
1239+
let (io_sender, receiver) = unbounded();
1240+
12281241
let _vmm = match vmm::builder::build_microvm(
12291242
&ctx_cfg.vmr,
12301243
&mut event_manager,
12311244
ctx_cfg.shutdown_efd,
12321245
#[cfg(target_os = "macos")]
12331246
sender,
1247+
#[cfg(feature = "tee")]
1248+
io_sender,
12341249
) {
12351250
Ok(vmm) => vmm,
12361251
Err(e) => {
@@ -1242,6 +1257,83 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 {
12421257
#[cfg(target_os = "macos")]
12431258
let mapper_vmm = _vmm.clone();
12441259

1260+
#[cfg(feature = "tee")]
1261+
let vm = _vmm.lock().unwrap().kvm_vm().fd.clone();
1262+
#[cfg(feature = "tee")]
1263+
let guest_mem = _vmm.lock().unwrap().guest_memory().clone();
1264+
#[cfg(feature = "tee")]
1265+
let guest_memfd = _vmm.lock().unwrap().guest_memfd_vec.clone();
1266+
1267+
#[cfg(feature = "tee")]
1268+
std::thread::spawn(move || loop {
1269+
match receiver.recv() {
1270+
Err(e) => error!("Error in receiver: {:?}", e),
1271+
Ok(m) => {
1272+
let _ret = vm
1273+
.lock()
1274+
.unwrap()
1275+
.set_memory_attributes(kvm_memory_attributes {
1276+
address: m.addr,
1277+
size: m.size,
1278+
attributes: m.attributes as u64,
1279+
flags: 0,
1280+
});
1281+
1282+
let region = guest_mem.find_region(GuestAddress(m.addr));
1283+
1284+
if let None = region {
1285+
error!("Region not found");
1286+
continue;
1287+
}
1288+
1289+
let offset = m.addr - region.unwrap().start_addr().raw_value();
1290+
1291+
let mut guest_memfd_index = 0;
1292+
1293+
// loop through the regions to get the index of the guestmemfd fd
1294+
// in the future, the fd may be stored in the GuestRegionMmap
1295+
for (index, region) in guest_mem.iter().enumerate() {
1296+
if (region.start_addr().raw_value() + region.size() as u64) > m.addr {
1297+
break;
1298+
}
1299+
guest_memfd_index = index;
1300+
}
1301+
1302+
// from private to shared
1303+
if m.attributes == 0 {
1304+
let ret = unsafe {
1305+
fallocate(
1306+
*guest_memfd.get(guest_memfd_index).unwrap(),
1307+
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
1308+
offset as i64,
1309+
m.size as i64,
1310+
)
1311+
};
1312+
if ret < 0 {
1313+
error!("fallocate has failed");
1314+
}
1315+
// from shared to private
1316+
} else {
1317+
let host_startaddr = region
1318+
.unwrap()
1319+
.get_host_address(MemoryRegionAddress(m.addr + offset));
1320+
1321+
let ret = unsafe {
1322+
madvise(
1323+
host_startaddr.unwrap() as *mut c_void,
1324+
m.size.try_into().unwrap(),
1325+
MADV_DONTNEED,
1326+
)
1327+
};
1328+
1329+
if ret < 0 {
1330+
error!("madvise has failed");
1331+
}
1332+
}
1333+
}
1334+
}
1335+
});
1336+
12451337
#[cfg(target_os = "macos")]
12461338
std::thread::Builder::new()
12471339
.name("mapping worker".into())

src/vmm/src/builder.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,12 @@
33

44
//! Enables pre-boot setup, instantiation and booting of a Firecracker VMM.
55
6+
#[cfg(feature = "tee")]
7+
use crate::vstate::MemProperties;
68
#[cfg(target_os = "macos")]
7-
use crossbeam_channel::{unbounded, Sender};
9+
use crossbeam_channel::unbounded;
10+
#[cfg(any(target_os = "macos", feature = "tee"))]
11+
use crossbeam_channel::Sender;
812
use std::fmt::{Display, Formatter};
913
use std::fs::File;
1014
use std::io;
@@ -354,6 +358,7 @@ pub fn build_microvm(
354358
event_manager: &mut EventManager,
355359
_shutdown_efd: Option<EventFd>,
356360
#[cfg(target_os = "macos")] _map_sender: Sender<MemoryMapping>,
361+
#[cfg(feature = "tee")] io_sender: Sender<MemProperties>,
357362
) -> std::result::Result<Arc<Mutex<Vmm>>, StartMicrovmError> {
358363
#[cfg(not(feature = "efi"))]
359364
let kernel_bundle = vm_resources
@@ -556,6 +561,7 @@ pub fn build_microvm(
556561
boot_ip,
557562
&pio_device_manager.io_bus,
558563
&exit_evt,
564+
// TODO: missing the io_sender
559565
)
560566
.map_err(StartMicrovmError::Internal)?;
561567
}
@@ -572,6 +578,8 @@ pub fn build_microvm(
572578
&guest_memory,
573579
GuestAddress(kernel_bundle.guest_addr),
574580
&exit_evt,
581+
#[cfg(feature = "tee")]
582+
io_sender,
575583
)
576584
.map_err(StartMicrovmError::Internal)?;
577585

@@ -1105,13 +1113,16 @@ fn create_vcpus_aarch64(
11051113
guest_mem: &GuestMemoryMmap,
11061114
entry_addr: GuestAddress,
11071115
exit_evt: &EventFd,
1116+
#[cfg(feature = "tee")] sender_io: Sender<MemProperties>,
11081117
) -> super::Result<Vec<Vcpu>> {
11091118
let mut vcpus = Vec::with_capacity(vcpu_config.vcpu_count as usize);
11101119
for cpu_index in 0..vcpu_config.vcpu_count {
11111120
let mut vcpu = Vcpu::new_aarch64(
11121121
cpu_index,
11131122
&vm.fd.lock().unwrap(),
11141123
exit_evt.try_clone().map_err(Error::EventFd)?,
1124+
#[cfg(feature = "tee")]
1125+
sender_io.clone(),
11151126
)
11161127
.map_err(Error::Vcpu)?;
11171128

src/vmm/src/linux/vstate.rs

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ use std::io;
1414
#[cfg(feature = "tee")]
1515
use std::os::unix::io::RawFd;
1616

17+
#[cfg(feature = "tee")]
18+
use kvm_ioctls::VcpuExit::Unsupported;
19+
1720
use std::sync::Arc;
1821
use std::sync::Mutex;
1922

@@ -51,7 +54,7 @@ use kvm_bindings::{
5154
#[cfg(feature = "tee")]
5255
use kvm_bindings::{
5356
kvm_create_guest_memfd, kvm_memory_attributes, kvm_userspace_memory_region2, KVM_API_VERSION,
54-
KVM_MEMORY_ATTRIBUTE_PRIVATE, KVM_MEM_GUEST_MEMFD,
57+
KVM_MEMORY_ATTRIBUTE_PRIVATE, KVM_MEMORY_EXIT_FLAG_PRIVATE, KVM_MEM_GUEST_MEMFD, KVM_EXIT_MEMORY_FAULT
5558
};
5659
#[cfg(not(feature = "tee"))]
5760
use kvm_bindings::{kvm_userspace_memory_region, KVM_API_VERSION};
@@ -835,6 +838,13 @@ pub struct VcpuConfig {
835838
pub cpu_template: Option<CpuFeaturesTemplate>,
836839
}
837840

841+
/// Request to change the attributes of a range of guest memory.
///
/// Built by the vcpu when it handles a `VcpuExit::MemoryFault` and sent over
/// the `sender_io` channel; the receiving thread passes these values to
/// `set_memory_attributes()`.
#[cfg(feature = "tee")]
842+
pub struct MemProperties {
843+
/// Guest physical address of the range, taken from the `gpa` of the memory fault exit.
pub addr: u64,
844+
/// Size of the range, taken from the `size` of the memory fault exit.
pub size: u64,
845+
/// Attributes to apply: `0` for a private-to-shared conversion, or
/// `KVM_MEMORY_ATTRIBUTE_PRIVATE` for a shared-to-private conversion.
pub attributes: u32,
846+
}
847+
838848
// Using this for easier explicit type-casting to help IDEs interpret the code.
839849
type VcpuCell = Cell<Option<*mut Vcpu>>;
840850

@@ -857,6 +867,10 @@ pub struct Vcpu {
857867
#[cfg(target_arch = "aarch64")]
858868
mpidr: u64,
859869

870+
// The transmitting end of the events channel which will be given to the vcpu side
871+
#[cfg(feature = "tee")]
872+
sender_io: Sender<MemProperties>,
873+
860874
// The receiving end of events channel owned by the vcpu side.
861875
event_receiver: Receiver<VcpuEvent>,
862876
// The transmitting end of the events channel which will be given to the handler.
@@ -998,7 +1012,12 @@ impl Vcpu {
9981012
/// * `exit_evt` - An `EventFd` that will be written into when this vcpu exits.
9991013
/// * `create_ts` - A timestamp used by the vcpu to calculate its lifetime.
10001014
#[cfg(target_arch = "aarch64")]
1001-
pub fn new_aarch64(id: u8, vm_fd: &VmFd, exit_evt: EventFd) -> Result<Self> {
1015+
pub fn new_aarch64(
1016+
id: u8,
1017+
vm_fd: &VmFd,
1018+
exit_evt: EventFd,
1019+
#[cfg(feature = "tee")] sender_io: Sender<MemProperties>,
1020+
) -> Result<Self> {
10021021
let kvm_vcpu = vm_fd.create_vcpu(id as u64).map_err(Error::VcpuFd)?;
10031022
let (event_sender, event_receiver) = unbounded();
10041023
let (response_sender, response_receiver) = unbounded();
@@ -1013,6 +1032,8 @@ impl Vcpu {
10131032
event_sender: Some(event_sender),
10141033
response_receiver: Some(response_receiver),
10151034
response_sender,
1035+
#[cfg(feature = "tee")]
1036+
sender_io,
10161037
})
10171038
}
10181039

@@ -1299,16 +1320,44 @@ impl Vcpu {
12991320
info!("Received KVM_EXIT_SHUTDOWN signal");
13001321
Ok(VcpuEmulation::Stopped)
13011322
}
1323+
#[cfg(feature = "tee")]
1324+
VcpuExit::MemoryFault { flags, gpa, size } => {
1325+
if flags & !KVM_MEMORY_EXIT_FLAG_PRIVATE as u64 != 0 {
1326+
error!("KVM_EXIT_MEMORY_FAULT: Unknown flag {}", flags);
1327+
Err(Error::VcpuUnhandledKvmExit)
1328+
} else {
1329+
// from private to shared
1330+
let mut attr = 0;
1331+
// from shared to private
1332+
if flags & KVM_MEMORY_EXIT_FLAG_PRIVATE as u64
1333+
== KVM_MEMORY_EXIT_FLAG_PRIVATE as u64
1334+
{
1335+
attr = KVM_MEMORY_ATTRIBUTE_PRIVATE;
1336+
};
1337+
1338+
let _ = self.sender_io.try_send(MemProperties {
1339+
addr: gpa,
1340+
size,
1341+
attributes: attr,
1342+
});
1343+
Ok(VcpuEmulation::Handled)
1344+
}
1345+
}
1346+
// Documentation specifies that when KVM exits with KVM_EXIT_MEMORY_FAULT,
1347+
// userspace should assume kvm_run.exit_reason is stale/undefined for error numbers
1348+
// different than EFAULT or EHWPOISON
1349+
#[cfg(feature = "tee")]
1350+
Unsupported(KVM_EXIT_MEMORY_FAULT) => Ok(VcpuEmulation::Handled),
1351+
VcpuExit::InternalError => {
1352+
error!("Received KVM_EXIT_INTERNAL_ERROR signal");
1353+
Err(Error::VcpuUnhandledKvmExit)
1354+
}
13021355
// Documentation specifies that below kvm exits are considered
13031356
// errors.
13041357
VcpuExit::FailEntry(reason, vcpu) => {
13051358
error!("Received KVM_EXIT_FAIL_ENTRY signal: reason={reason}, vcpu={vcpu}");
13061359
Err(Error::VcpuUnhandledKvmExit)
13071360
}
1308-
VcpuExit::InternalError => {
1309-
error!("Received KVM_EXIT_INTERNAL_ERROR signal");
1310-
Err(Error::VcpuUnhandledKvmExit)
1311-
}
13121361
r => {
13131362
// TODO: Are we sure we want to finish running a vcpu upon
13141363
// receiving a vm exit that is not necessarily an error?

0 commit comments

Comments
 (0)