Skip to content

Commit c8aab80

Browse files
kalyazin authored and roypat committed
feat(vmm): extend register_memory_regions with userfault bitmap
If configured, the userfault bitmap is registered with KVM and controls whether KVM exits to userspace on a fault of the corresponding page. Firecracker will allocate the bitmap in a memfd, set the bits for all pages to request notifications for vCPU faults, and send the bitmap to the UFFD handler, which is then responsible for clearing the bits as pages get populated. Since the KVM userfault patches are still in review, set_user_memory_region2 is not aware of the userfault flag or of the userfault bitmap address in its input structure, so the extended structure and the ioctl wrapper are defined in Firecracker code temporarily. Signed-off-by: Nikita Kalyazin <[email protected]>
1 parent b54fd29 commit c8aab80

File tree

3 files changed

+91
-20
lines changed

3 files changed

+91
-20
lines changed

src/vmm/src/builder.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ pub fn build_microvm_for_boot(
199199
.allocate_guest_memory(guest_memfd)
200200
.map_err(StartMicrovmError::GuestMemory)?;
201201

202-
vm.register_memory_regions(guest_memory)
202+
vm.register_memory_regions(guest_memory, None)
203203
.map_err(VmmError::Vm)?;
204204

205205
let mut device_manager = DeviceManager::new(
@@ -480,7 +480,7 @@ pub fn build_microvm_from_snapshot(
480480
.create_vcpus(vm_resources.machine_config.vcpu_count)
481481
.map_err(StartMicrovmError::Vm)?;
482482

483-
vm.register_memory_regions(guest_memory)
483+
vm.register_memory_regions(guest_memory, None)
484484
.map_err(StartMicrovmError::Vm)?;
485485

486486
#[cfg(target_arch = "x86_64")]

src/vmm/src/device_manager/mmio.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,7 @@ pub(crate) mod tests {
594594
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
595595
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
596596
let mut vm = Vm::new(&kvm, false).unwrap();
597-
vm.register_memory_regions(guest_mem).unwrap();
597+
vm.register_memory_regions(guest_mem, None).unwrap();
598598
let mut device_manager = MMIODeviceManager::new();
599599

600600
let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap();
@@ -640,7 +640,7 @@ pub(crate) mod tests {
640640
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
641641
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
642642
let mut vm = Vm::new(&kvm, false).unwrap();
643-
vm.register_memory_regions(guest_mem).unwrap();
643+
vm.register_memory_regions(guest_mem, None).unwrap();
644644
let mut device_manager = MMIODeviceManager::new();
645645

646646
let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap();
@@ -693,7 +693,7 @@ pub(crate) mod tests {
693693
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
694694
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
695695
let mut vm = Vm::new(&kvm, false).unwrap();
696-
vm.register_memory_regions(guest_mem).unwrap();
696+
vm.register_memory_regions(guest_mem, None).unwrap();
697697

698698
#[cfg(target_arch = "x86_64")]
699699
vm.setup_irqchip().unwrap();

src/vmm/src/vstate/vm.rs

Lines changed: 86 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ use std::sync::{Arc, Mutex, MutexGuard};
1717
use kvm_bindings::KVM_IRQCHIP_IOAPIC;
1818
use kvm_bindings::{
1919
KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI, KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES,
20-
KVM_MSI_VALID_DEVID, KvmIrqRouting, kvm_create_guest_memfd, kvm_irq_routing_entry,
21-
kvm_userspace_memory_region, kvm_userspace_memory_region2,
20+
KVM_MSI_VALID_DEVID, KVMIO, KvmIrqRouting, kvm_create_guest_memfd, kvm_irq_routing_entry,
21+
kvm_userspace_memory_region,
2222
};
2323
use kvm_ioctls::{Cap, VmFd};
2424
use log::{debug, error};
@@ -29,6 +29,8 @@ use vm_device::interrupt::{
2929
};
3030
use vmm_sys_util::errno;
3131
use vmm_sys_util::eventfd::EventFd;
32+
use vmm_sys_util::ioctl::ioctl_with_ref;
33+
use vmm_sys_util::ioctl_iow_nr;
3234

3335
pub use crate::arch::{ArchVm as Vm, ArchVmError, VmState};
3436
use crate::arch::{GSI_MSI_END, host_page_size};
@@ -289,6 +291,24 @@ pub enum VmError {
289291
GuestMemfdNotSupported,
290292
}
291293

294+
// Upstream `kvm_userspace_memory_region2` definition does not include `userfault_bitmap` field yet.
295+
// TODO: revert to `kvm_userspace_memory_region2` from kvm-bindings
296+
#[allow(non_camel_case_types)]
297+
#[repr(C)]
298+
#[derive(Debug, Default, Copy, Clone, PartialEq)]
299+
struct kvm_userspace_memory_region2 {
300+
slot: u32,
301+
flags: u32,
302+
guest_phys_addr: u64,
303+
memory_size: u64,
304+
userspace_addr: u64,
305+
guest_memfd_offset: u64,
306+
guest_memfd: u32,
307+
pad1: u32,
308+
userfault_bitmap: u64,
309+
pad2: [u64; 13],
310+
}
311+
292312
/// Contains Vm functions that are usable across CPU architectures
293313
impl Vm {
294314
/// Create a KVM VM
@@ -395,16 +415,61 @@ impl Vm {
395415
pub fn register_memory_regions(
396416
&mut self,
397417
regions: Vec<GuestRegionMmap>,
418+
mut userfault_bitmap: Option<&mut [u8]>,
398419
) -> Result<(), VmError> {
399420
for region in regions {
400-
self.register_memory_region(region)?
421+
let bitmap_slice = if let Some(remaining) = userfault_bitmap {
422+
let region_len = u64_to_usize(region.len());
423+
// Firecracker does not allow sub-MB granularity when allocating guest memory
424+
assert_eq!(region_len % (host_page_size() * u8::BITS as usize), 0);
425+
let bitmap_len = region_len / host_page_size() / (u8::BITS as usize);
426+
let (head, tail) = remaining.split_at_mut(bitmap_len);
427+
userfault_bitmap = Some(tail);
428+
Some(head)
429+
} else {
430+
None
431+
};
432+
self.register_memory_region(region, bitmap_slice)?
401433
}
402-
403434
Ok(())
404435
}
405436

437+
// TODO: remove when userfault support is merged upstream
438+
fn set_user_memory_region2(
439+
&self,
440+
user_memory_region2: kvm_userspace_memory_region2,
441+
) -> Result<(), VmError> {
442+
ioctl_iow_nr!(
443+
KVM_SET_USER_MEMORY_REGION2,
444+
KVMIO,
445+
0x49,
446+
kvm_userspace_memory_region2
447+
);
448+
449+
#[allow(clippy::undocumented_unsafe_blocks)]
450+
let ret = unsafe {
451+
ioctl_with_ref(
452+
self.fd(),
453+
KVM_SET_USER_MEMORY_REGION2(),
454+
&user_memory_region2,
455+
)
456+
};
457+
if ret == 0 {
458+
Ok(())
459+
} else {
460+
Err(VmError::SetUserMemoryRegion(kvm_ioctls::Error::last()))
461+
}
462+
}
463+
406464
/// Register a new memory region to this [`Vm`].
407-
pub fn register_memory_region(&mut self, region: GuestRegionMmap) -> Result<(), VmError> {
465+
pub fn register_memory_region(
466+
&mut self,
467+
region: GuestRegionMmap,
468+
userfault_bitmap: Option<&mut [u8]>,
469+
) -> Result<(), VmError> {
470+
// TODO: take it from kvm-bindings when merged upstream
471+
const KVM_MEM_USERFAULT: u32 = 1 << 3;
472+
408473
let next_slot = self
409474
.guest_memory()
410475
.num_regions()
@@ -432,6 +497,14 @@ impl Vm {
432497
(0, 0)
433498
};
434499

500+
let userfault_bitmap = match userfault_bitmap {
501+
Some(addr) => {
502+
flags |= KVM_MEM_USERFAULT;
503+
addr.as_ptr() as u64
504+
}
505+
None => 0,
506+
};
507+
435508
let memory_region = kvm_userspace_memory_region2 {
436509
slot: next_slot,
437510
guest_phys_addr: region.start_addr().raw_value(),
@@ -440,24 +513,22 @@ impl Vm {
440513
flags,
441514
guest_memfd,
442515
guest_memfd_offset,
516+
userfault_bitmap,
443517
..Default::default()
444518
};
445519

446520
let new_guest_memory = self.common.guest_memory.insert_region(Arc::new(region))?;
447521

448522
if self.fd().check_extension(Cap::UserMemory2) {
449-
// SAFETY: We are passing a valid memory region and operate on a valid KVM FD.
450-
unsafe {
451-
self.fd()
452-
.set_user_memory_region2(memory_region)
453-
.map_err(VmError::SetUserMemoryRegion)?;
454-
}
523+
self.set_user_memory_region2(memory_region)?;
455524
} else {
456525
// Something is seriously wrong if we manage to set these fields on a host that doesn't
457526
// even allow creation of guest_memfds!
458527
assert_eq!(memory_region.guest_memfd, 0);
459528
assert_eq!(memory_region.guest_memfd_offset, 0);
529+
assert_eq!(memory_region.userfault_bitmap, 0);
460530
assert_eq!(memory_region.flags & KVM_MEM_GUEST_MEMFD, 0);
531+
assert_eq!(memory_region.flags & KVM_MEM_USERFAULT, 0);
461532

462533
// SAFETY: We are passing a valid memory region and operate on a valid KVM FD.
463534
unsafe {
@@ -789,7 +860,7 @@ pub(crate) mod tests {
789860
pub(crate) fn setup_vm_with_memory(mem_size: usize) -> (Kvm, Vm) {
790861
let (kvm, mut vm) = setup_vm();
791862
let gm = single_region_mem_raw(mem_size);
792-
vm.register_memory_regions(gm).unwrap();
863+
vm.register_memory_regions(gm, None).unwrap();
793864
(kvm, vm)
794865
}
795866

@@ -819,14 +890,14 @@ pub(crate) mod tests {
819890
// Trying to set a memory region with a size that is not a multiple of GUEST_PAGE_SIZE
820891
// will result in error.
821892
let gm = single_region_mem_raw(0x10);
822-
let res = vm.register_memory_regions(gm);
893+
let res = vm.register_memory_regions(gm, None);
823894
assert_eq!(
824895
res.unwrap_err().to_string(),
825896
"Cannot set the memory regions: Invalid argument (os error 22)"
826897
);
827898

828899
let gm = single_region_mem_raw(0x1000);
829-
let res = vm.register_memory_regions(gm);
900+
let res = vm.register_memory_regions(gm, None);
830901
res.unwrap();
831902
}
832903

@@ -861,7 +932,7 @@ pub(crate) mod tests {
861932

862933
let region = GuestRegionMmap::new(region, GuestAddress(i as u64 * 0x1000)).unwrap();
863934

864-
let res = vm.register_memory_region(region);
935+
let res = vm.register_memory_region(region, None);
865936

866937
if max_nr_regions <= i {
867938
assert!(

0 commit comments

Comments
 (0)