Skip to content

Commit 10fe7f0

Browse files
kalyazin authored and roypat committed
feat(vmm): extend register_memory_regions with userfault bitmap
If configured, the userfault bitmap is registered with KVM and controls whether KVM will exit to userspace on a fault of the corresponding page.

We are going to allocate the bitmap in a memfd in Firecracker, set bits for all pages to request notifications for vCPU faults, and send it to the UFFD handler to delegate clearing the bits as pages get populated.

Since the KVM userfault patches are still in review, set_user_memory_region2 is not aware of the userfault flag and the userfault bitmap address in its input structure. Define it in Firecracker code temporarily.

Signed-off-by: Nikita Kalyazin <[email protected]>
1 parent 906a186 commit 10fe7f0

File tree

3 files changed

+91
-20
lines changed

3 files changed

+91
-20
lines changed

src/vmm/src/builder.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ pub fn build_microvm_for_boot(
199199
.allocate_guest_memory(guest_memfd)
200200
.map_err(StartMicrovmError::GuestMemory)?;
201201

202-
vm.register_memory_regions(guest_memory)
202+
vm.register_memory_regions(guest_memory, None)
203203
.map_err(VmmError::Vm)?;
204204

205205
let mut device_manager = DeviceManager::new(event_manager, &vcpus_exit_evt, &vm)?;
@@ -471,7 +471,7 @@ pub fn build_microvm_from_snapshot(
471471
.create_vcpus(vm_resources.machine_config.vcpu_count)
472472
.map_err(StartMicrovmError::Vm)?;
473473

474-
vm.register_memory_regions(guest_memory)
474+
vm.register_memory_regions(guest_memory, None)
475475
.map_err(StartMicrovmError::Vm)?;
476476

477477
#[cfg(target_arch = "x86_64")]

src/vmm/src/device_manager/mmio.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -596,7 +596,7 @@ pub(crate) mod tests {
596596
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
597597
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
598598
let mut vm = Vm::new(&kvm, false).unwrap();
599-
vm.register_memory_regions(guest_mem).unwrap();
599+
vm.register_memory_regions(guest_mem, None).unwrap();
600600
let mut device_manager = MMIODeviceManager::new();
601601

602602
let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap();
@@ -642,7 +642,7 @@ pub(crate) mod tests {
642642
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
643643
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
644644
let mut vm = Vm::new(&kvm, false).unwrap();
645-
vm.register_memory_regions(guest_mem).unwrap();
645+
vm.register_memory_regions(guest_mem, None).unwrap();
646646
let mut device_manager = MMIODeviceManager::new();
647647

648648
let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap();
@@ -695,7 +695,7 @@ pub(crate) mod tests {
695695
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
696696
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
697697
let mut vm = Vm::new(&kvm, false).unwrap();
698-
vm.register_memory_regions(guest_mem).unwrap();
698+
vm.register_memory_regions(guest_mem, None).unwrap();
699699

700700
#[cfg(target_arch = "x86_64")]
701701
vm.setup_irqchip().unwrap();

src/vmm/src/vstate/vm.rs

Lines changed: 86 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ use std::sync::{Arc, Mutex, MutexGuard};
1717
use kvm_bindings::KVM_IRQCHIP_IOAPIC;
1818
use kvm_bindings::{
1919
KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI, KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES,
20-
KVM_MSI_VALID_DEVID, KvmIrqRouting, kvm_create_guest_memfd, kvm_irq_routing_entry,
21-
kvm_userspace_memory_region, kvm_userspace_memory_region2,
20+
KVM_MSI_VALID_DEVID, KVMIO, KvmIrqRouting, kvm_create_guest_memfd, kvm_irq_routing_entry,
21+
kvm_userspace_memory_region,
2222
};
2323
use kvm_ioctls::{Cap, VmFd};
2424
use log::{debug, error};
@@ -29,6 +29,8 @@ use vm_device::interrupt::{
2929
};
3030
use vmm_sys_util::errno;
3131
use vmm_sys_util::eventfd::EventFd;
32+
use vmm_sys_util::ioctl::ioctl_with_ref;
33+
use vmm_sys_util::ioctl_iow_nr;
3234

3335
pub use crate::arch::{ArchVm as Vm, ArchVmError, VmState};
3436
use crate::arch::{GSI_MSI_END, host_page_size};
@@ -289,6 +291,24 @@ pub enum VmError {
289291
GuestMemfdNotSupported,
290292
}
291293

294+
// Upstream `kvm_userspace_memory_region2` definition does not include `userfault_bitmap` field yet.
295+
// TODO: revert to `kvm_userspace_memory_region2` from kvm-bindings
296+
#[allow(non_camel_case_types)]
297+
#[repr(C)]
298+
#[derive(Debug, Default, Copy, Clone, PartialEq)]
299+
struct kvm_userspace_memory_region2 {
300+
slot: u32,
301+
flags: u32,
302+
guest_phys_addr: u64,
303+
memory_size: u64,
304+
userspace_addr: u64,
305+
guest_memfd_offset: u64,
306+
guest_memfd: u32,
307+
pad1: u32,
308+
userfault_bitmap: u64,
309+
pad2: [u64; 13],
310+
}
311+
292312
/// Contains Vm functions that are usable across CPU architectures
293313
impl Vm {
294314
/// Create a KVM VM
@@ -395,16 +415,61 @@ impl Vm {
395415
pub fn register_memory_regions(
396416
&mut self,
397417
regions: Vec<GuestRegionMmap>,
418+
mut userfault_bitmap: Option<&mut [u8]>,
398419
) -> Result<(), VmError> {
399420
for region in regions {
400-
self.register_memory_region(region)?
421+
let bitmap_slice = if let Some(remaining) = userfault_bitmap {
422+
let region_len = u64_to_usize(region.len());
423+
// Firecracker does not allow sub-MB granularity when allocating guest memory
424+
assert_eq!(region_len % (host_page_size() * u8::BITS as usize), 0);
425+
let bitmap_len = region_len / host_page_size() / (u8::BITS as usize);
426+
let (head, tail) = remaining.split_at_mut(bitmap_len);
427+
userfault_bitmap = Some(tail);
428+
Some(head)
429+
} else {
430+
None
431+
};
432+
self.register_memory_region(region, bitmap_slice)?
401433
}
402-
403434
Ok(())
404435
}
405436

437+
// TODO: remove when userfault support is merged upstream
438+
fn set_user_memory_region2(
439+
&self,
440+
user_memory_region2: kvm_userspace_memory_region2,
441+
) -> Result<(), VmError> {
442+
ioctl_iow_nr!(
443+
KVM_SET_USER_MEMORY_REGION2,
444+
KVMIO,
445+
0x49,
446+
kvm_userspace_memory_region2
447+
);
448+
449+
#[allow(clippy::undocumented_unsafe_blocks)]
450+
let ret = unsafe {
451+
ioctl_with_ref(
452+
self.fd(),
453+
KVM_SET_USER_MEMORY_REGION2(),
454+
&user_memory_region2,
455+
)
456+
};
457+
if ret == 0 {
458+
Ok(())
459+
} else {
460+
Err(VmError::SetUserMemoryRegion(kvm_ioctls::Error::last()))
461+
}
462+
}
463+
406464
/// Register a new memory region to this [`Vm`].
407-
pub fn register_memory_region(&mut self, region: GuestRegionMmap) -> Result<(), VmError> {
465+
pub fn register_memory_region(
466+
&mut self,
467+
region: GuestRegionMmap,
468+
userfault_bitmap: Option<&mut [u8]>,
469+
) -> Result<(), VmError> {
470+
// TODO: take it from kvm-bindings when merged upstream
471+
const KVM_MEM_USERFAULT: u32 = 1 << 3;
472+
408473
let next_slot = self
409474
.guest_memory()
410475
.num_regions()
@@ -432,6 +497,14 @@ impl Vm {
432497
(0, 0)
433498
};
434499

500+
let userfault_bitmap = match userfault_bitmap {
501+
Some(addr) => {
502+
flags |= KVM_MEM_USERFAULT;
503+
addr.as_ptr() as u64
504+
}
505+
None => 0,
506+
};
507+
435508
let memory_region = kvm_userspace_memory_region2 {
436509
slot: next_slot,
437510
guest_phys_addr: region.start_addr().raw_value(),
@@ -440,24 +513,22 @@ impl Vm {
440513
flags,
441514
guest_memfd,
442515
guest_memfd_offset,
516+
userfault_bitmap,
443517
..Default::default()
444518
};
445519

446520
let new_guest_memory = self.common.guest_memory.insert_region(Arc::new(region))?;
447521

448522
if self.fd().check_extension(Cap::UserMemory2) {
449-
// SAFETY: We are passing a valid memory region and operate on a valid KVM FD.
450-
unsafe {
451-
self.fd()
452-
.set_user_memory_region2(memory_region)
453-
.map_err(VmError::SetUserMemoryRegion)?;
454-
}
523+
self.set_user_memory_region2(memory_region)?;
455524
} else {
456525
// Something is seriously wrong if we manage to set these fields on a host that doesn't
457526
// even allow creation of guest_memfds!
458527
assert_eq!(memory_region.guest_memfd, 0);
459528
assert_eq!(memory_region.guest_memfd_offset, 0);
529+
assert_eq!(memory_region.userfault_bitmap, 0);
460530
assert_eq!(memory_region.flags & KVM_MEM_GUEST_MEMFD, 0);
531+
assert_eq!(memory_region.flags & KVM_MEM_USERFAULT, 0);
461532

462533
// SAFETY: We are passing a valid memory region and operate on a valid KVM FD.
463534
unsafe {
@@ -789,7 +860,7 @@ pub(crate) mod tests {
789860
pub(crate) fn setup_vm_with_memory(mem_size: usize) -> (Kvm, Vm) {
790861
let (kvm, mut vm) = setup_vm();
791862
let gm = single_region_mem_raw(mem_size);
792-
vm.register_memory_regions(gm).unwrap();
863+
vm.register_memory_regions(gm, None).unwrap();
793864
(kvm, vm)
794865
}
795866

@@ -819,14 +890,14 @@ pub(crate) mod tests {
819890
// Trying to set a memory region with a size that is not a multiple of GUEST_PAGE_SIZE
820891
// will result in error.
821892
let gm = single_region_mem_raw(0x10);
822-
let res = vm.register_memory_regions(gm);
893+
let res = vm.register_memory_regions(gm, None);
823894
assert_eq!(
824895
res.unwrap_err().to_string(),
825896
"Cannot set the memory regions: Invalid argument (os error 22)"
826897
);
827898

828899
let gm = single_region_mem_raw(0x1000);
829-
let res = vm.register_memory_regions(gm);
900+
let res = vm.register_memory_regions(gm, None);
830901
res.unwrap();
831902
}
832903

@@ -861,7 +932,7 @@ pub(crate) mod tests {
861932

862933
let region = GuestRegionMmap::new(region, GuestAddress(i as u64 * 0x1000)).unwrap();
863934

864-
let res = vm.register_memory_region(region);
935+
let res = vm.register_memory_region(region, None);
865936

866937
if max_nr_regions <= i {
867938
assert!(

0 commit comments

Comments
 (0)