Skip to content

Commit 5c63541

Browse files
committed
feat(vmm): extend register_memory_regions with userfault bitmap
If configured, the userfault bitmap is registered with KVM and controls whether KVM exits to userspace on a fault of the corresponding page. We are going to allocate the bitmap in a memfd in Firecracker, set the bits for all pages to request notifications for vCPU faults, and send the memfd to the UFFD handler, delegating to it the clearing of the bits as pages get populated. Since the KVM userfault patches are still in review, set_user_memory_region2 is not yet aware of the userfault flag or of the userfault bitmap address in its input structure. Define the input structure and the ioctl wrapper in Firecracker code temporarily. Signed-off-by: Nikita Kalyazin <[email protected]>
1 parent 22fb5c8 commit 5c63541

File tree

3 files changed

+108
-19
lines changed

3 files changed

+108
-19
lines changed

src/vmm/src/builder.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ pub fn build_microvm_for_boot(
260260
.map_err(StartMicrovmError::GuestMemory)?;
261261

262262
vmm.vm
263-
.register_memory_regions(guest_memory)
263+
.register_memory_regions(guest_memory, None)
264264
.map_err(VmmError::Vm)?;
265265

266266
#[cfg(target_arch = "x86_64")]
@@ -487,7 +487,7 @@ pub fn build_microvm_from_snapshot(
487487
.map_err(StartMicrovmError::Internal)?;
488488

489489
vmm.vm
490-
.register_memory_regions(guest_memory)
490+
.register_memory_regions(guest_memory, None)
491491
.map_err(VmmError::Vm)
492492
.map_err(StartMicrovmError::Internal)?;
493493
vmm.uffd = uffd;

src/vmm/src/device_manager/mmio.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -660,7 +660,7 @@ mod tests {
660660
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
661661
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
662662
let mut vm = Vm::new(&kvm, false).unwrap();
663-
vm.register_memory_regions(guest_mem).unwrap();
663+
vm.register_memory_regions(guest_mem, None).unwrap();
664664
let mut device_manager = MMIODeviceManager::new();
665665
let mut resource_allocator = ResourceAllocator::new().unwrap();
666666

@@ -691,7 +691,7 @@ mod tests {
691691
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
692692
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
693693
let mut vm = Vm::new(&kvm, false).unwrap();
694-
vm.register_memory_regions(guest_mem).unwrap();
694+
vm.register_memory_regions(guest_mem, None).unwrap();
695695
let mut device_manager = MMIODeviceManager::new();
696696
let mut resource_allocator = ResourceAllocator::new().unwrap();
697697

@@ -747,7 +747,7 @@ mod tests {
747747
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
748748
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
749749
let mut vm = Vm::new(&kvm, false).unwrap();
750-
vm.register_memory_regions(guest_mem).unwrap();
750+
vm.register_memory_regions(guest_mem, None).unwrap();
751751

752752
#[cfg(target_arch = "x86_64")]
753753
vm.setup_irqchip().unwrap();

src/vmm/src/vstate/vm.rs

Lines changed: 103 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,13 @@ use std::path::Path;
1313
use std::sync::Arc;
1414

1515
use kvm_bindings::{
16-
KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEMORY_ATTRIBUTE_PRIVATE,
16+
KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEMORY_ATTRIBUTE_PRIVATE, KVMIO,
1717
kvm_create_guest_memfd, kvm_memory_attributes, kvm_userspace_memory_region,
18-
kvm_userspace_memory_region2,
1918
};
2019
use kvm_ioctls::{Cap, VmFd};
2120
use vmm_sys_util::eventfd::EventFd;
21+
use vmm_sys_util::ioctl::ioctl_with_ref;
22+
use vmm_sys_util::{ioctl_ioc_nr, ioctl_iow_nr};
2223

2324
pub use crate::arch::{ArchVm as Vm, ArchVmError, VmState};
2425
use crate::arch::{VM_TYPE_FOR_SECRET_FREEDOM, host_page_size};
@@ -73,6 +74,24 @@ pub enum VmError {
7374
SetMemoryAttributes(kvm_ioctls::Error),
7475
}
7576

77+
// Upstream `kvm_userspace_memory_region2` definition does not include `userfault_bitmap` field yet.
78+
// TODO: revert to `kvm_userspace_memory_region2` from kvm-bindings
79+
#[allow(non_camel_case_types)]
80+
#[repr(C)]
81+
#[derive(Debug, Default, Copy, Clone, PartialEq)]
82+
struct kvm_userspace_memory_region2 {
83+
slot: u32,
84+
flags: u32,
85+
guest_phys_addr: u64,
86+
memory_size: u64,
87+
userspace_addr: u64,
88+
guest_memfd_offset: u64,
89+
guest_memfd: u32,
90+
pad1: u32,
91+
userfault_bitmap: u64,
92+
pad2: [u64; 13],
93+
}
94+
7695
/// Contains Vm functions that are usable across CPU architectures
7796
impl Vm {
7897
/// Create a KVM VM
@@ -181,16 +200,78 @@ impl Vm {
181200
pub fn register_memory_regions(
182201
&mut self,
183202
regions: Vec<GuestRegionMmap>,
203+
userfault_bitmap_memfd: Option<&File>,
184204
) -> Result<(), VmError> {
205+
let addr = match userfault_bitmap_memfd {
206+
Some(file) => {
207+
// SAFETY: the arguments to mmap cannot cause any memory unsafety in the rust sense
208+
let addr = unsafe {
209+
libc::mmap(
210+
std::ptr::null_mut(),
211+
usize::try_from(file.metadata().unwrap().len())
212+
.expect("userfault bitmap file size is too large"),
213+
libc::PROT_WRITE,
214+
libc::MAP_SHARED,
215+
file.as_raw_fd(),
216+
0,
217+
)
218+
};
219+
220+
if addr == libc::MAP_FAILED {
221+
panic!(
222+
"Failed to mmap userfault bitmap file: {}",
223+
std::io::Error::last_os_error()
224+
);
225+
}
226+
227+
Some(addr as u64)
228+
}
229+
None => None,
230+
};
231+
185232
for region in regions {
186-
self.register_memory_region(region)?
233+
self.register_memory_region(region, addr)?
187234
}
188235

189236
Ok(())
190237
}
191238

239+
// TODO: remove when userfault support is merged upstream
240+
fn set_user_memory_region2(
241+
&self,
242+
user_memory_region2: kvm_userspace_memory_region2,
243+
) -> Result<(), VmError> {
244+
ioctl_iow_nr!(
245+
KVM_SET_USER_MEMORY_REGION2,
246+
KVMIO,
247+
0x49,
248+
kvm_userspace_memory_region2
249+
);
250+
251+
#[allow(clippy::undocumented_unsafe_blocks)]
252+
let ret = unsafe {
253+
ioctl_with_ref(
254+
self.fd(),
255+
KVM_SET_USER_MEMORY_REGION2(),
256+
&user_memory_region2,
257+
)
258+
};
259+
if ret == 0 {
260+
Ok(())
261+
} else {
262+
Err(VmError::SetUserMemoryRegion(kvm_ioctls::Error::last()))
263+
}
264+
}
265+
192266
/// Register a new memory region to this [`Vm`].
193-
pub fn register_memory_region(&mut self, region: GuestRegionMmap) -> Result<(), VmError> {
267+
pub fn register_memory_region(
268+
&mut self,
269+
region: GuestRegionMmap,
270+
userfault_addr: Option<u64>,
271+
) -> Result<(), VmError> {
272+
// TODO: take it from kvm-bindings when merged upstream
273+
const KVM_MEM_USERFAULT: u32 = 1 << 3;
274+
194275
let next_slot = self
195276
.guest_memory()
196277
.num_regions()
@@ -218,6 +299,18 @@ impl Vm {
218299
(0, 0)
219300
};
220301

302+
let userfault_bitmap = match userfault_addr {
303+
Some(addr) => {
304+
flags |= KVM_MEM_USERFAULT;
305+
306+
let file_offset_start = region.file_offset().unwrap().start();
307+
let pages_offset = file_offset_start / (host_page_size() as u64);
308+
let bytes_offset = pages_offset / (u8::BITS as u64);
309+
addr + bytes_offset
310+
}
311+
None => 0,
312+
};
313+
221314
let memory_region = kvm_userspace_memory_region2 {
222315
slot: next_slot,
223316
guest_phys_addr: region.start_addr().raw_value(),
@@ -226,18 +319,14 @@ impl Vm {
226319
flags,
227320
guest_memfd,
228321
guest_memfd_offset,
322+
userfault_bitmap,
229323
..Default::default()
230324
};
231325

232326
let new_guest_memory = self.common.guest_memory.insert_region(Arc::new(region))?;
233327

234328
if self.fd().check_extension(Cap::UserMemory2) {
235-
// SAFETY: We are passing a valid memory region and operate on a valid KVM FD.
236-
unsafe {
237-
self.fd()
238-
.set_user_memory_region2(memory_region)
239-
.map_err(VmError::SetUserMemoryRegion)?;
240-
}
329+
self.set_user_memory_region2(memory_region)?;
241330
} else {
242331
// Something is seriously wrong if we manage to set these fields on a host that doesn't
243332
// even allow creation of guest_memfds!
@@ -417,7 +506,7 @@ pub(crate) mod tests {
417506
pub(crate) fn setup_vm_with_memory(mem_size: usize) -> (Kvm, Vm) {
418507
let (kvm, mut vm) = setup_vm();
419508
let gm = single_region_mem_raw(mem_size);
420-
vm.register_memory_regions(gm).unwrap();
509+
vm.register_memory_regions(gm, None).unwrap();
421510
(kvm, vm)
422511
}
423512

@@ -447,14 +536,14 @@ pub(crate) mod tests {
447536
// Trying to set a memory region with a size that is not a multiple of GUEST_PAGE_SIZE
448537
// will result in error.
449538
let gm = single_region_mem_raw(0x10);
450-
let res = vm.register_memory_regions(gm);
539+
let res = vm.register_memory_regions(gm, None);
451540
assert_eq!(
452541
res.unwrap_err().to_string(),
453542
"Cannot set the memory regions: Invalid argument (os error 22)"
454543
);
455544

456545
let gm = single_region_mem_raw(0x1000);
457-
let res = vm.register_memory_regions(gm);
546+
let res = vm.register_memory_regions(gm, None);
458547
res.unwrap();
459548
}
460549

@@ -489,7 +578,7 @@ pub(crate) mod tests {
489578

490579
let region = GuestRegionMmap::new(region, GuestAddress(i as u64 * 0x1000)).unwrap();
491580

492-
let res = vm.register_memory_region(region);
581+
let res = vm.register_memory_region(region, None);
493582

494583
if i >= max_nr_regions {
495584
assert!(

0 commit comments

Comments
 (0)