Skip to content

Commit 8a7300e

Browse files
committed
feat(vmm): extend register_memory_regions with userfault bitmap
If configured, the userfault bitmap is registered with KVM and controls whether KVM exits to userspace on a fault on the corresponding page. We are going to allocate the bitmap in a memfd in Firecracker, set the bits for all pages to request notifications for vCPU faults, and send it to the UFFD handler, delegating the clearing of the bits to it as pages get populated. Since the KVM userfault patches are still in review, set_user_memory_region2 is not aware of the userfault flag or of the userfault bitmap address in its input structure, so define both the input structure and the ioctl wrapper in Firecracker code temporarily. Signed-off-by: Nikita Kalyazin <[email protected]>
1 parent adba190 commit 8a7300e

File tree

3 files changed

+90
-20
lines changed

3 files changed

+90
-20
lines changed

src/vmm/src/builder.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ pub fn build_microvm_for_boot(
260260
.map_err(StartMicrovmError::GuestMemory)?;
261261

262262
vmm.vm
263-
.register_memory_regions(guest_memory)
263+
.register_memory_regions(guest_memory, None)
264264
.map_err(VmmError::Vm)?;
265265

266266
#[cfg(target_arch = "x86_64")]
@@ -487,7 +487,7 @@ pub fn build_microvm_from_snapshot(
487487
.map_err(StartMicrovmError::Internal)?;
488488

489489
vmm.vm
490-
.register_memory_regions(guest_memory)
490+
.register_memory_regions(guest_memory, None)
491491
.map_err(VmmError::Vm)
492492
.map_err(StartMicrovmError::Internal)?;
493493
vmm.uffd = uffd;

src/vmm/src/device_manager/mmio.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -660,7 +660,7 @@ mod tests {
660660
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
661661
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
662662
let mut vm = Vm::new(&kvm, false).unwrap();
663-
vm.register_memory_regions(guest_mem).unwrap();
663+
vm.register_memory_regions(guest_mem, None).unwrap();
664664
let mut device_manager = MMIODeviceManager::new();
665665
let mut resource_allocator = ResourceAllocator::new().unwrap();
666666

@@ -691,7 +691,7 @@ mod tests {
691691
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
692692
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
693693
let mut vm = Vm::new(&kvm, false).unwrap();
694-
vm.register_memory_regions(guest_mem).unwrap();
694+
vm.register_memory_regions(guest_mem, None).unwrap();
695695
let mut device_manager = MMIODeviceManager::new();
696696
let mut resource_allocator = ResourceAllocator::new().unwrap();
697697

@@ -747,7 +747,7 @@ mod tests {
747747
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
748748
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
749749
let mut vm = Vm::new(&kvm, false).unwrap();
750-
vm.register_memory_regions(guest_mem).unwrap();
750+
vm.register_memory_regions(guest_mem, None).unwrap();
751751

752752
#[cfg(target_arch = "x86_64")]
753753
vm.setup_irqchip().unwrap();

src/vmm/src/vstate/vm.rs

Lines changed: 85 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,13 @@ use std::path::Path;
1313
use std::sync::Arc;
1414

1515
use kvm_bindings::{
16-
KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEMORY_ATTRIBUTE_PRIVATE,
16+
KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEMORY_ATTRIBUTE_PRIVATE, KVMIO,
1717
kvm_create_guest_memfd, kvm_memory_attributes, kvm_userspace_memory_region,
18-
kvm_userspace_memory_region2,
1918
};
2019
use kvm_ioctls::{Cap, VmFd};
2120
use vmm_sys_util::eventfd::EventFd;
21+
use vmm_sys_util::ioctl::ioctl_with_ref;
22+
use vmm_sys_util::{ioctl_ioc_nr, ioctl_iow_nr};
2223

2324
pub use crate::arch::{ArchVm as Vm, ArchVmError, VmState};
2425
use crate::arch::{VM_TYPE_FOR_SECRET_FREEDOM, host_page_size};
@@ -73,6 +74,24 @@ pub enum VmError {
7374
SetMemoryAttributes(kvm_ioctls::Error),
7475
}
7576

77+
// Upstream `kvm_userspace_memory_region2` definition does not include `userfault_bitmap` field yet.
78+
// TODO: revert to `kvm_userspace_memory_region2` from kvm-bindings
//
// Local stand-in that is handed by reference to the KVM_SET_USER_MEMORY_REGION2 ioctl in
// `Vm::set_user_memory_region2`. It keeps the C-style name of the binding it replaces, hence the
// `non_camel_case_types` allowance, and is `#[repr(C)]` so that the field order and offsets below
// are exactly what the kernel reads.
//
// Layout: 4 + 4 + 8 + 8 + 8 + 8 + 4 + 4 + 8 + 13 * 8 = 160 bytes.
// NOTE(review): presumably `userfault_bitmap` takes over one `u64` of the upstream trailing
// padding so that the total size is unchanged; verify against the kernel patch series.
79+
#[allow(non_camel_case_types)]
80+
#[repr(C)]
81+
#[derive(Debug, Default, Copy, Clone, PartialEq)]
82+
struct kvm_userspace_memory_region2 {
83+
// Memory slot index.
slot: u32,
84+
// Bitmask of `KVM_MEM_*` flags.
flags: u32,
85+
// Guest physical address at which the region starts.
guest_phys_addr: u64,
86+
// Region size in bytes.
memory_size: u64,
87+
// Host virtual address backing the region.
userspace_addr: u64,
88+
// Offset into `guest_memfd`; the non-`UserMemory2` fallback path asserts it is 0.
guest_memfd_offset: u64,
89+
// guest_memfd file descriptor; the non-`UserMemory2` fallback path asserts it is 0.
guest_memfd: u32,
90+
// Padding; left at its `Default` value (0) by `register_memory_region`.
pad1: u32,
91+
// Host address of the userfault bitmap slice for this region, or 0 when none was supplied.
userfault_bitmap: u64,
92+
// Trailing padding; left at its `Default` value (all zeroes) by `register_memory_region`.
pad2: [u64; 13],
93+
}
94+
7695
/// Contains Vm functions that are usable across CPU architectures
7796
impl Vm {
7897
/// Create a KVM VM
@@ -181,16 +200,61 @@ impl Vm {
181200
pub fn register_memory_regions(
182201
&mut self,
183202
regions: Vec<GuestRegionMmap>,
203+
// Optional userfault bitmap covering all of `regions`, laid out back to back in region order.
mut userfault_bitmap: Option<&mut [u8]>,
184204
) -> Result<(), VmError> {
185205
// Regions are registered in iteration order; the first failure is returned immediately.
for region in regions {
186-
self.register_memory_region(region)?
206+
// When a bitmap was supplied, carve off the leading chunk that belongs to this region and
// keep the remainder for the regions that follow.
let bitmap_slice = if let Some(remaining) = userfault_bitmap {
207+
let region_len = u64_to_usize(region.len());
208+
// Firecracker does not allow sub-MB granularity when allocating guest memory
// The assertion makes sure the region's page count fills a whole number of bitmap bytes.
// NOTE(review): a violation panics rather than returning a `VmError`.
209+
assert_eq!(region_len % (host_page_size() * u8::BITS as usize), 0);
210+
// One bit per host page, eight pages per bitmap byte.
let bitmap_len = region_len / host_page_size() / (u8::BITS as usize);
211+
// NOTE(review): `split_at_mut` panics if fewer than `bitmap_len` bytes remain, so the
// caller has to pass a bitmap that is large enough for every region.
let (head, tail) = remaining.split_at_mut(bitmap_len);
212+
userfault_bitmap = Some(tail);
213+
Some(head)
214+
} else {
215+
None
216+
};
217+
self.register_memory_region(region, bitmap_slice)?
187218
}
188-
189219
Ok(())
190220
}
191221

222+
// TODO: remove when userfault support is merged upstream
223+
fn set_user_memory_region2(
224+
&self,
225+
user_memory_region2: kvm_userspace_memory_region2,
226+
) -> Result<(), VmError> {
227+
ioctl_iow_nr!(
228+
KVM_SET_USER_MEMORY_REGION2,
229+
KVMIO,
230+
0x49,
231+
kvm_userspace_memory_region2
232+
);
233+
234+
#[allow(clippy::undocumented_unsafe_blocks)]
235+
let ret = unsafe {
236+
ioctl_with_ref(
237+
self.fd(),
238+
KVM_SET_USER_MEMORY_REGION2(),
239+
&user_memory_region2,
240+
)
241+
};
242+
if ret == 0 {
243+
Ok(())
244+
} else {
245+
Err(VmError::SetUserMemoryRegion(kvm_ioctls::Error::last()))
246+
}
247+
}
248+
192249
/// Register a new memory region to this [`Vm`].
193-
pub fn register_memory_region(&mut self, region: GuestRegionMmap) -> Result<(), VmError> {
250+
pub fn register_memory_region(
251+
&mut self,
252+
region: GuestRegionMmap,
253+
userfault_bitmap: Option<&mut [u8]>,
254+
) -> Result<(), VmError> {
255+
// TODO: take it from kvm-bindings when merged upstream
256+
const KVM_MEM_USERFAULT: u32 = 1 << 3;
257+
194258
let next_slot = self
195259
.guest_memory()
196260
.num_regions()
@@ -218,6 +282,14 @@ impl Vm {
218282
(0, 0)
219283
};
220284

285+
let userfault_bitmap = match userfault_bitmap {
286+
Some(addr) => {
287+
flags |= KVM_MEM_USERFAULT;
288+
addr.as_ptr() as u64
289+
}
290+
None => 0,
291+
};
292+
221293
let memory_region = kvm_userspace_memory_region2 {
222294
slot: next_slot,
223295
guest_phys_addr: region.start_addr().raw_value(),
@@ -226,24 +298,22 @@ impl Vm {
226298
flags,
227299
guest_memfd,
228300
guest_memfd_offset,
301+
userfault_bitmap,
229302
..Default::default()
230303
};
231304

232305
let new_guest_memory = self.common.guest_memory.insert_region(Arc::new(region))?;
233306

234307
if self.fd().check_extension(Cap::UserMemory2) {
235-
// SAFETY: We are passing a valid memory region and operate on a valid KVM FD.
236-
unsafe {
237-
self.fd()
238-
.set_user_memory_region2(memory_region)
239-
.map_err(VmError::SetUserMemoryRegion)?;
240-
}
308+
self.set_user_memory_region2(memory_region)?;
241309
} else {
242310
// Something is seriously wrong if we manage to set these fields on a host that doesn't
243311
// even allow creation of guest_memfds!
244312
assert_eq!(memory_region.guest_memfd, 0);
245313
assert_eq!(memory_region.guest_memfd_offset, 0);
314+
assert_eq!(memory_region.userfault_bitmap, 0);
246315
assert_eq!(memory_region.flags & KVM_MEM_GUEST_MEMFD, 0);
316+
assert_eq!(memory_region.flags & KVM_MEM_USERFAULT, 0);
247317

248318
// SAFETY: We are passing a valid memory region and operate on a valid KVM FD.
249319
unsafe {
@@ -417,7 +487,7 @@ pub(crate) mod tests {
417487
pub(crate) fn setup_vm_with_memory(mem_size: usize) -> (Kvm, Vm) {
418488
let (kvm, mut vm) = setup_vm();
419489
let gm = single_region_mem_raw(mem_size);
420-
vm.register_memory_regions(gm).unwrap();
490+
vm.register_memory_regions(gm, None).unwrap();
421491
(kvm, vm)
422492
}
423493

@@ -447,14 +517,14 @@ pub(crate) mod tests {
447517
// Trying to set a memory region with a size that is not a multiple of GUEST_PAGE_SIZE
448518
// will result in error.
449519
let gm = single_region_mem_raw(0x10);
450-
let res = vm.register_memory_regions(gm);
520+
let res = vm.register_memory_regions(gm, None);
451521
assert_eq!(
452522
res.unwrap_err().to_string(),
453523
"Cannot set the memory regions: Invalid argument (os error 22)"
454524
);
455525

456526
let gm = single_region_mem_raw(0x1000);
457-
let res = vm.register_memory_regions(gm);
527+
let res = vm.register_memory_regions(gm, None);
458528
res.unwrap();
459529
}
460530

@@ -489,7 +559,7 @@ pub(crate) mod tests {
489559

490560
let region = GuestRegionMmap::new(region, GuestAddress(i as u64 * 0x1000)).unwrap();
491561

492-
let res = vm.register_memory_region(region);
562+
let res = vm.register_memory_region(region, None);
493563

494564
if i >= max_nr_regions {
495565
assert!(

0 commit comments

Comments
 (0)