Commit 187a1f0

Use guest_memfd to back non-swiotlb regions
If the `secret_free` field of the memory_config is set to true in the /machine-config endpoint, back all non-swiotlb regions using guest_memfd. For our setup, this means both setting the guest_memfd[_offset] fields in kvm_userspace_memory_region2 and mmap-ing the guest memory, reflecting this VMA back into the memslot's userspace_addr (which is how KVM-internal accesses to guest memory, such as MMIO emulation on x86, work for these guest_memfd regions).

Signed-off-by: Patrick Roy <[email protected]>
1 parent b763666 commit 187a1f0
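
For readers unfamiliar with guest_memfd memslots, the sketch below shows the shape of the `kvm_userspace_memory_region2` this commit populates. It is a minimal illustration only: it assumes the kvm-bindings struct derives `Default`, and `guest_memfd_fd`, `host_mapping_addr`, etc. are hypothetical stand-ins for values Firecracker actually computes. The real wiring lives in `KvmRegion::from_mmap_region` in src/vmm/src/vstate/memory.rs below.

use kvm_bindings::{KVM_MEM_GUEST_MEMFD, kvm_userspace_memory_region2};

// Sketch only: all parameter values are hypothetical stand-ins.
fn example_guest_memfd_slot(
    slot: u32,
    guest_phys_addr: u64,
    size: u64,
    guest_memfd_fd: u32,
    host_mapping_addr: u64, // VMA obtained by mmap-ing the guest memory
) -> kvm_userspace_memory_region2 {
    kvm_userspace_memory_region2 {
        slot,
        flags: KVM_MEM_GUEST_MEMFD,
        guest_phys_addr,
        memory_size: size,
        // KVM-internal accesses (e.g. MMIO emulation on x86) go through this VMA.
        userspace_addr: host_mapping_addr,
        // Guest faults are backed by the guest_memfd instead.
        guest_memfd: guest_memfd_fd,
        guest_memfd_offset: 0,
        ..Default::default()
    }
}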

File tree

7 files changed, +122 -66 lines changed


src/vmm/benches/memory_access.rs

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@ fn bench_single_page_fault(c: &mut Criterion, configuration: VmResources) {
     c.bench_function("page_fault", |b| {
         b.iter_batched(
             || {
-                let memory = configuration.allocate_guest_memory().unwrap();
+                let memory = configuration.allocate_guest_memory(None).unwrap();
                 // Get a pointer to the first memory region (cannot do `.get_slice(GuestAddress(0),
                 // 1)`, because on ARM64 guest memory does not start at physical
                 // address 0).

src/vmm/src/builder.rs

Lines changed: 24 additions & 18 deletions
@@ -217,14 +217,6 @@ pub fn build_microvm_for_boot(
         .as_ref()
         .ok_or(MissingKernelConfig)?;
 
-    let guest_memory = vm_resources
-        .allocate_guest_memory()
-        .map_err(StartMicrovmError::GuestMemory)?;
-
-    let swiotlb = vm_resources
-        .allocate_swiotlb_region()
-        .map_err(StartMicrovmError::GuestMemory)?;
-
     // Clone the command-line so that a failed boot doesn't pollute the original.
     #[allow(unused_mut)]
     let mut boot_cmdline = boot_config.cmdline.clone();
@@ -234,16 +226,35 @@
         .cpu_template
         .get_cpu_template()?;
 
+    let secret_free = vm_resources.machine_config.mem_config.secret_free;
+
     let (mut vmm, mut vcpus) = create_vmm_and_vcpus(
         instance_info,
         event_manager,
         vm_resources.machine_config.vcpu_count,
         cpu_template.kvm_capabilities.clone(),
     )?;
 
+    let guest_memfd = match secret_free {
+        true => Some(
+            vmm.vm
+                .create_guest_memfd(vm_resources.memory_size())
+                .map_err(VmmError::Vm)?,
+        ),
+        false => None,
+    };
+
+    let guest_memory = vm_resources
+        .allocate_guest_memory(guest_memfd)
+        .map_err(StartMicrovmError::GuestMemory)?;
+
+    let swiotlb = vm_resources
+        .allocate_swiotlb_region()
+        .map_err(StartMicrovmError::GuestMemory)?;
+
     for region in guest_memory {
         vmm.vm
-            .register_memory_region(region)
+            .register_memory_region(region, secret_free)
            .map_err(VmmError::Vm)?;
     }
 
@@ -254,10 +265,7 @@
     }
 
     let entry_point = load_kernel(
-        Bounce::new(
-            &boot_config.kernel_file,
-            vm_resources.machine_config.mem_config.secret_free,
-        ),
+        Bounce::new(&boot_config.kernel_file, secret_free),
         vmm.vm.guest_memory(),
     )?;
     let initrd = match &boot_config.initrd_file {
@@ -269,10 +277,7 @@
 
             Some(InitrdConfig::from_reader(
                 vmm.vm.guest_memory(),
-                Bounce::new(
-                    initrd_file,
-                    vm_resources.machine_config.mem_config.secret_free,
-                ),
+                Bounce::new(initrd_file, secret_free),
                 u64_to_usize(size),
             )?)
         }
@@ -495,8 +500,9 @@ pub fn build_microvm_from_snapshot(
         guest_memory.iter().map(|r| r.len()).sum(),
     )?;
 
+    // TODO: sort out gmem support for snapshot restore
     vmm.vm
-        .register_memory_regions(guest_memory)
+        .register_memory_regions(guest_memory, false)
         .map_err(VmmError::Vm)
         .map_err(StartMicrovmError::Internal)?;
 
src/vmm/src/device_manager/mmio.rs

Lines changed: 3 additions & 3 deletions
@@ -656,7 +656,7 @@ mod tests {
         let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
         let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
         let mut vm = Vm::new(&kvm).unwrap();
-        vm.register_memory_regions(guest_mem).unwrap();
+        vm.register_memory_regions(guest_mem, false).unwrap();
         let mut device_manager = MMIODeviceManager::new();
         let mut resource_allocator = ResourceAllocator::new().unwrap();
 
@@ -687,7 +687,7 @@
         let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
         let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
         let mut vm = Vm::new(&kvm).unwrap();
-        vm.register_memory_regions(guest_mem).unwrap();
+        vm.register_memory_regions(guest_mem, false).unwrap();
         let mut device_manager = MMIODeviceManager::new();
         let mut resource_allocator = ResourceAllocator::new().unwrap();
 
@@ -743,7 +743,7 @@
         let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
         let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
         let mut vm = Vm::new(&kvm).unwrap();
-        vm.register_memory_regions(guest_mem).unwrap();
+        vm.register_memory_regions(guest_mem, false).unwrap();
 
         #[cfg(target_arch = "x86_64")]
         vm.setup_irqchip().unwrap();

src/vmm/src/persist.rs

Lines changed: 1 addition & 1 deletion
@@ -329,7 +329,7 @@ pub fn restore_memory(
             }
 
             let mem_file = File::open(path)?;
-            memory::snapshot_file(mem_file, state.regions(), track_dirty, offset)?
+            memory::file_private(mem_file, state.regions(), track_dirty, offset)?
         }
         None => memory::anonymous(state.regions(), track_dirty, huge_pages)?,
     };

src/vmm/src/resources.rs

Lines changed: 41 additions & 15 deletions
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 
 use std::convert::From;
+use std::fs::File;
 use std::path::PathBuf;
 use std::sync::{Arc, Mutex, MutexGuard};
 
@@ -30,7 +31,7 @@ use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError};
 use crate::vmm_config::net::*;
 use crate::vmm_config::vsock::*;
 use crate::vstate::memory;
-use crate::vstate::memory::{GuestRegionMmap, MemoryError};
+use crate::vstate::memory::{GuestRegionMmap, MemoryError, create_memfd};
 
 /// Errors encountered when configuring microVM resources.
 #[derive(Debug, thiserror::Error, displaydoc::Display)]
@@ -476,6 +477,12 @@ impl VmResources {
         0
     }
 
+    /// Gets the size of the "traditional" memory region, e.g. total memory excluding the swiotlb
+    /// region.
+    pub fn memory_size(&self) -> usize {
+        self.machine_config.mem_size_mib - self.swiotlb_size_mib()
+    }
+
     /// Whether the use of swiotlb was requested
     pub fn swiotlb_used(&self) -> bool {
         self.swiotlb_size_mib() > 0
@@ -486,28 +493,45 @@
         offset: usize,
         size: usize,
         vhost_accessible: bool,
+        file: Option<File>,
     ) -> Result<Vec<GuestRegionMmap>, MemoryError> {
-        let regions = crate::arch::arch_memory_regions(offset, size);
-        if vhost_accessible {
-            memory::memfd_backed(
-                regions.as_ref(),
-                self.machine_config.track_dirty_pages,
-                self.machine_config.huge_pages,
-            )
-        } else {
-            memory::anonymous(
-                regions.into_iter(),
+        let regions = crate::arch::arch_memory_regions(offset, size).into_iter();
+        match file {
+            Some(file) => memory::file_shared(
+                file,
+                regions,
                 self.machine_config.track_dirty_pages,
                 self.machine_config.huge_pages,
-            )
+            ),
+            None => {
+                if vhost_accessible {
+                    let memfd = create_memfd(size as u64, self.machine_config.huge_pages.into())?
+                        .into_file();
+                    memory::file_shared(
+                        memfd,
+                        regions,
+                        self.machine_config.track_dirty_pages,
+                        self.machine_config.huge_pages,
+                    )
+                } else {
+                    memory::anonymous(
+                        regions.into_iter(),
+                        self.machine_config.track_dirty_pages,
+                        self.machine_config.huge_pages,
+                    )
+                }
+            }
         }
     }
 
     /// Allocates guest memory in a configuration most appropriate for these [`VmResources`].
     ///
     /// If vhost-user-blk devices are in use, allocates memfd-backed shared memory, otherwise
    /// prefers anonymous memory for performance reasons.
-    pub fn allocate_guest_memory(&self) -> Result<Vec<GuestRegionMmap>, MemoryError> {
+    pub fn allocate_guest_memory(
+        &self,
+        guest_memfd: Option<File>,
+    ) -> Result<Vec<GuestRegionMmap>, MemoryError> {
         // Page faults are more expensive for shared memory mapping, including memfd.
         // For this reason, we only back guest memory with a memfd
         // if a vhost-user-blk device is configured in the VM, otherwise we fall back to
@@ -522,8 +546,9 @@
         // that would not be worth the effort.
         self.allocate_memory(
             0,
-            mib_to_bytes(self.machine_config.mem_size_mib - self.swiotlb_size_mib()),
+            mib_to_bytes(self.memory_size()),
             self.vhost_user_devices_used() && !self.swiotlb_used(),
+            guest_memfd,
        )
     }
 
@@ -537,7 +562,8 @@
         let start = mib_to_bytes(self.machine_config.mem_size_mib) - swiotlb_size;
         let start = start.max(crate::arch::bytes_before_last_gap());
 
-        let mut mem = self.allocate_memory(start, swiotlb_size, self.vhost_user_devices_used())?;
+        let mut mem =
+            self.allocate_memory(start, swiotlb_size, self.vhost_user_devices_used(), None)?;
 
         assert_eq!(mem.len(), 1);
 
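Since `create_memfd` is now called directly from `allocate_memory` for the vhost-user case, the sketch below shows roughly what a memfd-backed allocation boils down to. It uses raw libc calls rather than the `memfd` crate that `create_memfd` actually wraps, and omits the hugepage and sealing handling; `memfd_sketch` and its name argument are hypothetical.

use std::fs::File;
use std::os::fd::FromRawFd;

// Rough sketch: create an anonymous, file-backed memory object of `mem_size`
// bytes. Firecracker's `create_memfd` does this via the `memfd` crate and also
// handles hugetlb sizes and seals.
fn memfd_sketch(mem_size: u64) -> std::io::Result<File> {
    // SAFETY: memfd_create returns a new, owned fd on success.
    let fd = unsafe { libc::memfd_create(c"guest_mem".as_ptr(), libc::MFD_CLOEXEC) };
    if fd < 0 {
        return Err(std::io::Error::last_os_error());
    }
    // SAFETY: we own `fd` and transfer ownership to the File.
    let file = unsafe { File::from_raw_fd(fd) };
    file.set_len(mem_size)?; // size the backing object to the guest memory size
    Ok(file)
}
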
src/vmm/src/vstate/memory.rs

Lines changed: 19 additions & 18 deletions
@@ -12,7 +12,7 @@ use std::os::fd::AsRawFd;
 use std::ptr::null_mut;
 use std::sync::Arc;
 
-use kvm_bindings::{KVM_MEM_LOG_DIRTY_PAGES, kvm_userspace_memory_region2};
+use kvm_bindings::{KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES, kvm_userspace_memory_region2};
 use serde::{Deserialize, Serialize};
 pub use vm_memory::bitmap::{AtomicBitmap, BS, Bitmap, BitmapSlice};
 pub use vm_memory::mmap::MmapRegionBuilder;
@@ -139,14 +139,16 @@ impl KvmRegion {
     pub(crate) fn from_mmap_region(
         region: GuestRegionMmap,
         slot: u32,
-        guest_memfd: Option<&FileOffset>,
+        guest_memfd: Option<FileOffset>,
     ) -> Self {
         let region = ManuallyDrop::new(region);
-        let flags = if region.bitmap().is_some() {
-            KVM_MEM_LOG_DIRTY_PAGES
-        } else {
-            0
-        };
+        let mut flags = 0;
+        if region.bitmap().is_some() {
+            flags |= KVM_MEM_LOG_DIRTY_PAGES;
+        }
+        if guest_memfd.is_some() {
+            flags |= KVM_MEM_GUEST_MEMFD;
+        }
 
         #[allow(clippy::cast_sign_loss)]
         let (guest_memfd, guest_memfd_offset) = guest_memfd
@@ -304,18 +306,16 @@ pub fn create(
 }
 
 /// Creates a GuestMemoryMmap with `size` in MiB backed by a memfd.
-pub fn memfd_backed(
-    regions: &[(GuestAddress, usize)],
+pub fn file_shared(
+    file: File,
+    regions: impl Iterator<Item = (GuestAddress, usize)>,
     track_dirty_pages: bool,
     huge_pages: HugePageConfig,
 ) -> Result<Vec<GuestRegionMmap>, MemoryError> {
-    let size = regions.iter().map(|&(_, size)| size as u64).sum();
-    let memfd_file = create_memfd(size, huge_pages.into())?.into_file();
-
     create(
-        regions.iter().copied(),
+        regions,
         libc::MAP_SHARED | huge_pages.mmap_flags(),
-        Some(memfd_file),
+        Some(file),
         track_dirty_pages,
         0,
     )
@@ -338,7 +338,7 @@
 
 /// Creates a GuestMemoryMmap given a `file` containing the data
 /// and a `state` containing mapping information.
-pub fn snapshot_file(
+pub fn file_private(
     file: File,
     regions: impl Iterator<Item = (GuestAddress, usize)>,
     track_dirty_pages: bool,
@@ -529,7 +529,8 @@ impl GuestMemoryExtension for GuestMemoryMmap {
     }
 }
 
-fn create_memfd(
+/// Creates a memfd of the given size and huge pages configuration
+pub fn create_memfd(
     mem_size: u64,
     hugetlb_size: Option<memfd::HugetlbSize>,
 ) -> Result<memfd::Memfd, MemoryError> {
@@ -782,7 +783,7 @@ mod tests {
         guest_memory.dump(&mut memory_file).unwrap();
 
         let restored_guest_memory =
-            kvmify(snapshot_file(memory_file, memory_state.regions(), false, 0).unwrap());
+            kvmify(file_private(memory_file, memory_state.regions(), false, 0).unwrap());
 
         // Check that the region contents are the same.
         let mut restored_region = vec![0u8; page_size * 2];
@@ -841,7 +842,7 @@
 
         // We can restore from this because this is the first dirty dump.
         let restored_guest_memory =
-            kvmify(snapshot_file(file, memory_state.regions(), false, 0).unwrap());
+            kvmify(file_private(file, memory_state.regions(), false, 0).unwrap());
 
         // Check that the region contents are the same.
         let mut restored_region = vec![0u8; region_size];
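
The `memfd_backed`/`snapshot_file` to `file_shared`/`file_private` renames track the mmap semantics of each helper: `file_shared` maps its file with MAP_SHARED (visible in the diff above), while `file_private`, used for snapshot restore, presumably keeps the copy-on-write MAP_PRIVATE mapping of the old `snapshot_file`. A minimal standalone illustration of that distinction, independent of the Firecracker helpers (`map_file` is a hypothetical name):

use std::fs::File;
use std::os::fd::AsRawFd;
use std::ptr::null_mut;

// Map `len` bytes of `file` at offset 0. With MAP_SHARED, writes reach the
// backing file (memfd / guest_memfd style backing); with MAP_PRIVATE, the
// mapping is copy-on-write, which is what a snapshot-restore mapping wants so
// the snapshot file stays untouched.
fn map_file(file: &File, len: usize, flags: libc::c_int) -> *mut libc::c_void {
    // SAFETY: mapping a valid fd with a null address hint; the caller is
    // expected to check the result against libc::MAP_FAILED.
    unsafe {
        libc::mmap(
            null_mut(),
            len,
            libc::PROT_READ | libc::PROT_WRITE,
            flags,
            file.as_raw_fd(),
            0,
        )
    }
}

// Usage (sketch):
//   let shared = map_file(&memfd_file, len, libc::MAP_SHARED);
//   let cow = map_file(&snapshot_file, len, libc::MAP_PRIVATE);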
