Skip to content

Commit 73f627a

Browse files
committed
Use guest_memfd to back non-swiotlb regions
If the `secret_free` field of the memory_config is set to true in the /machine-config endpoint, back all non-swiotlb regions using guest_memfd. For our setup, this means both setting the guest_memfd[_offset] fields in kvm_userspace_memory_region2, as well as mmapping the guest memory and reflecting this VMA back into the memslot's userspace_addr (which is how KVM-internal accesses to guest memory will work for these guest_memfd regions, such as MMIO emulation on x86). Signed-off-by: Patrick Roy <[email protected]>
1 parent 62309b2 commit 73f627a

File tree

7 files changed

+129
-72
lines changed

7 files changed

+129
-72
lines changed

src/vmm/benches/memory_access.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ fn bench_single_page_fault(c: &mut Criterion, configuration: VmResources) {
99
c.bench_function("page_fault", |b| {
1010
b.iter_batched(
1111
|| {
12-
let memory = configuration.allocate_guest_memory().unwrap();
12+
let memory = configuration.allocate_guest_memory(None).unwrap();
1313
// Get a pointer to the first memory region (cannot do `.get_slice(GuestAddress(0),
1414
// 1)`, because on ARM64 guest memory does not start at physical
1515
// address 0).

src/vmm/src/builder.rs

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ use crate::vmm_config::snapshot::{MemBackendConfig, MemBackendType};
6565
use crate::vstate::kvm::Kvm;
6666
use crate::vstate::memory::Bounce;
6767
use crate::vstate::vcpu::{Vcpu, VcpuError};
68-
use crate::vstate::vm::Vm;
68+
use crate::vstate::vm::{KVM_GMEM_NO_DIRECT_MAP, Vm};
6969
use crate::{EventManager, Vmm, VmmError, device_manager};
7070

7171
/// Errors associated with starting the instance.
@@ -217,14 +217,6 @@ pub fn build_microvm_for_boot(
217217
.as_ref()
218218
.ok_or(MissingKernelConfig)?;
219219

220-
let guest_memory = vm_resources
221-
.allocate_guest_memory()
222-
.map_err(StartMicrovmError::GuestMemory)?;
223-
224-
let swiotlb = vm_resources
225-
.allocate_swiotlb_region()
226-
.map_err(StartMicrovmError::GuestMemory)?;
227-
228220
// Clone the command-line so that a failed boot doesn't pollute the original.
229221
#[allow(unused_mut)]
230222
let mut boot_cmdline = boot_config.cmdline.clone();
@@ -234,16 +226,37 @@ pub fn build_microvm_for_boot(
234226
.cpu_template
235227
.get_cpu_template()?;
236228

229+
let secret_free = vm_resources.machine_config.mem_config.secret_free;
230+
237231
let (mut vmm, mut vcpus) = create_vmm_and_vcpus(
238232
instance_info,
239233
event_manager,
240234
vm_resources.machine_config.vcpu_count,
241235
cpu_template.kvm_capabilities.clone(),
242236
)?;
243237

244-
vmm.vm
245-
.register_memory_regions(guest_memory)
246-
.map_err(VmmError::Vm)?;
238+
let guest_memfd = match secret_free {
239+
true => Some(
240+
vmm.vm
241+
.create_guest_memfd(vm_resources.memory_size(), KVM_GMEM_NO_DIRECT_MAP)
242+
.map_err(VmmError::Vm)?,
243+
),
244+
false => None,
245+
};
246+
247+
let guest_memory = vm_resources
248+
.allocate_guest_memory(guest_memfd)
249+
.map_err(StartMicrovmError::GuestMemory)?;
250+
251+
let swiotlb = vm_resources
252+
.allocate_swiotlb_region()
253+
.map_err(StartMicrovmError::GuestMemory)?;
254+
255+
for region in guest_memory {
256+
vmm.vm
257+
.register_memory_region(region, secret_free)
258+
.map_err(VmmError::Vm)?;
259+
}
247260

248261
if let Some(swiotlb) = swiotlb {
249262
vmm.vm
@@ -252,10 +265,7 @@ pub fn build_microvm_for_boot(
252265
}
253266

254267
let entry_point = load_kernel(
255-
Bounce::new(
256-
&boot_config.kernel_file,
257-
vm_resources.machine_config.mem_config.secret_free,
258-
),
268+
Bounce::new(&boot_config.kernel_file, secret_free),
259269
vmm.vm.guest_memory(),
260270
)?;
261271
let initrd = match &boot_config.initrd_file {
@@ -267,10 +277,7 @@ pub fn build_microvm_for_boot(
267277

268278
Some(InitrdConfig::from_reader(
269279
vmm.vm.guest_memory(),
270-
Bounce::new(
271-
initrd_file,
272-
vm_resources.machine_config.mem_config.secret_free,
273-
),
280+
Bounce::new(initrd_file, secret_free),
274281
u64_to_usize(size),
275282
)?)
276283
}
@@ -493,8 +500,9 @@ pub fn build_microvm_from_snapshot(
493500
guest_memory.iter().map(|r| r.len()).sum(),
494501
)?;
495502

503+
// TODO: sort out gmem support for snapshot restore
496504
vmm.vm
497-
.register_memory_regions(guest_memory)
505+
.register_memory_regions(guest_memory, false)
498506
.map_err(VmmError::Vm)
499507
.map_err(StartMicrovmError::Internal)?;
500508

src/vmm/src/device_manager/mmio.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ mod tests {
656656
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
657657
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
658658
let mut vm = Vm::new(&kvm).unwrap();
659-
vm.register_memory_regions(guest_mem).unwrap();
659+
vm.register_memory_regions(guest_mem, false).unwrap();
660660
let mut device_manager = MMIODeviceManager::new();
661661
let mut resource_allocator = ResourceAllocator::new().unwrap();
662662

@@ -687,7 +687,7 @@ mod tests {
687687
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
688688
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
689689
let mut vm = Vm::new(&kvm).unwrap();
690-
vm.register_memory_regions(guest_mem).unwrap();
690+
vm.register_memory_regions(guest_mem, false).unwrap();
691691
let mut device_manager = MMIODeviceManager::new();
692692
let mut resource_allocator = ResourceAllocator::new().unwrap();
693693

@@ -743,7 +743,7 @@ mod tests {
743743
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
744744
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
745745
let mut vm = Vm::new(&kvm).unwrap();
746-
vm.register_memory_regions(guest_mem).unwrap();
746+
vm.register_memory_regions(guest_mem, false).unwrap();
747747

748748
#[cfg(target_arch = "x86_64")]
749749
vm.setup_irqchip().unwrap();

src/vmm/src/persist.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ pub fn restore_memory(
330330
}
331331

332332
let mem_file = File::open(path)?;
333-
memory::snapshot_file(mem_file, state.regions(), track_dirty, offset)?
333+
memory::file_private(mem_file, state.regions(), track_dirty, offset)?
334334
}
335335
None => memory::anonymous(state.regions(), track_dirty, huge_pages)?,
336336
};

src/vmm/src/resources.rs

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
use std::convert::From;
5+
use std::fs::File;
56
use std::path::PathBuf;
67
use std::sync::{Arc, Mutex, MutexGuard};
78

@@ -30,7 +31,7 @@ use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError};
3031
use crate::vmm_config::net::*;
3132
use crate::vmm_config::vsock::*;
3233
use crate::vstate::memory;
33-
use crate::vstate::memory::{GuestRegionMmap, MemoryError};
34+
use crate::vstate::memory::{GuestRegionMmap, MemoryError, create_memfd};
3435

3536
/// Errors encountered when configuring microVM resources.
3637
#[derive(Debug, thiserror::Error, displaydoc::Display)]
@@ -492,28 +493,45 @@ impl VmResources {
492493
offset: usize,
493494
size: usize,
494495
vhost_accessible: bool,
496+
file: Option<File>,
495497
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
496-
let regions = crate::arch::arch_memory_regions(offset, size);
497-
if vhost_accessible {
498-
memory::memfd_backed(
499-
regions.as_ref(),
498+
let regions = crate::arch::arch_memory_regions(offset, size).into_iter();
499+
match file {
500+
Some(file) => memory::file_shared(
501+
file,
502+
regions,
500503
self.machine_config.track_dirty_pages,
501504
self.machine_config.huge_pages,
502-
)
503-
} else {
504-
memory::anonymous(
505-
regions.into_iter(),
506-
self.machine_config.track_dirty_pages,
507-
self.machine_config.huge_pages,
508-
)
505+
),
506+
None => {
507+
if vhost_accessible {
508+
let memfd = create_memfd(size as u64, self.machine_config.huge_pages.into())?
509+
.into_file();
510+
memory::file_shared(
511+
memfd,
512+
regions,
513+
self.machine_config.track_dirty_pages,
514+
self.machine_config.huge_pages,
515+
)
516+
} else {
517+
memory::anonymous(
518+
regions.into_iter(),
519+
self.machine_config.track_dirty_pages,
520+
self.machine_config.huge_pages,
521+
)
522+
}
523+
}
509524
}
510525
}
511526

512527
/// Allocates guest memory in a configuration most appropriate for these [`VmResources`].
513528
///
514529
/// If vhost-user-blk devices are in use, allocates memfd-backed shared memory, otherwise
515530
/// prefers anonymous memory for performance reasons.
516-
pub fn allocate_guest_memory(&self) -> Result<Vec<GuestRegionMmap>, MemoryError> {
531+
pub fn allocate_guest_memory(
532+
&self,
533+
guest_memfd: Option<File>,
534+
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
517535
// Page faults are more expensive for shared memory mapping, including memfd.
518536
// For this reason, we only back guest memory with a memfd
519537
// if a vhost-user-blk device is configured in the VM, otherwise we fall back to
@@ -530,6 +548,7 @@ impl VmResources {
530548
0,
531549
self.memory_size(),
532550
self.vhost_user_devices_used() && !self.swiotlb_used(),
551+
guest_memfd,
533552
)
534553
}
535554

@@ -542,8 +561,12 @@ impl VmResources {
542561
let start = self.memory_size();
543562
let start = start.max(crate::arch::bytes_before_last_gap());
544563

545-
let mut mem =
546-
self.allocate_memory(start, self.swiotlb_size(), self.vhost_user_devices_used())?;
564+
let mut mem = self.allocate_memory(
565+
start,
566+
self.swiotlb_size(),
567+
self.vhost_user_devices_used(),
568+
None,
569+
)?;
547570

548571
assert_eq!(mem.len(), 1);
549572

@@ -1614,7 +1637,7 @@ mod tests {
16141637
..Default::default()
16151638
};
16161639

1617-
let normal_mem = resources.allocate_guest_memory().unwrap();
1640+
let normal_mem = resources.allocate_guest_memory(None).unwrap();
16181641
assert_eq!(
16191642
normal_mem.iter().map(|r| r.len()).sum::<usize>(),
16201643
mib_to_bytes(16)

src/vmm/src/vstate/memory.rs

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use std::os::fd::AsRawFd;
1212
use std::ptr::null_mut;
1313
use std::sync::Arc;
1414

15-
use kvm_bindings::{KVM_MEM_LOG_DIRTY_PAGES, kvm_userspace_memory_region2};
15+
use kvm_bindings::{KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES, kvm_userspace_memory_region2};
1616
use serde::{Deserialize, Serialize};
1717
pub use vm_memory::bitmap::{AtomicBitmap, BS, Bitmap, BitmapSlice};
1818
pub use vm_memory::mmap::MmapRegionBuilder;
@@ -138,14 +138,16 @@ impl KvmRegion {
138138
pub(crate) fn from_mmap_region(
139139
region: GuestRegionMmap,
140140
slot: u32,
141-
guest_memfd: Option<&FileOffset>,
141+
guest_memfd: Option<FileOffset>,
142142
) -> Self {
143143
let region = ManuallyDrop::new(region);
144-
let flags = if region.bitmap().is_some() {
145-
KVM_MEM_LOG_DIRTY_PAGES
146-
} else {
147-
0
148-
};
144+
let mut flags = 0;
145+
if region.bitmap().is_some() {
146+
flags |= KVM_MEM_LOG_DIRTY_PAGES;
147+
}
148+
if guest_memfd.is_some() {
149+
flags |= KVM_MEM_GUEST_MEMFD;
150+
}
149151

150152
#[allow(clippy::cast_sign_loss)]
151153
let (guest_memfd, guest_memfd_offset) = guest_memfd
@@ -301,18 +303,16 @@ pub fn create(
301303
}
302304

303305
/// Creates a GuestMemoryMmap with `size` in MiB backed by a memfd.
304-
pub fn memfd_backed(
305-
regions: &[(GuestAddress, usize)],
306+
pub fn file_shared(
307+
file: File,
308+
regions: impl Iterator<Item = (GuestAddress, usize)>,
306309
track_dirty_pages: bool,
307310
huge_pages: HugePageConfig,
308311
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
309-
let size = regions.iter().map(|&(_, size)| size as u64).sum();
310-
let memfd_file = create_memfd(size, huge_pages.into())?.into_file();
311-
312312
create(
313-
regions.iter().copied(),
313+
regions,
314314
libc::MAP_SHARED | huge_pages.mmap_flags(),
315-
Some(memfd_file),
315+
Some(file),
316316
track_dirty_pages,
317317
0,
318318
)
@@ -335,7 +335,7 @@ pub fn anonymous(
335335

336336
/// Creates a GuestMemoryMmap given a `file` containing the data
337337
/// and a `state` containing mapping information.
338-
pub fn snapshot_file(
338+
pub fn file_private(
339339
file: File,
340340
regions: impl Iterator<Item = (GuestAddress, usize)>,
341341
track_dirty_pages: bool,
@@ -528,7 +528,8 @@ impl GuestMemoryExtension for GuestMemoryMmap {
528528
}
529529
}
530530

531-
fn create_memfd(
531+
/// Creates a memfd of the given size and huge pages configuration
532+
pub fn create_memfd(
532533
mem_size: u64,
533534
hugetlb_size: Option<memfd::HugetlbSize>,
534535
) -> Result<memfd::Memfd, MemoryError> {
@@ -781,7 +782,7 @@ mod tests {
781782
guest_memory.dump(&mut memory_file).unwrap();
782783

783784
let restored_guest_memory =
784-
kvmify(snapshot_file(memory_file, memory_state.regions(), false, 0).unwrap());
785+
kvmify(file_private(memory_file, memory_state.regions(), false, 0).unwrap());
785786

786787
// Check that the region contents are the same.
787788
let mut restored_region = vec![0u8; page_size * 2];
@@ -839,7 +840,7 @@ mod tests {
839840

840841
// We can restore from this because this is the first dirty dump.
841842
let restored_guest_memory =
842-
kvmify(snapshot_file(file, memory_state.regions(), false, 0).unwrap());
843+
kvmify(file_private(file, memory_state.regions(), false, 0).unwrap());
843844

844845
// Check that the region contents are the same.
845846
let mut restored_region = vec![0u8; region_size];

0 commit comments

Comments
 (0)