Skip to content

Commit fb8631f

Browse files
committed
Use guest_memfd to back non swiotlb regions
If the `secret_free` field of the memory_config is set to true in the /machine-config endpoint, back all non-swiotlb regions using guest_memfd. For our setup, this means both setting the guest_memfd[_offset] fields in kvm_userspace_memory_region2, and mmapping the guest memory and reflecting this VMA back into the memslot's userspace_addr (which is how KVM-internal accesses to guest memory will work for these guest_memfd regions, such as MMIO emulation on x86). Signed-off-by: Patrick Roy <[email protected]>
1 parent 63cf05a commit fb8631f

File tree

7 files changed

+129
-72
lines changed

7 files changed

+129
-72
lines changed

src/vmm/benches/memory_access.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ fn bench_single_page_fault(c: &mut Criterion, configuration: VmResources) {
99
c.bench_function("page_fault", |b| {
1010
b.iter_batched(
1111
|| {
12-
let memory = configuration.allocate_guest_memory().unwrap();
12+
let memory = configuration.allocate_guest_memory(None).unwrap();
1313
// Get a pointer to the first memory region (cannot do `.get_slice(GuestAddress(0),
1414
// 1)`, because on ARM64 guest memory does not start at physical
1515
// address 0).

src/vmm/src/builder.rs

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ use crate::vmm_config::snapshot::{MemBackendConfig, MemBackendType};
6565
use crate::vstate::kvm::Kvm;
6666
use crate::vstate::memory::Bounce;
6767
use crate::vstate::vcpu::{Vcpu, VcpuError};
68-
use crate::vstate::vm::Vm;
68+
use crate::vstate::vm::{KVM_GMEM_NO_DIRECT_MAP, Vm};
6969
use crate::{EventManager, Vmm, VmmError, device_manager};
7070

7171
/// Errors associated with starting the instance.
@@ -217,14 +217,6 @@ pub fn build_microvm_for_boot(
217217
.as_ref()
218218
.ok_or(MissingKernelConfig)?;
219219

220-
let guest_memory = vm_resources
221-
.allocate_guest_memory()
222-
.map_err(StartMicrovmError::GuestMemory)?;
223-
224-
let swiotlb = vm_resources
225-
.allocate_swiotlb_region()
226-
.map_err(StartMicrovmError::GuestMemory)?;
227-
228220
// Clone the command-line so that a failed boot doesn't pollute the original.
229221
#[allow(unused_mut)]
230222
let mut boot_cmdline = boot_config.cmdline.clone();
@@ -234,16 +226,37 @@ pub fn build_microvm_for_boot(
234226
.cpu_template
235227
.get_cpu_template()?;
236228

229+
let secret_free = vm_resources.machine_config.mem_config.secret_free;
230+
237231
let (mut vmm, mut vcpus) = create_vmm_and_vcpus(
238232
instance_info,
239233
event_manager,
240234
vm_resources.machine_config.vcpu_count,
241235
cpu_template.kvm_capabilities.clone(),
242236
)?;
243237

244-
vmm.vm
245-
.register_memory_regions(guest_memory)
246-
.map_err(VmmError::Vm)?;
238+
let guest_memfd = match secret_free {
239+
true => Some(
240+
vmm.vm
241+
.create_guest_memfd(vm_resources.memory_size(), KVM_GMEM_NO_DIRECT_MAP)
242+
.map_err(VmmError::Vm)?,
243+
),
244+
false => None,
245+
};
246+
247+
let guest_memory = vm_resources
248+
.allocate_guest_memory(guest_memfd)
249+
.map_err(StartMicrovmError::GuestMemory)?;
250+
251+
let swiotlb = vm_resources
252+
.allocate_swiotlb_region()
253+
.map_err(StartMicrovmError::GuestMemory)?;
254+
255+
for region in guest_memory {
256+
vmm.vm
257+
.register_memory_region(region, secret_free)
258+
.map_err(VmmError::Vm)?;
259+
}
247260

248261
if let Some(swiotlb) = swiotlb {
249262
vmm.vm
@@ -252,10 +265,7 @@ pub fn build_microvm_for_boot(
252265
}
253266

254267
let entry_point = load_kernel(
255-
Bounce(
256-
&boot_config.kernel_file,
257-
vm_resources.machine_config.mem_config.secret_free,
258-
),
268+
Bounce(&boot_config.kernel_file, secret_free),
259269
vmm.vm.guest_memory(),
260270
)?;
261271
let initrd = match &boot_config.initrd_file {
@@ -267,10 +277,7 @@ pub fn build_microvm_for_boot(
267277

268278
Some(InitrdConfig::from_reader(
269279
vmm.vm.guest_memory(),
270-
Bounce(
271-
initrd_file,
272-
vm_resources.machine_config.mem_config.secret_free,
273-
),
280+
Bounce(initrd_file, secret_free),
274281
u64_to_usize(size),
275282
)?)
276283
}
@@ -493,8 +500,9 @@ pub fn build_microvm_from_snapshot(
493500
guest_memory.iter().map(|r| r.len()).sum(),
494501
)?;
495502

503+
// TODO: sort out gmem support for snapshot restore
496504
vmm.vm
497-
.register_memory_regions(guest_memory)
505+
.register_memory_regions(guest_memory, false)
498506
.map_err(VmmError::Vm)
499507
.map_err(StartMicrovmError::Internal)?;
500508

src/vmm/src/device_manager/mmio.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ mod tests {
656656
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
657657
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
658658
let mut vm = Vm::new(&kvm).unwrap();
659-
vm.register_memory_regions(guest_mem).unwrap();
659+
vm.register_memory_regions(guest_mem, false).unwrap();
660660
let mut device_manager = MMIODeviceManager::new();
661661
let mut resource_allocator = ResourceAllocator::new().unwrap();
662662

@@ -687,7 +687,7 @@ mod tests {
687687
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
688688
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
689689
let mut vm = Vm::new(&kvm).unwrap();
690-
vm.register_memory_regions(guest_mem).unwrap();
690+
vm.register_memory_regions(guest_mem, false).unwrap();
691691
let mut device_manager = MMIODeviceManager::new();
692692
let mut resource_allocator = ResourceAllocator::new().unwrap();
693693

@@ -743,7 +743,7 @@ mod tests {
743743
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
744744
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
745745
let mut vm = Vm::new(&kvm).unwrap();
746-
vm.register_memory_regions(guest_mem).unwrap();
746+
vm.register_memory_regions(guest_mem, false).unwrap();
747747

748748
#[cfg(target_arch = "x86_64")]
749749
vm.setup_irqchip().unwrap();

src/vmm/src/persist.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ pub fn restore_memory(
330330
}
331331

332332
let mem_file = File::open(path)?;
333-
memory::snapshot_file(mem_file, state.regions(), track_dirty, offset)?
333+
memory::file_private(mem_file, state.regions(), track_dirty, offset)?
334334
}
335335
None => memory::anonymous(state.regions(), track_dirty, huge_pages)?,
336336
};

src/vmm/src/resources.rs

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
use std::convert::From;
5+
use std::fs::File;
56
use std::path::PathBuf;
67
use std::sync::{Arc, Mutex, MutexGuard};
78

@@ -30,7 +31,7 @@ use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError};
3031
use crate::vmm_config::net::*;
3132
use crate::vmm_config::vsock::*;
3233
use crate::vstate::memory;
33-
use crate::vstate::memory::{GuestRegionMmap, MemoryError};
34+
use crate::vstate::memory::{GuestRegionMmap, MemoryError, create_memfd};
3435

3536
/// Errors encountered when configuring microVM resources.
3637
#[derive(Debug, thiserror::Error, displaydoc::Display)]
@@ -521,28 +522,45 @@ impl VmResources {
521522
offset: usize,
522523
size: usize,
523524
vhost_accessible: bool,
525+
file: Option<File>,
524526
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
525-
let regions = crate::arch::arch_memory_regions(offset, size);
526-
if vhost_accessible {
527-
memory::memfd_backed(
528-
regions.as_ref(),
527+
let regions = crate::arch::arch_memory_regions(offset, size).into_iter();
528+
match file {
529+
Some(file) => memory::file_shared(
530+
file,
531+
regions,
529532
self.machine_config.track_dirty_pages,
530533
self.machine_config.huge_pages,
531-
)
532-
} else {
533-
memory::anonymous(
534-
regions.into_iter(),
535-
self.machine_config.track_dirty_pages,
536-
self.machine_config.huge_pages,
537-
)
534+
),
535+
None => {
536+
if vhost_accessible {
537+
let memfd = create_memfd(size as u64, self.machine_config.huge_pages.into())?
538+
.into_file();
539+
memory::file_shared(
540+
memfd,
541+
regions,
542+
self.machine_config.track_dirty_pages,
543+
self.machine_config.huge_pages,
544+
)
545+
} else {
546+
memory::anonymous(
547+
regions.into_iter(),
548+
self.machine_config.track_dirty_pages,
549+
self.machine_config.huge_pages,
550+
)
551+
}
552+
}
538553
}
539554
}
540555

541556
/// Allocates guest memory in a configuration most appropriate for these [`VmResources`].
542557
///
543558
/// If vhost-user-blk devices are in use, allocates memfd-backed shared memory, otherwise
544559
/// prefers anonymous memory for performance reasons.
545-
pub fn allocate_guest_memory(&self) -> Result<Vec<GuestRegionMmap>, MemoryError> {
560+
pub fn allocate_guest_memory(
561+
&self,
562+
guest_memfd: Option<File>,
563+
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
546564
// Page faults are more expensive for shared memory mapping, including memfd.
547565
// For this reason, we only back guest memory with a memfd
548566
// if a vhost-user-blk device is configured in the VM, otherwise we fall back to
@@ -559,6 +577,7 @@ impl VmResources {
559577
0,
560578
self.memory_size(),
561579
self.vhost_user_devices_used() && !self.swiotlb_used(),
580+
guest_memfd,
562581
)
563582
}
564583

@@ -571,8 +590,12 @@ impl VmResources {
571590
let start = self.memory_size();
572591
let start = start.max(crate::arch::offset_after_last_gap());
573592

574-
let mut mem =
575-
self.allocate_memory(start, self.swiotlb_size(), self.vhost_user_devices_used())?;
593+
let mut mem = self.allocate_memory(
594+
start,
595+
self.swiotlb_size(),
596+
self.vhost_user_devices_used(),
597+
None,
598+
)?;
576599

577600
assert_eq!(mem.len(), 1);
578601

@@ -1643,7 +1666,7 @@ mod tests {
16431666
..Default::default()
16441667
};
16451668

1646-
let normal_mem = resources.allocate_guest_memory().unwrap();
1669+
let normal_mem = resources.allocate_guest_memory(None).unwrap();
16471670
assert_eq!(
16481671
normal_mem.iter().map(|r| r.len()).sum::<usize>(),
16491672
mib_to_bytes(16)

src/vmm/src/vstate/memory.rs

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use std::os::fd::{AsFd, AsRawFd};
1212
use std::ptr::null_mut;
1313
use std::sync::Arc;
1414

15-
use kvm_bindings::{KVM_MEM_LOG_DIRTY_PAGES, kvm_userspace_memory_region2};
15+
use kvm_bindings::{KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES, kvm_userspace_memory_region2};
1616
use serde::{Deserialize, Serialize};
1717
pub use vm_memory::bitmap::{AtomicBitmap, BS, Bitmap, BitmapSlice};
1818
pub use vm_memory::mmap::MmapRegionBuilder;
@@ -129,14 +129,16 @@ impl KvmRegion {
129129
pub(crate) fn from_mmap_region(
130130
region: GuestRegionMmap,
131131
slot: u32,
132-
guest_memfd: Option<&FileOffset>,
132+
guest_memfd: Option<FileOffset>,
133133
) -> Self {
134134
let region = ManuallyDrop::new(region);
135-
let flags = if region.bitmap().is_some() {
136-
KVM_MEM_LOG_DIRTY_PAGES
137-
} else {
138-
0
139-
};
135+
let mut flags = 0;
136+
if region.bitmap().is_some() {
137+
flags |= KVM_MEM_LOG_DIRTY_PAGES;
138+
}
139+
if guest_memfd.is_some() {
140+
flags |= KVM_MEM_GUEST_MEMFD;
141+
}
140142

141143
#[allow(clippy::cast_sign_loss)]
142144
let (guest_memfd, guest_memfd_offset) = guest_memfd
@@ -292,18 +294,16 @@ pub fn create(
292294
}
293295

294296
/// Creates a GuestMemoryMmap with `size` in MiB backed by a memfd.
295-
pub fn memfd_backed(
296-
regions: &[(GuestAddress, usize)],
297+
pub fn file_shared(
298+
file: File,
299+
regions: impl Iterator<Item = (GuestAddress, usize)>,
297300
track_dirty_pages: bool,
298301
huge_pages: HugePageConfig,
299302
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
300-
let size = regions.iter().map(|&(_, size)| size as u64).sum();
301-
let memfd_file = create_memfd(size, huge_pages.into())?.into_file();
302-
303303
create(
304-
regions.iter().copied(),
304+
regions,
305305
libc::MAP_SHARED | huge_pages.mmap_flags(),
306-
Some(memfd_file),
306+
Some(file),
307307
track_dirty_pages,
308308
0,
309309
)
@@ -326,7 +326,7 @@ pub fn anonymous(
326326

327327
/// Creates a GuestMemoryMmap given a `file` containing the data
328328
/// and a `state` containing mapping information.
329-
pub fn snapshot_file(
329+
pub fn file_private(
330330
file: File,
331331
regions: impl Iterator<Item = (GuestAddress, usize)>,
332332
track_dirty_pages: bool,
@@ -519,7 +519,8 @@ impl GuestMemoryExtension for GuestMemoryMmap {
519519
}
520520
}
521521

522-
fn create_memfd(
522+
/// Creates a memfd of the given size and huge pages configuration
523+
pub fn create_memfd(
523524
mem_size: u64,
524525
hugetlb_size: Option<memfd::HugetlbSize>,
525526
) -> Result<memfd::Memfd, MemoryError> {
@@ -772,7 +773,7 @@ mod tests {
772773
guest_memory.dump(&mut memory_file).unwrap();
773774

774775
let restored_guest_memory =
775-
kvmify(snapshot_file(memory_file, memory_state.regions(), false, 0).unwrap());
776+
kvmify(file_private(memory_file, memory_state.regions(), false, 0).unwrap());
776777

777778
// Check that the region contents are the same.
778779
let mut restored_region = vec![0u8; page_size * 2];
@@ -830,7 +831,7 @@ mod tests {
830831

831832
// We can restore from this because this is the first dirty dump.
832833
let restored_guest_memory =
833-
kvmify(snapshot_file(file, memory_state.regions(), false, 0).unwrap());
834+
kvmify(file_private(file, memory_state.regions(), false, 0).unwrap());
834835

835836
// Check that the region contents are the same.
836837
let mut restored_region = vec![0u8; region_size];

0 commit comments

Comments (0)