Skip to content

Commit d257d0e

Browse files
committed
Use guest_memfd to back non-swiotlb regions
If the `secret_free` field of the memory_config is set to true via the /machine-config endpoint, back all non-swiotlb regions using guest_memfd. For our setup, this means both setting the guest_memfd[_offset] fields in kvm_userspace_memory_region2 and mmapping the guest memory, then reflecting this VMA back into the memslot's userspace_addr (which is how KVM-internal accesses to guest memory, such as MMIO emulation on x86, work for these guest_memfd regions).

Signed-off-by: Patrick Roy <[email protected]>
1 parent 76a4479 commit d257d0e

File tree

7 files changed

+125
-70
lines changed

7 files changed

+125
-70
lines changed

src/vmm/benches/memory_access.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ fn bench_single_page_fault(c: &mut Criterion, configuration: VmResources) {
99
c.bench_function("page_fault", |b| {
1010
b.iter_batched(
1111
|| {
12-
let memory = configuration.allocate_guest_memory().unwrap();
12+
let memory = configuration.allocate_guest_memory(None).unwrap();
1313
// Get a pointer to the first memory region (cannot do `.get_slice(GuestAddress(0),
1414
// 1)`, because on ARM64 guest memory does not start at physical
1515
// address 0).

src/vmm/src/builder.rs

Lines changed: 25 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ use crate::vmm_config::snapshot::{MemBackendConfig, MemBackendType};
6565
use crate::vstate::kvm::Kvm;
6666
use crate::vstate::memory::Bounce;
6767
use crate::vstate::vcpu::{Vcpu, VcpuError};
68-
use crate::vstate::vm::Vm;
68+
use crate::vstate::vm::{KVM_GMEM_NO_DIRECT_MAP, Vm};
6969
use crate::{EventManager, Vmm, VmmError, device_manager};
7070

7171
/// Errors associated with starting the instance.
@@ -217,14 +217,6 @@ pub fn build_microvm_for_boot(
217217
.as_ref()
218218
.ok_or(MissingKernelConfig)?;
219219

220-
let guest_memory = vm_resources
221-
.allocate_guest_memory()
222-
.map_err(StartMicrovmError::GuestMemory)?;
223-
224-
let swiotlb = vm_resources
225-
.allocate_swiotlb_region()
226-
.map_err(StartMicrovmError::GuestMemory)?;
227-
228220
// Clone the command-line so that a failed boot doesn't pollute the original.
229221
#[allow(unused_mut)]
230222
let mut boot_cmdline = boot_config.cmdline.clone();
@@ -234,15 +226,34 @@ pub fn build_microvm_for_boot(
234226
.cpu_template
235227
.get_cpu_template()?;
236228

229+
let secret_free = vm_resources.machine_config.mem_config.secret_free;
230+
237231
let (mut vmm, mut vcpus) = create_vmm_and_vcpus(
238232
instance_info,
239233
event_manager,
240234
vm_resources.machine_config.vcpu_count,
241235
cpu_template.kvm_capabilities.clone(),
242236
)?;
243237

238+
let guest_memfd = match secret_free {
239+
true => Some(
240+
vmm.vm
241+
.create_guest_memfd(vm_resources.memory_size(), KVM_GMEM_NO_DIRECT_MAP)
242+
.map_err(VmmError::Vm)?,
243+
),
244+
false => None,
245+
};
246+
247+
let guest_memory = vm_resources
248+
.allocate_guest_memory(guest_memfd)
249+
.map_err(StartMicrovmError::GuestMemory)?;
250+
251+
let swiotlb = vm_resources
252+
.allocate_swiotlb_region()
253+
.map_err(StartMicrovmError::GuestMemory)?;
254+
244255
vmm.vm
245-
.register_memory_regions(guest_memory)
256+
.register_memory_regions(guest_memory, secret_free)
246257
.map_err(VmmError::Vm)?;
247258

248259
if let Some(swiotlb) = swiotlb {
@@ -252,10 +263,7 @@ pub fn build_microvm_for_boot(
252263
}
253264

254265
let entry_point = load_kernel(
255-
Bounce(
256-
&boot_config.kernel_file,
257-
vm_resources.machine_config.mem_config.secret_free,
258-
),
266+
Bounce(&boot_config.kernel_file, secret_free),
259267
vmm.vm.guest_memory(),
260268
)?;
261269
let initrd = match &boot_config.initrd_file {
@@ -267,10 +275,7 @@ pub fn build_microvm_for_boot(
267275

268276
Some(InitrdConfig::from_reader(
269277
vmm.vm.guest_memory(),
270-
Bounce(
271-
initrd_file,
272-
vm_resources.machine_config.mem_config.secret_free,
273-
),
278+
Bounce(initrd_file, secret_free),
274279
u64_to_usize(size),
275280
)?)
276281
}
@@ -493,8 +498,9 @@ pub fn build_microvm_from_snapshot(
493498
guest_memory.iter().map(|r| r.len()).sum(),
494499
)?;
495500

501+
// TODO: sort out gmem support for snapshot restore
496502
vmm.vm
497-
.register_memory_regions(guest_memory)
503+
.register_memory_regions(guest_memory, false)
498504
.map_err(VmmError::Vm)
499505
.map_err(StartMicrovmError::Internal)?;
500506

src/vmm/src/device_manager/mmio.rs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -656,7 +656,7 @@ mod tests {
656656
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
657657
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
658658
let mut vm = Vm::new(&kvm).unwrap();
659-
vm.register_memory_regions(guest_mem).unwrap();
659+
vm.register_memory_regions(guest_mem, false).unwrap();
660660
let mut device_manager = MMIODeviceManager::new();
661661
let mut resource_allocator = ResourceAllocator::new().unwrap();
662662

@@ -687,7 +687,7 @@ mod tests {
687687
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
688688
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
689689
let mut vm = Vm::new(&kvm).unwrap();
690-
vm.register_memory_regions(guest_mem).unwrap();
690+
vm.register_memory_regions(guest_mem, false).unwrap();
691691
let mut device_manager = MMIODeviceManager::new();
692692
let mut resource_allocator = ResourceAllocator::new().unwrap();
693693

@@ -743,7 +743,7 @@ mod tests {
743743
let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
744744
let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
745745
let mut vm = Vm::new(&kvm).unwrap();
746-
vm.register_memory_regions(guest_mem).unwrap();
746+
vm.register_memory_regions(guest_mem, false).unwrap();
747747

748748
#[cfg(target_arch = "x86_64")]
749749
vm.setup_irqchip().unwrap();

src/vmm/src/persist.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -330,7 +330,7 @@ pub fn restore_memory(
330330
}
331331

332332
let mem_file = File::open(path)?;
333-
memory::snapshot_file(mem_file, state.regions(), track_dirty, offset)?
333+
memory::file_private(mem_file, state.regions(), track_dirty, offset)?
334334
}
335335
None => memory::anonymous(state.regions(), track_dirty, huge_pages)?,
336336
};

src/vmm/src/resources.rs

Lines changed: 39 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
use std::convert::From;
5+
use std::fs::File;
56
use std::path::PathBuf;
67
use std::sync::{Arc, Mutex, MutexGuard};
78

@@ -30,7 +31,7 @@ use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError};
3031
use crate::vmm_config::net::*;
3132
use crate::vmm_config::vsock::*;
3233
use crate::vstate::memory;
33-
use crate::vstate::memory::{GuestRegionMmap, MemoryError};
34+
use crate::vstate::memory::{GuestRegionMmap, MemoryError, create_memfd};
3435

3536
/// Errors encountered when configuring microVM resources.
3637
#[derive(Debug, thiserror::Error, displaydoc::Display)]
@@ -532,28 +533,45 @@ impl VmResources {
532533
offset: usize,
533534
size: usize,
534535
vhost_accessible: bool,
536+
file: Option<File>,
535537
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
536-
let regions = crate::arch::arch_memory_regions(offset, size);
537-
if vhost_accessible {
538-
memory::memfd_backed(
539-
regions.as_ref(),
538+
let regions = crate::arch::arch_memory_regions(offset, size).into_iter();
539+
match file {
540+
Some(file) => memory::file_shared(
541+
file,
542+
regions,
540543
self.machine_config.track_dirty_pages,
541544
self.machine_config.huge_pages,
542-
)
543-
} else {
544-
memory::anonymous(
545-
regions.into_iter(),
546-
self.machine_config.track_dirty_pages,
547-
self.machine_config.huge_pages,
548-
)
545+
),
546+
None => {
547+
if vhost_accessible {
548+
let memfd = create_memfd(size as u64, self.machine_config.huge_pages.into())?
549+
.into_file();
550+
memory::file_shared(
551+
memfd,
552+
regions,
553+
self.machine_config.track_dirty_pages,
554+
self.machine_config.huge_pages,
555+
)
556+
} else {
557+
memory::anonymous(
558+
regions.into_iter(),
559+
self.machine_config.track_dirty_pages,
560+
self.machine_config.huge_pages,
561+
)
562+
}
563+
}
549564
}
550565
}
551566

552567
/// Allocates guest memory in a configuration most appropriate for these [`VmResources`].
553568
///
554569
/// If vhost-user-blk devices are in use, allocates memfd-backed shared memory, otherwise
555570
/// prefers anonymous memory for performance reasons.
556-
pub fn allocate_guest_memory(&self) -> Result<Vec<GuestRegionMmap>, MemoryError> {
571+
pub fn allocate_guest_memory(
572+
&self,
573+
guest_memfd: Option<File>,
574+
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
557575
// Page faults are more expensive for shared memory mapping, including memfd.
558576
// For this reason, we only back guest memory with a memfd
559577
// if a vhost-user-blk device is configured in the VM, otherwise we fall back to
@@ -570,6 +588,7 @@ impl VmResources {
570588
0,
571589
self.memory_size(),
572590
self.vhost_user_devices_used() && !self.swiotlb_used(),
591+
guest_memfd,
573592
)
574593
}
575594

@@ -582,8 +601,12 @@ impl VmResources {
582601
let start = self.memory_size();
583602
let start = start.max(crate::arch::offset_after_last_gap());
584603

585-
let mut mem =
586-
self.allocate_memory(start, self.swiotlb_size(), self.vhost_user_devices_used())?;
604+
let mut mem = self.allocate_memory(
605+
start,
606+
self.swiotlb_size(),
607+
self.vhost_user_devices_used(),
608+
None,
609+
)?;
587610

588611
assert_eq!(mem.len(), 1);
589612

@@ -1654,7 +1677,7 @@ mod tests {
16541677
..Default::default()
16551678
};
16561679

1657-
let normal_mem = resources.allocate_guest_memory().unwrap();
1680+
let normal_mem = resources.allocate_guest_memory(None).unwrap();
16581681
assert_eq!(
16591682
normal_mem.iter().map(|r| r.len()).sum::<usize>(),
16601683
mib_to_bytes(16)

src/vmm/src/vstate/memory.rs

Lines changed: 19 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ use std::os::fd::{AsFd, AsRawFd};
1212
use std::ptr::null_mut;
1313
use std::sync::Arc;
1414

15-
use kvm_bindings::{KVM_MEM_LOG_DIRTY_PAGES, kvm_userspace_memory_region2};
15+
use kvm_bindings::{KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES, kvm_userspace_memory_region2};
1616
use serde::{Deserialize, Serialize};
1717
pub use vm_memory::bitmap::{AtomicBitmap, BS, Bitmap, BitmapSlice};
1818
pub use vm_memory::mmap::MmapRegionBuilder;
@@ -129,14 +129,16 @@ impl KvmRegion {
129129
pub(crate) fn from_mmap_region(
130130
region: GuestRegionMmap,
131131
slot: u32,
132-
guest_memfd: Option<&FileOffset>,
132+
guest_memfd: Option<FileOffset>,
133133
) -> Self {
134134
let region = ManuallyDrop::new(region);
135-
let flags = if region.bitmap().is_some() {
136-
KVM_MEM_LOG_DIRTY_PAGES
137-
} else {
138-
0
139-
};
135+
let mut flags = 0;
136+
if region.bitmap().is_some() {
137+
flags |= KVM_MEM_LOG_DIRTY_PAGES;
138+
}
139+
if guest_memfd.is_some() {
140+
flags |= KVM_MEM_GUEST_MEMFD;
141+
}
140142

141143
#[allow(clippy::cast_sign_loss)]
142144
let (guest_memfd, guest_memfd_offset) = guest_memfd
@@ -292,18 +294,16 @@ pub fn create(
292294
}
293295

294296
/// Creates a GuestMemoryMmap with `size` in MiB backed by a memfd.
295-
pub fn memfd_backed(
296-
regions: &[(GuestAddress, usize)],
297+
pub fn file_shared(
298+
file: File,
299+
regions: impl Iterator<Item = (GuestAddress, usize)>,
297300
track_dirty_pages: bool,
298301
huge_pages: HugePageConfig,
299302
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
300-
let size = regions.iter().map(|&(_, size)| size as u64).sum();
301-
let memfd_file = create_memfd(size, huge_pages.into())?.into_file();
302-
303303
create(
304-
regions.iter().copied(),
304+
regions,
305305
libc::MAP_SHARED | huge_pages.mmap_flags(),
306-
Some(memfd_file),
306+
Some(file),
307307
track_dirty_pages,
308308
0,
309309
)
@@ -326,7 +326,7 @@ pub fn anonymous(
326326

327327
/// Creates a GuestMemoryMmap given a `file` containing the data
328328
/// and a `state` containing mapping information.
329-
pub fn snapshot_file(
329+
pub fn file_private(
330330
file: File,
331331
regions: impl Iterator<Item = (GuestAddress, usize)>,
332332
track_dirty_pages: bool,
@@ -519,7 +519,8 @@ impl GuestMemoryExtension for GuestMemoryMmap {
519519
}
520520
}
521521

522-
fn create_memfd(
522+
/// Creates a memfd of the given size and huge pages configuration
523+
pub fn create_memfd(
523524
mem_size: u64,
524525
hugetlb_size: Option<memfd::HugetlbSize>,
525526
) -> Result<memfd::Memfd, MemoryError> {
@@ -772,7 +773,7 @@ mod tests {
772773
guest_memory.dump(&mut memory_file).unwrap();
773774

774775
let restored_guest_memory =
775-
kvmify(snapshot_file(memory_file, memory_state.regions(), false, 0).unwrap());
776+
kvmify(file_private(memory_file, memory_state.regions(), false, 0).unwrap());
776777

777778
// Check that the region contents are the same.
778779
let mut restored_region = vec![0u8; page_size * 2];
@@ -830,7 +831,7 @@ mod tests {
830831

831832
// We can restore from this because this is the first dirty dump.
832833
let restored_guest_memory =
833-
kvmify(snapshot_file(file, memory_state.regions(), false, 0).unwrap());
834+
kvmify(file_private(file, memory_state.regions(), false, 0).unwrap());
834835

835836
// Check that the region contents are the same.
836837
let mut restored_region = vec![0u8; region_size];

0 commit comments

Comments
 (0)