Skip to content

Commit c0b9ac8

Browse files
committed
Use guest_memfd to back memory if secret freedom is enabled
If the `secret_free` field of the memory_config is set to true in the /machine-config endpoint, back all memory regions using guest_memfd. For our setup, this means both setting the guest_memfd[_offset] fields in kvm_user_memory_region2, as well as mmapping the guest memory and reflecting this VMA back into the memslot's userspace_addr (which is how kvm internal accesses to guest memory will work for these guest_memfd regions, such as mmio emulation on x86). Signed-off-by: Patrick Roy <[email protected]>
1 parent 24ad6a5 commit c0b9ac8

File tree

6 files changed

+91
-49
lines changed

6 files changed

+91
-49
lines changed

src/vmm/benches/memory_access.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ fn bench_single_page_fault(c: &mut Criterion, configuration: VmResources) {
99
c.bench_function("page_fault", |b| {
1010
b.iter_batched(
1111
|| {
12-
let memory = configuration.allocate_guest_memory().unwrap();
12+
let memory = configuration.allocate_guest_memory(None).unwrap();
1313
// Get a pointer to the first memory region (cannot do `.get_slice(GuestAddress(0),
1414
// 1)`, because on ARM64 guest memory does not start at physical
1515
// address 0).

src/vmm/src/builder.rs

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ use crate::vmm_config::machine_config::MachineConfigError;
6262
use crate::vstate::kvm::Kvm;
6363
use crate::vstate::memory::{GuestRegionMmap, MaybeBounce};
6464
use crate::vstate::vcpu::{Vcpu, VcpuError};
65-
use crate::vstate::vm::Vm;
65+
use crate::vstate::vm::{KVM_GMEM_NO_DIRECT_MAP, Vm};
6666
use crate::{EventManager, Vmm, VmmError, device_manager};
6767

6868
/// Errors associated with starting the instance.
@@ -217,10 +217,6 @@ pub fn build_microvm_for_boot(
217217
.as_ref()
218218
.ok_or(MissingKernelConfig)?;
219219

220-
let guest_memory = vm_resources
221-
.allocate_guest_memory()
222-
.map_err(StartMicrovmError::GuestMemory)?;
223-
224220
// Clone the command-line so that a failed boot doesn't pollute the original.
225221
#[allow(unused_mut)]
226222
let mut boot_cmdline = boot_config.cmdline.clone();
@@ -230,6 +226,8 @@ pub fn build_microvm_for_boot(
230226
.cpu_template
231227
.get_cpu_template()?;
232228

229+
let secret_free = vm_resources.machine_config.secret_free;
230+
233231
let (mut vmm, mut vcpus) = create_vmm_and_vcpus(
234232
instance_info,
235233
event_manager,
@@ -238,15 +236,25 @@ pub fn build_microvm_for_boot(
238236
vm_resources.machine_config.secret_free,
239237
)?;
240238

239+
let guest_memfd = match secret_free {
240+
true => Some(
241+
vmm.vm
242+
.create_guest_memfd(vm_resources.memory_size(), KVM_GMEM_NO_DIRECT_MAP)
243+
.map_err(VmmError::Vm)?,
244+
),
245+
false => None,
246+
};
247+
248+
let guest_memory = vm_resources
249+
.allocate_guest_memory(guest_memfd)
250+
.map_err(StartMicrovmError::GuestMemory)?;
251+
241252
vmm.vm
242253
.register_memory_regions(guest_memory)
243254
.map_err(VmmError::Vm)?;
244255

245256
let entry_point = load_kernel(
246-
MaybeBounce::new(
247-
boot_config.kernel_file.try_clone().unwrap(),
248-
vmm.vm.secret_free(),
249-
),
257+
MaybeBounce::new(boot_config.kernel_file.try_clone().unwrap(), secret_free),
250258
vmm.vm.guest_memory(),
251259
)?;
252260
let initrd = match &boot_config.initrd_file {
@@ -258,7 +266,7 @@ pub fn build_microvm_for_boot(
258266

259267
Some(InitrdConfig::from_reader(
260268
vmm.vm.guest_memory(),
261-
MaybeBounce::new(initrd_file.as_fd(), vmm.vm.secret_free()),
269+
MaybeBounce::new(initrd_file.as_fd(), secret_free),
262270
u64_to_usize(size),
263271
)?)
264272
}

src/vmm/src/persist.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,7 @@ fn guest_memory_from_file(
457457
track_dirty_pages: bool,
458458
) -> Result<Vec<GuestRegionMmap>, GuestMemoryFromFileError> {
459459
let mem_file = File::open(mem_file_path)?;
460-
let guest_mem = memory::snapshot_file(mem_file, mem_state.regions(), track_dirty_pages)?;
460+
let guest_mem = memory::file_private(mem_file, mem_state.regions(), track_dirty_pages)?;
461461
Ok(guest_mem)
462462
}
463463

src/vmm/src/resources.rs

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
use std::convert::From;
5+
use std::fs::File;
56
use std::path::PathBuf;
67
use std::sync::{Arc, Mutex, MutexGuard};
78

@@ -31,7 +32,7 @@ use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError};
3132
use crate::vmm_config::net::*;
3233
use crate::vmm_config::vsock::*;
3334
use crate::vstate::memory;
34-
use crate::vstate::memory::{GuestRegionMmap, MemoryError};
35+
use crate::vstate::memory::{GuestRegionMmap, MemoryError, create_memfd};
3536

3637
/// Errors encountered when configuring microVM resources.
3738
#[derive(Debug, thiserror::Error, displaydoc::Display)]
@@ -502,12 +503,20 @@ impl VmResources {
502503
})
503504
}
504505

506+
/// Gets the size of the "traditional" memory region, i.e. total memory excluding the
507+
/// swiotlb region.
508+
pub fn memory_size(&self) -> usize {
509+
mib_to_bytes(self.machine_config.mem_size_mib)
510+
}
511+
505512
/// Allocates guest memory in a configuration most appropriate for these [`VmResources`].
506513
///
507514
/// If vhost-user-blk devices are in use, allocates memfd-backed shared memory, otherwise
508515
/// prefers anonymous memory for performance reasons.
509-
pub fn allocate_guest_memory(&self) -> Result<Vec<GuestRegionMmap>, MemoryError> {
510-
// Page faults are more expensive for shared memory mapping, including memfd.
516+
pub fn allocate_guest_memory(
517+
&self,
518+
guest_memfd: Option<File>,
519+
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
511520
// For this reason, we only back guest memory with a memfd
512521
// if a vhost-user-blk device is configured in the VM, otherwise we fall back to
513522
// an anonymous private memory.
@@ -517,19 +526,32 @@ impl VmResources {
517526
// a single way of backing guest memory for vhost-user and non-vhost-user cases,
518527
// that would not be worth the effort.
519528
let regions =
520-
crate::arch::arch_memory_regions(0, mib_to_bytes(self.machine_config.mem_size_mib));
521-
if self.vhost_user_devices_used() {
522-
memory::memfd_backed(
523-
regions.as_ref(),
524-
self.machine_config.track_dirty_pages,
525-
self.machine_config.huge_pages,
526-
)
527-
} else {
528-
memory::anonymous(
529-
regions.into_iter(),
529+
crate::arch::arch_memory_regions(0, self.memory_size()).into_iter();
530+
match guest_memfd {
531+
Some(file) => memory::file_shared(
532+
file,
533+
regions,
530534
self.machine_config.track_dirty_pages,
531535
self.machine_config.huge_pages,
532-
)
536+
),
537+
None => {
538+
if self.vhost_user_devices_used() {
539+
let memfd = create_memfd(self.memory_size() as u64, self.machine_config.huge_pages.into())?
540+
.into_file();
541+
memory::file_shared(
542+
memfd,
543+
regions,
544+
self.machine_config.track_dirty_pages,
545+
self.machine_config.huge_pages,
546+
)
547+
} else {
548+
memory::anonymous(
549+
regions.into_iter(),
550+
self.machine_config.track_dirty_pages,
551+
self.machine_config.huge_pages,
552+
)
553+
}
554+
}
533555
}
534556
}
535557
}

src/vmm/src/vstate/memory.rs

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -260,18 +260,16 @@ pub fn create(
260260
}
261261

262262
/// Creates a GuestMemoryMmap with `size` in MiB backed by a memfd.
263-
pub fn memfd_backed(
264-
regions: &[(GuestAddress, usize)],
263+
pub fn file_shared(
264+
file: File,
265+
regions: impl Iterator<Item = (GuestAddress, usize)>,
265266
track_dirty_pages: bool,
266267
huge_pages: HugePageConfig,
267268
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
268-
let size = regions.iter().map(|&(_, size)| size as u64).sum();
269-
let memfd_file = create_memfd(size, huge_pages.into())?.into_file();
270-
271269
create(
272-
regions.iter().copied(),
270+
regions,
273271
libc::MAP_SHARED | huge_pages.mmap_flags(),
274-
Some(memfd_file),
272+
Some(file),
275273
track_dirty_pages,
276274
)
277275
}
@@ -292,7 +290,7 @@ pub fn anonymous(
292290

293291
/// Creates a GuestMemoryMmap given a `file` containing the data
294292
/// and a `state` containing mapping information.
295-
pub fn snapshot_file(
293+
pub fn file_private(
296294
file: File,
297295
regions: impl Iterator<Item = (GuestAddress, usize)>,
298296
track_dirty_pages: bool,
@@ -478,7 +476,8 @@ impl GuestMemoryExtension for GuestMemoryMmap {
478476
}
479477
}
480478

481-
fn create_memfd(
479+
/// Creates a memfd of the given size and huge pages configuration
480+
pub fn create_memfd(
482481
mem_size: u64,
483482
hugetlb_size: Option<memfd::HugetlbSize>,
484483
) -> Result<memfd::Memfd, MemoryError> {
@@ -732,7 +731,7 @@ mod tests {
732731
guest_memory.dump(&mut memory_file).unwrap();
733732

734733
let restored_guest_memory = GuestMemoryMmap::from_regions(
735-
snapshot_file(memory_file, memory_state.regions(), false).unwrap(),
734+
file_private(memory_file, memory_state.regions(), false).unwrap(),
736735
)
737736
.unwrap();
738737

@@ -794,7 +793,7 @@ mod tests {
794793

795794
// We can restore from this because this is the first dirty dump.
796795
let restored_guest_memory = GuestMemoryMmap::from_regions(
797-
snapshot_file(file, memory_state.regions(), false).unwrap(),
796+
file_private(file, memory_state.regions(), false).unwrap(),
798797
)
799798
.unwrap();
800799

src/vmm/src/vstate/vm.rs

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,11 @@
88
use std::collections::HashMap;
99
use std::fs::{File, OpenOptions};
1010
use std::io::Write;
11-
use std::os::fd::FromRawFd;
11+
use std::os::fd::{AsRawFd, FromRawFd};
1212
use std::path::Path;
1313
use std::sync::Arc;
1414

15-
use kvm_bindings::{
16-
KVM_MEM_LOG_DIRTY_PAGES, kvm_create_guest_memfd, kvm_userspace_memory_region,
17-
kvm_userspace_memory_region2,
18-
};
15+
use kvm_bindings::{KVM_MEM_LOG_DIRTY_PAGES, kvm_create_guest_memfd, kvm_userspace_memory_region, kvm_userspace_memory_region2, KVM_MEM_GUEST_MEMFD};
1916
use kvm_ioctls::{Cap, VmFd};
2017
use vmm_sys_util::eventfd::EventFd;
2118

@@ -31,6 +28,8 @@ use crate::vstate::memory::{
3128
use crate::vstate::vcpu::VcpuError;
3229
use crate::{DirtyBitmap, Vcpu, mem_size_mib};
3330

31+
pub(crate) const KVM_GMEM_NO_DIRECT_MAP: u64 = 1;
32+
3433
/// Architecture independent parts of a VM.
3534
#[derive(Debug)]
3635
pub struct VmCommon {
@@ -157,10 +156,6 @@ impl Vm {
157156
"guest_memfd size must be page aligned"
158157
);
159158

160-
if !self.fd().check_extension(Cap::GuestMemfd) {
161-
return Err(VmError::GuestMemfdNotSupported);
162-
}
163-
164159
let kvm_gmem = kvm_create_guest_memfd {
165160
size: size as u64,
166161
flags,
@@ -198,10 +193,20 @@ impl Vm {
198193
return Err(VmError::NotEnoughMemorySlots);
199194
}
200195

201-
let flags = if region.bitmap().is_some() {
202-
KVM_MEM_LOG_DIRTY_PAGES
196+
let mut flags = 0;
197+
if region.bitmap().is_some() {
198+
flags |= KVM_MEM_LOG_DIRTY_PAGES;
199+
}
200+
201+
let (guest_memfd, guest_memfd_offset) = if self.secret_free() {
202+
flags |= KVM_MEM_GUEST_MEMFD;
203+
204+
let fo = region.file_offset()
205+
.expect("secret hidden VMs must mmap guest_memfd for memslots");
206+
207+
(fo.file().as_raw_fd() as u32, fo.start())
203208
} else {
204-
0
209+
(0, 0)
205210
};
206211

207212
let memory_region = kvm_userspace_memory_region2 {
@@ -210,6 +215,8 @@ impl Vm {
210215
memory_size: region.len(),
211216
userspace_addr: region.as_ptr() as u64,
212217
flags,
218+
guest_memfd,
219+
guest_memfd_offset,
213220
..Default::default()
214221
};
215222

@@ -223,6 +230,12 @@ impl Vm {
223230
.map_err(VmError::SetUserMemoryRegion)?;
224231
}
225232
} else {
233+
// Something is seriously wrong if we manage to set these fields on a host that doesn't
234+
// even allow creation of guest_memfds!
235+
assert_eq!(memory_region.guest_memfd, 0);
236+
assert_eq!(memory_region.guest_memfd_offset, 0);
237+
assert_eq!(memory_region.flags & KVM_MEM_GUEST_MEMFD, 0);
238+
226239
// SAFETY: We are passing a valid memory region and operate on a valid KVM FD.
227240
unsafe {
228241
self.fd()

0 commit comments

Comments
 (0)