44//! Enables pre-boot setup, instantiation and booting of a Firecracker VMM.
55
66use std:: fmt:: Debug ;
7- use std:: io;
8- use std:: os:: fd:: AsFd ;
7+ use std:: fs:: File ;
8+ use std:: io:: { self } ;
9+ use std:: os:: fd:: { AsFd , AsRawFd } ;
910use std:: os:: unix:: fs:: MetadataExt ;
1011#[ cfg( feature = "gdb" ) ]
1112use std:: sync:: mpsc;
@@ -14,7 +15,6 @@ use std::sync::{Arc, Mutex};
1415use event_manager:: { MutEventSubscriber , SubscriberOps } ;
1516use libc:: EFD_NONBLOCK ;
1617use linux_loader:: cmdline:: Cmdline as LoaderKernelCmdline ;
17- use userfaultfd:: Uffd ;
1818use utils:: time:: TimestampUs ;
1919#[ cfg( target_arch = "aarch64" ) ]
2020use vm_memory:: GuestAddress ;
@@ -23,7 +23,7 @@ use vm_superio::Rtc;
2323use vm_superio:: Serial ;
2424use vmm_sys_util:: eventfd:: EventFd ;
2525
26- use crate :: arch:: { ConfigurationError , configure_system_for_boot, load_kernel} ;
26+ use crate :: arch:: { ConfigurationError , configure_system_for_boot, host_page_size , load_kernel} ;
2727#[ cfg( target_arch = "aarch64" ) ]
2828use crate :: construct_kvm_mpidrs;
2929use crate :: cpu_config:: templates:: {
@@ -54,15 +54,19 @@ use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend};
5454use crate :: gdb;
5555use crate :: initrd:: { InitrdConfig , InitrdError } ;
5656use crate :: logger:: { debug, error} ;
57- use crate :: persist:: { MicrovmState , MicrovmStateError } ;
57+ use crate :: persist:: {
58+ GuestMemoryFromFileError , GuestMemoryFromUffdError , MicrovmState , MicrovmStateError ,
59+ guest_memory_from_file, guest_memory_from_uffd,
60+ } ;
5861use crate :: resources:: VmResources ;
5962use crate :: seccomp:: BpfThreadMap ;
6063use crate :: snapshot:: Persist ;
6164use crate :: utils:: u64_to_usize;
6265use crate :: vmm_config:: instance_info:: InstanceInfo ;
6366use crate :: vmm_config:: machine_config:: MachineConfigError ;
67+ use crate :: vmm_config:: snapshot:: { LoadSnapshotParams , MemBackendType } ;
6468use crate :: vstate:: kvm:: Kvm ;
65- use crate :: vstate:: memory:: { GuestRegionMmap , MaybeBounce } ;
69+ use crate :: vstate:: memory:: { MaybeBounce , create_memfd } ;
6670use crate :: vstate:: vcpu:: { Vcpu , VcpuError } ;
6771use crate :: vstate:: vm:: { KVM_GMEM_NO_DIRECT_MAP , Vm } ;
6872use crate :: { EventManager , Vmm , VmmError , device_manager} ;
@@ -188,6 +192,7 @@ fn create_vmm_and_vcpus(
188192 kvm,
189193 vm,
190194 uffd : None ,
195+ uffd_socket : None ,
191196 vcpus_handles : Vec :: new ( ) ,
192197 vcpus_exit_evt,
193198 resource_allocator,
@@ -422,6 +427,17 @@ pub fn build_and_boot_microvm(
422427 Ok ( vmm)
423428}
424429
430+ /// Sub-Error type for [`build_microvm_from_snapshot`] to contain either
431+ /// [`GuestMemoryFromFileError`] or [`GuestMemoryFromUffdError`] within
432+ /// [`BuildMicrovmFromSnapshotError`].
433+ #[ derive( Debug , thiserror:: Error , displaydoc:: Display ) ]
434+ pub enum BuildMicrovmFromSnapshotErrorGuestMemoryError {
435+ /// Error creating guest memory from file: {0}
436+ File ( #[ from] GuestMemoryFromFileError ) ,
437+ /// Error creating guest memory from uffd: {0}
438+ Uffd ( #[ from] GuestMemoryFromUffdError ) ,
439+ }
440+
425441/// Error type for [`build_microvm_from_snapshot`].
426442#[ derive( Debug , thiserror:: Error , displaydoc:: Display ) ]
427443pub enum BuildMicrovmFromSnapshotError {
@@ -459,6 +475,47 @@ pub enum BuildMicrovmFromSnapshotError {
459475 ACPIDeviManager ( #[ from] ACPIDeviceManagerRestoreError ) ,
460476 /// VMGenID update failed: {0}
461477 VMGenIDUpdate ( std:: io:: Error ) ,
478+ /// Internal error while restoring microVM: {0}
479+ Internal ( #[ from] VmmError ) ,
480+ /// Failed to load guest memory: {0}
481+ GuestMemory ( #[ from] BuildMicrovmFromSnapshotErrorGuestMemoryError ) ,
482+ /// Userfault bitmap memfd error: {0}
483+ UserfaultBitmapMemfd ( #[ from] crate :: vstate:: memory:: MemoryError ) ,
484+ }
485+
486+ fn memfd_to_slice ( memfd : & Option < File > ) -> Option < & mut [ u8 ] > {
487+ if let Some ( bitmap_file) = memfd {
488+ let len = u64_to_usize (
489+ bitmap_file
490+ . metadata ( )
491+ . expect ( "Failed to get metadata" )
492+ . len ( ) ,
493+ ) ;
494+
495+ // SAFETY: the arguments to mmap cannot cause any memory unsafety in the rust sense
496+ let bitmap_addr = unsafe {
497+ libc:: mmap (
498+ std:: ptr:: null_mut ( ) ,
499+ len,
500+ libc:: PROT_WRITE ,
501+ libc:: MAP_SHARED ,
502+ bitmap_file. as_raw_fd ( ) ,
503+ 0 ,
504+ )
505+ } ;
506+
507+ if bitmap_addr == libc:: MAP_FAILED {
508+ panic ! (
509+ "Failed to mmap userfault bitmap file: {}" ,
510+ std:: io:: Error :: last_os_error( )
511+ ) ;
512+ }
513+
514+ // SAFETY: `bitmap_addr` is a valid memory address returned by `mmap`.
515+ Some ( unsafe { std:: slice:: from_raw_parts_mut ( bitmap_addr. cast ( ) , len) } )
516+ } else {
517+ None
518+ }
462519}
463520
464521/// Builds and starts a microVM based on the provided MicrovmState.
@@ -470,27 +527,100 @@ pub fn build_microvm_from_snapshot(
470527 instance_info : & InstanceInfo ,
471528 event_manager : & mut EventManager ,
472529 microvm_state : MicrovmState ,
473- guest_memory : Vec < GuestRegionMmap > ,
474- uffd : Option < Uffd > ,
475530 seccomp_filters : & BpfThreadMap ,
531+ params : & LoadSnapshotParams ,
476532 vm_resources : & mut VmResources ,
477533) -> Result < Arc < Mutex < Vmm > > , BuildMicrovmFromSnapshotError > {
534+ // TODO: take it from kvm-bindings when userfault support is merged upstream
535+ const KVM_CAP_USERFAULT : u32 = 241 ;
536+
478537 // Build Vmm.
479538 debug ! ( "event_start: build microvm from snapshot" ) ;
539+
540+ let secret_free = vm_resources. machine_config . secret_free ;
541+
542+ let mut kvm_capabilities = microvm_state. kvm_state . kvm_cap_modifiers . clone ( ) ;
543+ if secret_free {
544+ kvm_capabilities. push ( KvmCapability :: Add ( KVM_CAP_USERFAULT ) ) ;
545+ }
546+
480547 let ( mut vmm, mut vcpus) = create_vmm_and_vcpus (
481548 instance_info,
482549 event_manager,
483550 vm_resources. machine_config . vcpu_count ,
484- microvm_state . kvm_state . kvm_cap_modifiers . clone ( ) ,
485- false ,
551+ kvm_capabilities ,
552+ secret_free ,
486553 )
487554 . map_err ( StartMicrovmError :: Internal ) ?;
488555
556+ let guest_memfd = match secret_free {
557+ true => Some (
558+ vmm. vm
559+ . create_guest_memfd ( vm_resources. memory_size ( ) , KVM_GMEM_NO_DIRECT_MAP )
560+ . map_err ( VmmError :: Vm ) ?,
561+ ) ,
562+ false => None ,
563+ } ;
564+
565+ let userfault_bitmap_memfd = if secret_free {
566+ let bitmap_size = vm_resources. memory_size ( ) / host_page_size ( ) / u8:: BITS as usize ;
567+ let bitmap_file = create_memfd ( bitmap_size as u64 , None ) ?;
568+
569+ Some ( bitmap_file. into_file ( ) )
570+ } else {
571+ None
572+ } ;
573+
574+ let mem_backend_path = & params. mem_backend . backend_path ;
575+ let mem_state = & microvm_state. vm_state . memory ;
576+ let track_dirty_pages = params. enable_diff_snapshots ;
577+
578+ let ( guest_memory, uffd, socket) = match params. mem_backend . backend_type {
579+ MemBackendType :: File => {
580+ if vm_resources. machine_config . huge_pages . is_hugetlbfs ( ) {
581+ return Err ( BuildMicrovmFromSnapshotErrorGuestMemoryError :: File (
582+ GuestMemoryFromFileError :: HugetlbfsSnapshot ,
583+ )
584+ . into ( ) ) ;
585+ }
586+ (
587+ guest_memory_from_file ( mem_backend_path, mem_state, track_dirty_pages)
588+ . map_err ( BuildMicrovmFromSnapshotErrorGuestMemoryError :: File ) ?,
589+ None ,
590+ None ,
591+ )
592+ }
593+ MemBackendType :: Uffd => {
594+ if vm_resources. machine_config . huge_pages . is_hugetlbfs ( ) && guest_memfd. is_some ( ) {
595+ return Err ( BuildMicrovmFromSnapshotErrorGuestMemoryError :: Uffd (
596+ GuestMemoryFromUffdError :: HugetlbfsSnapshot ,
597+ )
598+ . into ( ) ) ;
599+ }
600+ guest_memory_from_uffd (
601+ mem_backend_path,
602+ mem_state,
603+ track_dirty_pages,
604+ vm_resources. machine_config . huge_pages ,
605+ guest_memfd,
606+ userfault_bitmap_memfd. as_ref ( ) ,
607+ )
608+ . map_err ( BuildMicrovmFromSnapshotErrorGuestMemoryError :: Uffd ) ?
609+ }
610+ } ;
611+
612+ let mut userfault_bitmap = memfd_to_slice ( & userfault_bitmap_memfd) ;
613+ if let Some ( ref mut slice) = userfault_bitmap {
614+ // Set all bits so a fault on any page will cause a VM exit
615+ slice. fill ( 0xffu8 ) ;
616+ }
617+
489618 vmm. vm
490- . register_memory_regions ( guest_memory, None )
619+ . register_memory_regions ( guest_memory, userfault_bitmap )
491620 . map_err ( VmmError :: Vm )
492621 . map_err ( StartMicrovmError :: Internal ) ?;
493622 vmm. uffd = uffd;
623+ vmm. uffd_socket = socket;
494624
495625 #[ cfg( target_arch = "x86_64" ) ]
496626 {
@@ -956,6 +1086,7 @@ pub(crate) mod tests {
9561086 kvm,
9571087 vm,
9581088 uffd : None ,
1089+ uffd_socket : None ,
9591090 vcpus_handles : Vec :: new ( ) ,
9601091 vcpus_exit_evt,
9611092 resource_allocator : ResourceAllocator :: new ( ) . unwrap ( ) ,
0 commit comments