4
4
//! Enables pre-boot setup, instantiation and booting of a Firecracker VMM.
5
5
6
6
use std:: fmt:: Debug ;
7
- use std:: io;
8
- use std:: os:: fd:: AsFd ;
7
+ use std:: fs:: File ;
8
+ use std:: io:: { self } ;
9
+ use std:: os:: fd:: { AsFd , AsRawFd } ;
9
10
use std:: os:: unix:: fs:: MetadataExt ;
10
11
#[ cfg( feature = "gdb" ) ]
11
12
use std:: sync:: mpsc;
@@ -14,7 +15,6 @@ use std::sync::{Arc, Mutex};
14
15
use event_manager:: { MutEventSubscriber , SubscriberOps } ;
15
16
use libc:: EFD_NONBLOCK ;
16
17
use linux_loader:: cmdline:: Cmdline as LoaderKernelCmdline ;
17
- use userfaultfd:: Uffd ;
18
18
use utils:: time:: TimestampUs ;
19
19
#[ cfg( target_arch = "aarch64" ) ]
20
20
use vm_memory:: GuestAddress ;
@@ -23,7 +23,7 @@ use vm_superio::Rtc;
23
23
use vm_superio:: Serial ;
24
24
use vmm_sys_util:: eventfd:: EventFd ;
25
25
26
- use crate :: arch:: { ConfigurationError , configure_system_for_boot, load_kernel} ;
26
+ use crate :: arch:: { ConfigurationError , configure_system_for_boot, host_page_size , load_kernel} ;
27
27
#[ cfg( target_arch = "aarch64" ) ]
28
28
use crate :: construct_kvm_mpidrs;
29
29
use crate :: cpu_config:: templates:: {
@@ -54,15 +54,19 @@ use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend};
54
54
use crate :: gdb;
55
55
use crate :: initrd:: { InitrdConfig , InitrdError } ;
56
56
use crate :: logger:: { debug, error} ;
57
- use crate :: persist:: { MicrovmState , MicrovmStateError } ;
57
+ use crate :: persist:: {
58
+ GuestMemoryFromFileError , GuestMemoryFromUffdError , MicrovmState , MicrovmStateError ,
59
+ guest_memory_from_file, guest_memory_from_uffd,
60
+ } ;
58
61
use crate :: resources:: VmResources ;
59
62
use crate :: seccomp:: BpfThreadMap ;
60
63
use crate :: snapshot:: Persist ;
61
64
use crate :: utils:: u64_to_usize;
62
65
use crate :: vmm_config:: instance_info:: InstanceInfo ;
63
66
use crate :: vmm_config:: machine_config:: MachineConfigError ;
67
+ use crate :: vmm_config:: snapshot:: { LoadSnapshotParams , MemBackendType } ;
64
68
use crate :: vstate:: kvm:: Kvm ;
65
- use crate :: vstate:: memory:: { GuestRegionMmap , MaybeBounce } ;
69
+ use crate :: vstate:: memory:: { MaybeBounce , create_memfd } ;
66
70
use crate :: vstate:: vcpu:: { Vcpu , VcpuError } ;
67
71
use crate :: vstate:: vm:: { KVM_GMEM_NO_DIRECT_MAP , Vm } ;
68
72
use crate :: { EventManager , Vmm , VmmError , device_manager} ;
@@ -188,6 +192,7 @@ fn create_vmm_and_vcpus(
188
192
kvm,
189
193
vm,
190
194
uffd : None ,
195
+ uffd_socket : None ,
191
196
vcpus_handles : Vec :: new ( ) ,
192
197
vcpus_exit_evt,
193
198
resource_allocator,
@@ -422,6 +427,17 @@ pub fn build_and_boot_microvm(
422
427
Ok ( vmm)
423
428
}
424
429
430
+ /// Sub-Error type for [`build_microvm_from_snapshot`] to contain either
431
+ /// [`GuestMemoryFromFileError`] or [`GuestMemoryFromUffdError`] within
432
+ /// [`BuildMicrovmFromSnapshotError`].
433
+ #[ derive( Debug , thiserror:: Error , displaydoc:: Display ) ]
434
+ pub enum BuildMicrovmFromSnapshotErrorGuestMemoryError {
435
+ /// Error creating guest memory from file: {0}
436
+ File ( #[ from] GuestMemoryFromFileError ) ,
437
+ /// Error creating guest memory from uffd: {0}
438
+ Uffd ( #[ from] GuestMemoryFromUffdError ) ,
439
+ }
440
+
425
441
/// Error type for [`build_microvm_from_snapshot`].
426
442
#[ derive( Debug , thiserror:: Error , displaydoc:: Display ) ]
427
443
pub enum BuildMicrovmFromSnapshotError {
@@ -459,6 +475,47 @@ pub enum BuildMicrovmFromSnapshotError {
459
475
ACPIDeviManager ( #[ from] ACPIDeviceManagerRestoreError ) ,
460
476
/// VMGenID update failed: {0}
461
477
VMGenIDUpdate ( std:: io:: Error ) ,
478
+ /// Internal error while restoring microVM: {0}
479
+ Internal ( #[ from] VmmError ) ,
480
+ /// Failed to load guest memory: {0}
481
+ GuestMemory ( #[ from] BuildMicrovmFromSnapshotErrorGuestMemoryError ) ,
482
+ /// Userfault bitmap memfd error: {0}
483
+ UserfaultBitmapMemfd ( #[ from] crate :: vstate:: memory:: MemoryError ) ,
484
+ }
485
+
486
+ fn memfd_to_slice ( memfd : & Option < File > ) -> Option < & mut [ u8 ] > {
487
+ if let Some ( bitmap_file) = memfd {
488
+ let len = u64_to_usize (
489
+ bitmap_file
490
+ . metadata ( )
491
+ . expect ( "Failed to get metadata" )
492
+ . len ( ) ,
493
+ ) ;
494
+
495
+ // SAFETY: the arguments to mmap cannot cause any memory unsafety in the rust sense
496
+ let bitmap_addr = unsafe {
497
+ libc:: mmap (
498
+ std:: ptr:: null_mut ( ) ,
499
+ len,
500
+ libc:: PROT_WRITE ,
501
+ libc:: MAP_SHARED ,
502
+ bitmap_file. as_raw_fd ( ) ,
503
+ 0 ,
504
+ )
505
+ } ;
506
+
507
+ if bitmap_addr == libc:: MAP_FAILED {
508
+ panic ! (
509
+ "Failed to mmap userfault bitmap file: {}" ,
510
+ std:: io:: Error :: last_os_error( )
511
+ ) ;
512
+ }
513
+
514
+ // SAFETY: `bitmap_addr` is a valid memory address returned by `mmap`.
515
+ Some ( unsafe { std:: slice:: from_raw_parts_mut ( bitmap_addr. cast ( ) , len) } )
516
+ } else {
517
+ None
518
+ }
462
519
}
463
520
464
521
/// Builds and starts a microVM based on the provided MicrovmState.
@@ -470,27 +527,100 @@ pub fn build_microvm_from_snapshot(
470
527
instance_info : & InstanceInfo ,
471
528
event_manager : & mut EventManager ,
472
529
microvm_state : MicrovmState ,
473
- guest_memory : Vec < GuestRegionMmap > ,
474
- uffd : Option < Uffd > ,
475
530
seccomp_filters : & BpfThreadMap ,
531
+ params : & LoadSnapshotParams ,
476
532
vm_resources : & mut VmResources ,
477
533
) -> Result < Arc < Mutex < Vmm > > , BuildMicrovmFromSnapshotError > {
534
+ // TODO: take it from kvm-bindings when userfault support is merged upstream
535
+ const KVM_CAP_USERFAULT : u32 = 241 ;
536
+
478
537
// Build Vmm.
479
538
debug ! ( "event_start: build microvm from snapshot" ) ;
539
+
540
+ let secret_free = vm_resources. machine_config . secret_free ;
541
+
542
+ let mut kvm_capabilities = microvm_state. kvm_state . kvm_cap_modifiers . clone ( ) ;
543
+ if secret_free {
544
+ kvm_capabilities. push ( KvmCapability :: Add ( KVM_CAP_USERFAULT ) ) ;
545
+ }
546
+
480
547
let ( mut vmm, mut vcpus) = create_vmm_and_vcpus (
481
548
instance_info,
482
549
event_manager,
483
550
vm_resources. machine_config . vcpu_count ,
484
- microvm_state . kvm_state . kvm_cap_modifiers . clone ( ) ,
485
- false ,
551
+ kvm_capabilities ,
552
+ secret_free ,
486
553
)
487
554
. map_err ( StartMicrovmError :: Internal ) ?;
488
555
556
+ let guest_memfd = match secret_free {
557
+ true => Some (
558
+ vmm. vm
559
+ . create_guest_memfd ( vm_resources. memory_size ( ) , KVM_GMEM_NO_DIRECT_MAP )
560
+ . map_err ( VmmError :: Vm ) ?,
561
+ ) ,
562
+ false => None ,
563
+ } ;
564
+
565
+ let userfault_bitmap_memfd = if secret_free {
566
+ let bitmap_size = vm_resources. memory_size ( ) / host_page_size ( ) / u8:: BITS as usize ;
567
+ let bitmap_file = create_memfd ( bitmap_size as u64 , None ) ?;
568
+
569
+ Some ( bitmap_file. into_file ( ) )
570
+ } else {
571
+ None
572
+ } ;
573
+
574
+ let mem_backend_path = & params. mem_backend . backend_path ;
575
+ let mem_state = & microvm_state. vm_state . memory ;
576
+ let track_dirty_pages = params. enable_diff_snapshots ;
577
+
578
+ let ( guest_memory, uffd, socket) = match params. mem_backend . backend_type {
579
+ MemBackendType :: File => {
580
+ if vm_resources. machine_config . huge_pages . is_hugetlbfs ( ) {
581
+ return Err ( BuildMicrovmFromSnapshotErrorGuestMemoryError :: File (
582
+ GuestMemoryFromFileError :: HugetlbfsSnapshot ,
583
+ )
584
+ . into ( ) ) ;
585
+ }
586
+ (
587
+ guest_memory_from_file ( mem_backend_path, mem_state, track_dirty_pages)
588
+ . map_err ( BuildMicrovmFromSnapshotErrorGuestMemoryError :: File ) ?,
589
+ None ,
590
+ None ,
591
+ )
592
+ }
593
+ MemBackendType :: Uffd => {
594
+ if vm_resources. machine_config . huge_pages . is_hugetlbfs ( ) && guest_memfd. is_some ( ) {
595
+ return Err ( BuildMicrovmFromSnapshotErrorGuestMemoryError :: Uffd (
596
+ GuestMemoryFromUffdError :: HugetlbfsSnapshot ,
597
+ )
598
+ . into ( ) ) ;
599
+ }
600
+ guest_memory_from_uffd (
601
+ mem_backend_path,
602
+ mem_state,
603
+ track_dirty_pages,
604
+ vm_resources. machine_config . huge_pages ,
605
+ guest_memfd,
606
+ userfault_bitmap_memfd. as_ref ( ) ,
607
+ )
608
+ . map_err ( BuildMicrovmFromSnapshotErrorGuestMemoryError :: Uffd ) ?
609
+ }
610
+ } ;
611
+
612
+ let mut userfault_bitmap = memfd_to_slice ( & userfault_bitmap_memfd) ;
613
+ if let Some ( ref mut slice) = userfault_bitmap {
614
+ // Set all bits so a fault on any page will cause a VM exit
615
+ slice. fill ( 0xffu8 ) ;
616
+ }
617
+
489
618
vmm. vm
490
- . register_memory_regions ( guest_memory, None )
619
+ . register_memory_regions ( guest_memory, userfault_bitmap )
491
620
. map_err ( VmmError :: Vm )
492
621
. map_err ( StartMicrovmError :: Internal ) ?;
493
622
vmm. uffd = uffd;
623
+ vmm. uffd_socket = socket;
494
624
495
625
#[ cfg( target_arch = "x86_64" ) ]
496
626
{
@@ -956,6 +1086,7 @@ pub(crate) mod tests {
956
1086
kvm,
957
1087
vm,
958
1088
uffd : None ,
1089
+ uffd_socket : None ,
959
1090
vcpus_handles : Vec :: new ( ) ,
960
1091
vcpus_exit_evt,
961
1092
resource_allocator : ResourceAllocator :: new ( ) . unwrap ( ) ,
0 commit comments