4
4
//! Enables pre-boot setup, instantiation and booting of a Firecracker VMM.
5
5
6
6
use std:: fmt:: Debug ;
7
- use std:: io;
8
- use std:: os:: fd:: AsFd ;
7
+ use std:: fs:: File ;
8
+ use std:: io:: { self } ;
9
+ use std:: os:: fd:: { AsFd , AsRawFd } ;
9
10
use std:: os:: unix:: fs:: MetadataExt ;
10
11
#[ cfg( feature = "gdb" ) ]
11
12
use std:: sync:: mpsc;
@@ -14,14 +15,13 @@ use std::sync::{Arc, Mutex};
14
15
use event_manager:: SubscriberOps ;
15
16
use kvm_ioctls:: Cap ;
16
17
use linux_loader:: cmdline:: Cmdline as LoaderKernelCmdline ;
17
- use userfaultfd:: Uffd ;
18
18
use utils:: time:: TimestampUs ;
19
19
#[ cfg( target_arch = "aarch64" ) ]
20
20
use vm_memory:: GuestAddress ;
21
21
22
22
#[ cfg( target_arch = "aarch64" ) ]
23
23
use crate :: Vcpu ;
24
- use crate :: arch:: { ConfigurationError , configure_system_for_boot, load_kernel} ;
24
+ use crate :: arch:: { ConfigurationError , configure_system_for_boot, host_page_size , load_kernel} ;
25
25
#[ cfg( target_arch = "aarch64" ) ]
26
26
use crate :: construct_kvm_mpidrs;
27
27
use crate :: cpu_config:: templates:: {
@@ -30,6 +30,7 @@ use crate::cpu_config::templates::{
30
30
#[ cfg( target_arch = "x86_64" ) ]
31
31
use crate :: device_manager;
32
32
use crate :: device_manager:: pci_mngr:: PciManagerError ;
33
+ use crate :: device_manager:: persist:: ACPIDeviceManagerRestoreError ;
33
34
use crate :: device_manager:: {
34
35
AttachDeviceError , DeviceManager , DeviceManagerCreateError , DevicePersistError ,
35
36
DeviceRestoreArgs ,
@@ -44,15 +45,19 @@ use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend};
44
45
use crate :: gdb;
45
46
use crate :: initrd:: { InitrdConfig , InitrdError } ;
46
47
use crate :: logger:: debug;
47
- use crate :: persist:: { MicrovmState , MicrovmStateError } ;
48
+ use crate :: persist:: {
49
+ GuestMemoryFromFileError , GuestMemoryFromUffdError , MicrovmState , MicrovmStateError ,
50
+ guest_memory_from_file, guest_memory_from_uffd,
51
+ } ;
48
52
use crate :: resources:: VmResources ;
49
53
use crate :: seccomp:: BpfThreadMap ;
50
54
use crate :: snapshot:: Persist ;
51
55
use crate :: utils:: u64_to_usize;
52
56
use crate :: vmm_config:: instance_info:: InstanceInfo ;
53
57
use crate :: vmm_config:: machine_config:: MachineConfigError ;
58
+ use crate :: vmm_config:: snapshot:: { LoadSnapshotParams , MemBackendType } ;
54
59
use crate :: vstate:: kvm:: { Kvm , KvmError } ;
55
- use crate :: vstate:: memory:: { GuestRegionMmap , MaybeBounce } ;
60
+ use crate :: vstate:: memory:: { MaybeBounce , create_memfd } ;
56
61
#[ cfg( target_arch = "aarch64" ) ]
57
62
use crate :: vstate:: resources:: ResourceAllocator ;
58
63
use crate :: vstate:: vcpu:: VcpuError ;
@@ -344,6 +349,7 @@ pub fn build_microvm_for_boot(
344
349
kvm,
345
350
vm,
346
351
uffd : None ,
352
+ uffd_socket : None ,
347
353
vcpus_handles : Vec :: new ( ) ,
348
354
vcpus_exit_evt,
349
355
device_manager,
@@ -416,6 +422,17 @@ pub fn build_and_boot_microvm(
416
422
Ok ( vmm)
417
423
}
418
424
425
+ /// Sub-Error type for [`build_microvm_from_snapshot`] to contain either
426
+ /// [`GuestMemoryFromFileError`] or [`GuestMemoryFromUffdError`] within
427
+ /// [`BuildMicrovmFromSnapshotError`].
428
+ #[ derive( Debug , thiserror:: Error , displaydoc:: Display ) ]
429
+ pub enum BuildMicrovmFromSnapshotErrorGuestMemoryError {
430
+ /// Error creating guest memory from file: {0}
431
+ File ( #[ from] GuestMemoryFromFileError ) ,
432
+ /// Error creating guest memory from uffd: {0}
433
+ Uffd ( #[ from] GuestMemoryFromUffdError ) ,
434
+ }
435
+
419
436
/// Error type for [`build_microvm_from_snapshot`].
420
437
#[ derive( Debug , thiserror:: Error , displaydoc:: Display ) ]
421
438
pub enum BuildMicrovmFromSnapshotError {
@@ -451,8 +468,55 @@ pub enum BuildMicrovmFromSnapshotError {
451
468
SeccompFiltersInternal ( #[ from] crate :: seccomp:: InstallationError ) ,
452
469
/// Failed to restore devices: {0}
453
470
RestoreDevices ( #[ from] DevicePersistError ) ,
471
+ /// Failed to restore ACPI device manager: {0}
472
+ ACPIDeviManager ( #[ from] ACPIDeviceManagerRestoreError ) ,
473
+ /// VMGenID update failed: {0}
474
+ VMGenIDUpdate ( std:: io:: Error ) ,
475
+ /// Internal error while restoring microVM: {0}
476
+ Internal ( #[ from] VmmError ) ,
477
+ /// Failed to load guest memory: {0}
478
+ GuestMemory ( #[ from] BuildMicrovmFromSnapshotErrorGuestMemoryError ) ,
479
+ /// Userfault bitmap memfd error: {0}
480
+ UserfaultBitmapMemfd ( #[ from] crate :: vstate:: memory:: MemoryError ) ,
454
481
}
455
482
483
+ fn memfd_to_slice ( memfd : & mut Option < File > ) -> Option < & mut [ u8 ] > {
484
+ if let Some ( bitmap_file) = memfd {
485
+ let len = u64_to_usize (
486
+ bitmap_file
487
+ . metadata ( )
488
+ . expect ( "Failed to get metadata" )
489
+ . len ( ) ,
490
+ ) ;
491
+
492
+ // SAFETY: the arguments to mmap cannot cause any memory unsafety in the rust sense
493
+ let bitmap_addr = unsafe {
494
+ libc:: mmap (
495
+ std:: ptr:: null_mut ( ) ,
496
+ len,
497
+ libc:: PROT_WRITE ,
498
+ libc:: MAP_SHARED ,
499
+ bitmap_file. as_raw_fd ( ) ,
500
+ 0 ,
501
+ )
502
+ } ;
503
+
504
+ if bitmap_addr == libc:: MAP_FAILED {
505
+ panic ! (
506
+ "Failed to mmap userfault bitmap file: {}" ,
507
+ std:: io:: Error :: last_os_error( )
508
+ ) ;
509
+ }
510
+
511
+ // SAFETY: `bitmap_addr` is a valid memory address returned by `mmap`.
512
+ Some ( unsafe { std:: slice:: from_raw_parts_mut ( bitmap_addr. cast ( ) , len) } )
513
+ } else {
514
+ None
515
+ }
516
+ }
517
+ // Capability number for KVM userfault, defined locally for the time being.
+ // TODO: take it from kvm-bindings when userfault support is merged upstream
518
+ const KVM_CAP_USERFAULT : u32 = 245 ;
519
+
456
520
/// Builds and starts a microVM based on the provided MicrovmState.
457
521
///
458
522
/// An `Arc` reference of the built `Vmm` is also plugged in the `EventManager`, while another
@@ -462,25 +526,96 @@ pub fn build_microvm_from_snapshot(
462
526
instance_info : & InstanceInfo ,
463
527
event_manager : & mut EventManager ,
464
528
microvm_state : MicrovmState ,
465
- guest_memory : Vec < GuestRegionMmap > ,
466
- uffd : Option < Uffd > ,
467
529
seccomp_filters : & BpfThreadMap ,
530
+ params : & LoadSnapshotParams ,
468
531
vm_resources : & mut VmResources ,
469
532
) -> Result < Arc < Mutex < Vmm > > , BuildMicrovmFromSnapshotError > {
470
533
// Build Vmm.
471
534
debug ! ( "event_start: build microvm from snapshot" ) ;
472
535
473
- let kvm = Kvm :: new ( microvm_state. kvm_state . kvm_cap_modifiers . clone ( ) )
474
- . map_err ( StartMicrovmError :: Kvm ) ?;
536
+ let secret_free = vm_resources. machine_config . secret_free ;
537
+ let mut kvm_capabilities = microvm_state. kvm_state . kvm_cap_modifiers . clone ( ) ;
538
+ if secret_free {
539
+ kvm_capabilities. push ( KvmCapability :: Add ( Cap :: GuestMemfd as u32 ) ) ;
540
+ kvm_capabilities. push ( KvmCapability :: Add ( KVM_CAP_GUEST_MEMFD_MMAP ) ) ;
541
+ kvm_capabilities. push ( KvmCapability :: Add ( KVM_CAP_GUEST_MEMFD_NO_DIRECT_MAP ) ) ;
542
+ kvm_capabilities. push ( KvmCapability :: Add ( KVM_CAP_USERFAULT ) ) ;
543
+ }
544
+
545
+ let kvm = Kvm :: new ( kvm_capabilities) . map_err ( StartMicrovmError :: Kvm ) ?;
475
546
// Set up Kvm Vm and register memory regions.
476
547
// Build custom CPU config if a custom template is provided.
477
- let mut vm = Vm :: new ( & kvm, false ) . map_err ( StartMicrovmError :: Vm ) ?;
548
+ let mut vm = Vm :: new ( & kvm, secret_free ) . map_err ( StartMicrovmError :: Vm ) ?;
478
549
479
550
let ( mut vcpus, vcpus_exit_evt) = vm
480
551
. create_vcpus ( vm_resources. machine_config . vcpu_count )
481
552
. map_err ( StartMicrovmError :: Vm ) ?;
482
553
483
- vm. register_memory_regions ( guest_memory, None )
554
+ let guest_memfd = match secret_free {
555
+ true => Some (
556
+ vm. create_guest_memfd (
557
+ vm_resources. memory_size ( ) ,
558
+ GUEST_MEMFD_FLAG_MMAP | GUEST_MEMFD_FLAG_NO_DIRECT_MAP ,
559
+ )
560
+ . map_err ( VmmError :: Vm ) ?,
561
+ ) ,
562
+ false => None ,
563
+ } ;
564
+
565
+ let mut userfault_bitmap_memfd = if secret_free {
566
+ let bitmap_size = vm_resources. memory_size ( ) / host_page_size ( ) / u8:: BITS as usize ;
567
+ let bitmap_file = create_memfd ( bitmap_size as u64 , None ) ?;
568
+
569
+ Some ( bitmap_file. into_file ( ) )
570
+ } else {
571
+ None
572
+ } ;
573
+
574
+ let mem_backend_path = & params. mem_backend . backend_path ;
575
+ let mem_state = & microvm_state. vm_state . memory ;
576
+ let track_dirty_pages = params. track_dirty_pages ;
577
+
578
+ let ( guest_memory, uffd, uffd_socket) = match params. mem_backend . backend_type {
579
+ MemBackendType :: File => {
580
+ if vm_resources. machine_config . huge_pages . is_hugetlbfs ( ) {
581
+ return Err ( BuildMicrovmFromSnapshotErrorGuestMemoryError :: File (
582
+ GuestMemoryFromFileError :: HugetlbfsSnapshot ,
583
+ )
584
+ . into ( ) ) ;
585
+ }
586
+ (
587
+ guest_memory_from_file ( mem_backend_path, mem_state, track_dirty_pages)
588
+ . map_err ( BuildMicrovmFromSnapshotErrorGuestMemoryError :: File ) ?,
589
+ None ,
590
+ None ,
591
+ )
592
+ }
593
+ MemBackendType :: Uffd => {
594
+ if vm_resources. machine_config . huge_pages . is_hugetlbfs ( ) && guest_memfd. is_some ( ) {
595
+ return Err ( BuildMicrovmFromSnapshotErrorGuestMemoryError :: Uffd (
596
+ GuestMemoryFromUffdError :: HugetlbfsSnapshot ,
597
+ )
598
+ . into ( ) ) ;
599
+ }
600
+ guest_memory_from_uffd (
601
+ mem_backend_path,
602
+ mem_state,
603
+ track_dirty_pages,
604
+ vm_resources. machine_config . huge_pages ,
605
+ guest_memfd,
606
+ userfault_bitmap_memfd. as_ref ( ) ,
607
+ )
608
+ . map_err ( BuildMicrovmFromSnapshotErrorGuestMemoryError :: Uffd ) ?
609
+ }
610
+ } ;
611
+
612
+ let mut userfault_bitmap = memfd_to_slice ( & mut userfault_bitmap_memfd) ;
613
+ if let Some ( ref mut slice) = userfault_bitmap {
614
+ // Set all bits so a fault on any page will cause a VM exit
615
+ slice. fill ( 0xffu8 ) ;
616
+ }
617
+
618
+ vm. register_memory_regions ( guest_memory, userfault_bitmap)
484
619
. map_err ( StartMicrovmError :: Vm ) ?;
485
620
486
621
#[ cfg( target_arch = "x86_64" ) ]
@@ -544,6 +679,7 @@ pub fn build_microvm_from_snapshot(
544
679
kvm,
545
680
vm,
546
681
uffd,
682
+ uffd_socket,
547
683
vcpus_handles : Vec :: new ( ) ,
548
684
vcpus_exit_evt,
549
685
device_manager,
@@ -811,6 +947,7 @@ pub(crate) mod tests {
811
947
kvm,
812
948
vm : Arc :: new ( vm) ,
813
949
uffd : None ,
950
+ uffd_socket : None ,
814
951
vcpus_handles : Vec :: new ( ) ,
815
952
vcpus_exit_evt,
816
953
device_manager : default_device_manager ( ) ,
0 commit comments