4
4
//! Enables pre-boot setup, instantiation and booting of a Firecracker VMM.
5
5
6
6
use std:: fmt:: Debug ;
7
- use std:: io;
8
- use std:: os:: fd:: AsFd ;
7
+ use std:: fs:: File ;
8
+ use std:: io:: { self } ;
9
+ use std:: os:: fd:: { AsFd , AsRawFd } ;
9
10
use std:: os:: unix:: fs:: MetadataExt ;
10
11
#[ cfg( feature = "gdb" ) ]
11
12
use std:: sync:: mpsc;
@@ -14,14 +15,13 @@ use std::sync::{Arc, Mutex};
14
15
use event_manager:: SubscriberOps ;
15
16
use kvm_ioctls:: Cap ;
16
17
use linux_loader:: cmdline:: Cmdline as LoaderKernelCmdline ;
17
- use userfaultfd:: Uffd ;
18
18
use utils:: time:: TimestampUs ;
19
19
#[ cfg( target_arch = "aarch64" ) ]
20
20
use vm_memory:: GuestAddress ;
21
21
22
22
#[ cfg( target_arch = "aarch64" ) ]
23
23
use crate :: Vcpu ;
24
- use crate :: arch:: { ConfigurationError , configure_system_for_boot, load_kernel} ;
24
+ use crate :: arch:: { ConfigurationError , configure_system_for_boot, host_page_size , load_kernel} ;
25
25
#[ cfg( target_arch = "aarch64" ) ]
26
26
use crate :: construct_kvm_mpidrs;
27
27
use crate :: cpu_config:: templates:: {
@@ -30,6 +30,7 @@ use crate::cpu_config::templates::{
30
30
#[ cfg( target_arch = "x86_64" ) ]
31
31
use crate :: device_manager;
32
32
use crate :: device_manager:: pci_mngr:: PciManagerError ;
33
+ use crate :: device_manager:: persist:: ACPIDeviceManagerRestoreError ;
33
34
use crate :: device_manager:: {
34
35
AttachDeviceError , DeviceManager , DeviceManagerCreateError , DevicePersistError ,
35
36
DeviceRestoreArgs ,
@@ -44,15 +45,19 @@ use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend};
44
45
use crate :: gdb;
45
46
use crate :: initrd:: { InitrdConfig , InitrdError } ;
46
47
use crate :: logger:: debug;
47
- use crate :: persist:: { MicrovmState , MicrovmStateError } ;
48
+ use crate :: persist:: {
49
+ GuestMemoryFromFileError , GuestMemoryFromUffdError , MicrovmState , MicrovmStateError ,
50
+ guest_memory_from_file, guest_memory_from_uffd,
51
+ } ;
48
52
use crate :: resources:: VmResources ;
49
53
use crate :: seccomp:: BpfThreadMap ;
50
54
use crate :: snapshot:: Persist ;
51
55
use crate :: utils:: u64_to_usize;
52
56
use crate :: vmm_config:: instance_info:: InstanceInfo ;
53
57
use crate :: vmm_config:: machine_config:: MachineConfigError ;
58
+ use crate :: vmm_config:: snapshot:: { LoadSnapshotParams , MemBackendType } ;
54
59
use crate :: vstate:: kvm:: { Kvm , KvmError } ;
55
- use crate :: vstate:: memory:: { GuestRegionMmap , MaybeBounce } ;
60
+ use crate :: vstate:: memory:: { MaybeBounce , create_memfd } ;
56
61
#[ cfg( target_arch = "aarch64" ) ]
57
62
use crate :: vstate:: resources:: ResourceAllocator ;
58
63
use crate :: vstate:: vcpu:: VcpuError ;
@@ -335,6 +340,7 @@ pub fn build_microvm_for_boot(
335
340
kvm,
336
341
vm,
337
342
uffd : None ,
343
+ uffd_socket : None ,
338
344
vcpus_handles : Vec :: new ( ) ,
339
345
vcpus_exit_evt,
340
346
device_manager,
@@ -407,6 +413,17 @@ pub fn build_and_boot_microvm(
407
413
Ok ( vmm)
408
414
}
409
415
416
+ /// Sub-Error type for [`build_microvm_from_snapshot`] to contain either
417
+ /// [`GuestMemoryFromFileError`] or [`GuestMemoryFromUffdError`] within
418
+ /// [`BuildMicrovmFromSnapshotError`].
419
+ #[ derive( Debug , thiserror:: Error , displaydoc:: Display ) ]
420
+ pub enum BuildMicrovmFromSnapshotErrorGuestMemoryError {
421
+ /// Error creating guest memory from file: {0}
422
+ File ( #[ from] GuestMemoryFromFileError ) ,
423
+ /// Error creating guest memory from uffd: {0}
424
+ Uffd ( #[ from] GuestMemoryFromUffdError ) ,
425
+ }
426
+
410
427
/// Error type for [`build_microvm_from_snapshot`].
411
428
#[ derive( Debug , thiserror:: Error , displaydoc:: Display ) ]
412
429
pub enum BuildMicrovmFromSnapshotError {
@@ -442,8 +459,55 @@ pub enum BuildMicrovmFromSnapshotError {
442
459
SeccompFiltersInternal ( #[ from] crate :: seccomp:: InstallationError ) ,
443
460
/// Failed to restore devices: {0}
444
461
RestoreDevices ( #[ from] DevicePersistError ) ,
462
+ /// Failed to restore ACPI device manager: {0}
463
+ ACPIDeviManager ( #[ from] ACPIDeviceManagerRestoreError ) ,
464
+ /// VMGenID update failed: {0}
465
+ VMGenIDUpdate ( std:: io:: Error ) ,
466
+ /// Internal error while restoring microVM: {0}
467
+ Internal ( #[ from] VmmError ) ,
468
+ /// Failed to load guest memory: {0}
469
+ GuestMemory ( #[ from] BuildMicrovmFromSnapshotErrorGuestMemoryError ) ,
470
+ /// Userfault bitmap memfd error: {0}
471
+ UserfaultBitmapMemfd ( #[ from] crate :: vstate:: memory:: MemoryError ) ,
445
472
}
446
473
474
+ fn memfd_to_slice ( memfd : & Option < File > ) -> Option < & mut [ u8 ] > {
475
+ if let Some ( bitmap_file) = memfd {
476
+ let len = u64_to_usize (
477
+ bitmap_file
478
+ . metadata ( )
479
+ . expect ( "Failed to get metadata" )
480
+ . len ( ) ,
481
+ ) ;
482
+
483
+ // SAFETY: the arguments to mmap cannot cause any memory unsafety in the rust sense
484
+ let bitmap_addr = unsafe {
485
+ libc:: mmap (
486
+ std:: ptr:: null_mut ( ) ,
487
+ len,
488
+ libc:: PROT_WRITE ,
489
+ libc:: MAP_SHARED ,
490
+ bitmap_file. as_raw_fd ( ) ,
491
+ 0 ,
492
+ )
493
+ } ;
494
+
495
+ if bitmap_addr == libc:: MAP_FAILED {
496
+ panic ! (
497
+ "Failed to mmap userfault bitmap file: {}" ,
498
+ std:: io:: Error :: last_os_error( )
499
+ ) ;
500
+ }
501
+
502
+ // SAFETY: `bitmap_addr` is a valid memory address returned by `mmap`.
503
+ Some ( unsafe { std:: slice:: from_raw_parts_mut ( bitmap_addr. cast ( ) , len) } )
504
+ } else {
505
+ None
506
+ }
507
+ }
508
/// KVM capability number for userfault support, requested when restoring a secret-free VM.
// TODO: take it from kvm-bindings when userfault support is merged upstream
const KVM_CAP_USERFAULT: u32 = 245;
510
+
447
511
/// Builds and starts a microVM based on the provided MicrovmState.
448
512
///
449
513
/// An `Arc` reference of the built `Vmm` is also plugged in the `EventManager`, while another
@@ -453,25 +517,96 @@ pub fn build_microvm_from_snapshot(
453
517
instance_info : & InstanceInfo ,
454
518
event_manager : & mut EventManager ,
455
519
microvm_state : MicrovmState ,
456
- guest_memory : Vec < GuestRegionMmap > ,
457
- uffd : Option < Uffd > ,
458
520
seccomp_filters : & BpfThreadMap ,
521
+ params : & LoadSnapshotParams ,
459
522
vm_resources : & mut VmResources ,
460
523
) -> Result < Arc < Mutex < Vmm > > , BuildMicrovmFromSnapshotError > {
461
524
// Build Vmm.
462
525
debug ! ( "event_start: build microvm from snapshot" ) ;
463
526
464
- let kvm = Kvm :: new ( microvm_state. kvm_state . kvm_cap_modifiers . clone ( ) )
465
- . map_err ( StartMicrovmError :: Kvm ) ?;
527
+ let secret_free = vm_resources. machine_config . secret_free ;
528
+ let mut kvm_capabilities = microvm_state. kvm_state . kvm_cap_modifiers . clone ( ) ;
529
+ if secret_free {
530
+ kvm_capabilities. push ( KvmCapability :: Add ( Cap :: GuestMemfd as u32 ) ) ;
531
+ kvm_capabilities. push ( KvmCapability :: Add ( KVM_CAP_GUEST_MEMFD_MMAP ) ) ;
532
+ kvm_capabilities. push ( KvmCapability :: Add ( KVM_CAP_GUEST_MEMFD_NO_DIRECT_MAP ) ) ;
533
+ kvm_capabilities. push ( KvmCapability :: Add ( KVM_CAP_USERFAULT ) ) ;
534
+ }
535
+
536
+ let kvm = Kvm :: new ( kvm_capabilities) . map_err ( StartMicrovmError :: Kvm ) ?;
466
537
// Set up Kvm Vm and register memory regions.
467
538
// Build custom CPU config if a custom template is provided.
468
- let mut vm = Vm :: new ( & kvm, false ) . map_err ( StartMicrovmError :: Vm ) ?;
539
+ let mut vm = Vm :: new ( & kvm, secret_free ) . map_err ( StartMicrovmError :: Vm ) ?;
469
540
470
541
let ( mut vcpus, vcpus_exit_evt) = vm
471
542
. create_vcpus ( vm_resources. machine_config . vcpu_count )
472
543
. map_err ( StartMicrovmError :: Vm ) ?;
473
544
474
- vm. register_memory_regions ( guest_memory, None )
545
+ let guest_memfd = match secret_free {
546
+ true => Some (
547
+ vm. create_guest_memfd (
548
+ vm_resources. memory_size ( ) ,
549
+ GUEST_MEMFD_FLAG_MMAP | GUEST_MEMFD_FLAG_NO_DIRECT_MAP ,
550
+ )
551
+ . map_err ( VmmError :: Vm ) ?,
552
+ ) ,
553
+ false => None ,
554
+ } ;
555
+
556
+ let userfault_bitmap_memfd = if secret_free {
557
+ let bitmap_size = vm_resources. memory_size ( ) / host_page_size ( ) / u8:: BITS as usize ;
558
+ let bitmap_file = create_memfd ( bitmap_size as u64 , None ) ?;
559
+
560
+ Some ( bitmap_file. into_file ( ) )
561
+ } else {
562
+ None
563
+ } ;
564
+
565
+ let mem_backend_path = & params. mem_backend . backend_path ;
566
+ let mem_state = & microvm_state. vm_state . memory ;
567
+ let track_dirty_pages = params. track_dirty_pages ;
568
+
569
+ let ( guest_memory, uffd, uffd_socket) = match params. mem_backend . backend_type {
570
+ MemBackendType :: File => {
571
+ if vm_resources. machine_config . huge_pages . is_hugetlbfs ( ) {
572
+ return Err ( BuildMicrovmFromSnapshotErrorGuestMemoryError :: File (
573
+ GuestMemoryFromFileError :: HugetlbfsSnapshot ,
574
+ )
575
+ . into ( ) ) ;
576
+ }
577
+ (
578
+ guest_memory_from_file ( mem_backend_path, mem_state, track_dirty_pages)
579
+ . map_err ( BuildMicrovmFromSnapshotErrorGuestMemoryError :: File ) ?,
580
+ None ,
581
+ None ,
582
+ )
583
+ }
584
+ MemBackendType :: Uffd => {
585
+ if vm_resources. machine_config . huge_pages . is_hugetlbfs ( ) && guest_memfd. is_some ( ) {
586
+ return Err ( BuildMicrovmFromSnapshotErrorGuestMemoryError :: Uffd (
587
+ GuestMemoryFromUffdError :: HugetlbfsSnapshot ,
588
+ )
589
+ . into ( ) ) ;
590
+ }
591
+ guest_memory_from_uffd (
592
+ mem_backend_path,
593
+ mem_state,
594
+ track_dirty_pages,
595
+ vm_resources. machine_config . huge_pages ,
596
+ guest_memfd,
597
+ userfault_bitmap_memfd. as_ref ( ) ,
598
+ )
599
+ . map_err ( BuildMicrovmFromSnapshotErrorGuestMemoryError :: Uffd ) ?
600
+ }
601
+ } ;
602
+
603
+ let mut userfault_bitmap = memfd_to_slice ( & userfault_bitmap_memfd) ;
604
+ if let Some ( ref mut slice) = userfault_bitmap {
605
+ // Set all bits so a fault on any page will cause a VM exit
606
+ slice. fill ( 0xffu8 ) ;
607
+ }
608
+
609
+ vm. register_memory_regions ( guest_memory, userfault_bitmap)
475
610
. map_err ( StartMicrovmError :: Vm ) ?;
476
611
477
612
#[ cfg( target_arch = "x86_64" ) ]
@@ -536,6 +671,7 @@ pub fn build_microvm_from_snapshot(
536
671
kvm,
537
672
vm,
538
673
uffd,
674
+ uffd_socket,
539
675
vcpus_handles : Vec :: new ( ) ,
540
676
vcpus_exit_evt,
541
677
device_manager,
@@ -804,6 +940,7 @@ pub(crate) mod tests {
804
940
kvm,
805
941
vm : Arc :: new ( vm) ,
806
942
uffd : None ,
943
+ uffd_socket : None ,
807
944
vcpus_handles : Vec :: new ( ) ,
808
945
vcpus_exit_evt,
809
946
device_manager : default_device_manager ( ) ,
0 commit comments