@@ -115,7 +115,8 @@ pub mod vstate;
115
115
pub mod initrd;
116
116
117
117
use std:: collections:: HashMap ;
118
- use std:: io;
118
+ use std:: io:: { self , Read , Write } ;
119
+ use std:: os:: fd:: RawFd ;
119
120
use std:: os:: unix:: io:: AsRawFd ;
120
121
use std:: os:: unix:: net:: UnixStream ;
121
122
use std:: sync:: mpsc:: RecvTimeoutError ;
@@ -128,6 +129,7 @@ use event_manager::{EventManager as BaseEventManager, EventOps, Events, MutEvent
128
129
use seccomp:: BpfProgram ;
129
130
use snapshot:: Persist ;
130
131
use userfaultfd:: Uffd ;
132
+ use vm_memory:: GuestAddress ;
131
133
use vmm_sys_util:: epoll:: EventSet ;
132
134
use vmm_sys_util:: eventfd:: EventFd ;
133
135
use vmm_sys_util:: terminal:: Terminal ;
@@ -142,12 +144,15 @@ use crate::devices::virtio::block::device::Block;
142
144
use crate :: devices:: virtio:: net:: Net ;
143
145
use crate :: devices:: virtio:: { TYPE_BALLOON , TYPE_BLOCK , TYPE_NET } ;
144
146
use crate :: logger:: { METRICS , MetricsError , error, info, warn} ;
145
- use crate :: persist:: { MicrovmState , MicrovmStateError , VmInfo } ;
147
+ use crate :: persist:: { FaultReply , FaultRequest , MicrovmState , MicrovmStateError , VmInfo } ;
146
148
use crate :: rate_limiter:: BucketUpdate ;
147
149
use crate :: vmm_config:: instance_info:: { InstanceInfo , VmState } ;
148
- use crate :: vstate:: memory:: { GuestMemory , GuestMemoryMmap , GuestMemoryRegion } ;
150
+ use crate :: vstate:: memory:: {
151
+ GuestMemory , GuestMemoryExtension , GuestMemoryMmap , GuestMemoryRegion ,
152
+ } ;
149
153
use crate :: vstate:: vcpu:: VcpuState ;
150
154
pub use crate :: vstate:: vcpu:: { Vcpu , VcpuConfig , VcpuEvent , VcpuHandle , VcpuResponse } ;
155
+ use crate :: vstate:: vm:: UserfaultData ;
151
156
pub use crate :: vstate:: vm:: Vm ;
152
157
153
158
/// Shorthand type for the EventManager flavour used by Firecracker.
@@ -708,6 +713,111 @@ impl Vmm {
708
713
self . shutdown_exit_code = Some ( exit_code) ;
709
714
}
710
715
716
+ fn process_vcpu_userfault ( & mut self , vcpu : u32 , userfault_data : UserfaultData ) {
717
+ let offset = self
718
+ . vm
719
+ . guest_memory ( )
720
+ . gpa_to_offset ( GuestAddress ( userfault_data. gpa ) )
721
+ . expect ( "Failed to convert GPA to offset" ) ;
722
+
723
+ let fault_request = FaultRequest {
724
+ vcpu,
725
+ offset,
726
+ flags : userfault_data. flags ,
727
+ token : None ,
728
+ } ;
729
+ let fault_request_json =
730
+ serde_json:: to_string ( & fault_request) . expect ( "Failed to serialize fault request" ) ;
731
+
732
+ let written = self
733
+ . uffd_socket
734
+ . as_ref ( )
735
+ . expect ( "Uffd socket is not set" )
736
+ . write ( fault_request_json. as_bytes ( ) )
737
+ . expect ( "Failed to write to uffd socket" ) ;
738
+
739
+ if written != fault_request_json. len ( ) {
740
+ panic ! (
741
+ "Failed to write the entire fault request to the uffd socket: expected {}, \
742
+ written {}",
743
+ fault_request_json. len( ) ,
744
+ written
745
+ ) ;
746
+ }
747
+ }
748
+
749
+ fn active_event_in_uffd_socket ( & self , source : RawFd , event_set : EventSet ) -> bool {
750
+ if let Some ( uffd_socket) = & self . uffd_socket {
751
+ uffd_socket. as_raw_fd ( ) == source && event_set == EventSet :: IN
752
+ } else {
753
+ false
754
+ }
755
+ }
756
+
757
+ fn process_uffd_socket ( & mut self ) {
758
+ const BUFFER_SIZE : usize = 4096 ;
759
+
760
+ let stream = self . uffd_socket . as_mut ( ) . expect ( "Uffd socket is not set" ) ;
761
+
762
+ let mut buffer = [ 0u8 ; BUFFER_SIZE ] ;
763
+ let mut current_pos = 0 ;
764
+
765
+ loop {
766
+ if current_pos < BUFFER_SIZE {
767
+ match stream. read ( & mut buffer[ current_pos..] ) {
768
+ Ok ( 0 ) => break ,
769
+ Ok ( n) => current_pos += n,
770
+ Err ( e) if e. kind ( ) == io:: ErrorKind :: WouldBlock => {
771
+ if current_pos == 0 {
772
+ break ;
773
+ }
774
+ }
775
+ Err ( e) => panic ! ( "Read error: {}" , e) ,
776
+ }
777
+ }
778
+
779
+ let mut parser = serde_json:: Deserializer :: from_slice ( & buffer[ ..current_pos] )
780
+ . into_iter :: < FaultReply > ( ) ;
781
+ let mut total_consumed = 0 ;
782
+ let mut needs_more = false ;
783
+
784
+ while let Some ( result) = parser. next ( ) {
785
+ match result {
786
+ Ok ( fault_reply) => {
787
+ let vcpu = fault_reply. vcpu . expect ( "vCPU must be set" ) ;
788
+
789
+ self . vcpus_handles
790
+ . get ( vcpu as usize )
791
+ . expect ( "Invalid vcpu index" )
792
+ . send_userfault_resolved ( ) ;
793
+
794
+ total_consumed = parser. byte_offset ( ) ;
795
+ }
796
+ Err ( e) if e. is_eof ( ) => {
797
+ needs_more = true ;
798
+ break ;
799
+ }
800
+ Err ( e) => {
801
+ println ! (
802
+ "Buffer content: {:?}" ,
803
+ std:: str :: from_utf8( & buffer[ ..current_pos] )
804
+ ) ;
805
+ panic ! ( "Invalid JSON: {}" , e) ;
806
+ }
807
+ }
808
+ }
809
+
810
+ if total_consumed > 0 {
811
+ buffer. copy_within ( total_consumed..current_pos, 0 ) ;
812
+ current_pos -= total_consumed;
813
+ }
814
+
815
+ if needs_more {
816
+ continue ;
817
+ }
818
+ }
819
+ }
820
+
711
821
/// Gets a reference to kvm-ioctls Vm
712
822
#[ cfg( feature = "gdb" ) ]
713
823
pub fn vm ( & self ) -> & Vm {
@@ -790,38 +900,55 @@ impl MutEventSubscriber for Vmm {
790
900
let event_set = event. event_set ( ) ;
791
901
792
902
if source == self . vcpus_exit_evt . as_raw_fd ( ) && event_set == EventSet :: IN {
793
- // Exit event handling should never do anything more than call 'self.stop()'.
794
903
let _ = self . vcpus_exit_evt . read ( ) ;
795
904
796
- let exit_code = ' exit_code : {
797
- // Query each vcpu for their exit_code.
798
- for handle in & self . vcpus_handles {
799
- // Drain all vcpu responses that are pending from this vcpu until we find an
800
- // exit status.
801
- for response in handle . response_receiver ( ) . try_iter ( ) {
802
- if let VcpuResponse :: Exited ( status ) = response {
803
- // It could be that some vcpus exited successfully while others
804
- // errored out. Thus make sure that error exits from one vcpu always
805
- // takes precedence over "ok" exits
905
+ let mut pending_userfaults = Vec :: with_capacity ( self . vcpus_handles . len ( ) ) ;
906
+ let mut should_exit = false ;
907
+ let mut final_exit_code = FcExitCode :: Ok ;
908
+
909
+ // First pass: collect all responses and determine exit status
910
+ for ( handle , index ) in self . vcpus_handles . iter ( ) . zip ( 0u32 .. ) {
911
+ for response in handle . response_receiver ( ) . try_iter ( ) {
912
+ match response {
913
+ VcpuResponse :: Exited ( status ) => {
914
+ should_exit = true ;
806
915
if status != FcExitCode :: Ok {
807
- break ' exit_code status;
916
+ final_exit_code = status;
808
917
}
809
918
}
919
+ VcpuResponse :: Userfault ( userfault_data) => {
920
+ pending_userfaults. push ( ( index, userfault_data) ) ;
921
+ }
922
+ _ => panic ! ( "Unexpected response from vcpu: {:?}" , response) ,
810
923
}
811
924
}
925
+ }
812
926
813
- // No CPUs exited with error status code, report "Ok"
814
- FcExitCode :: Ok
815
- } ;
816
- self . stop ( exit_code) ;
817
- } else {
818
- error ! ( "Spurious EventManager event for handler: Vmm" ) ;
927
+ // Process any pending userfaults
928
+ for ( index, userfault_data) in pending_userfaults {
929
+ self . process_vcpu_userfault ( index, userfault_data) ;
930
+ }
931
+
932
+ // Stop if we received an exit event
933
+ if should_exit {
934
+ self . stop ( final_exit_code) ;
935
+ }
936
+ }
937
+
938
+ if self . active_event_in_uffd_socket ( source, event_set) {
939
+ self . process_uffd_socket ( ) ;
819
940
}
820
941
}
821
942
822
943
fn init ( & mut self , ops : & mut EventOps ) {
823
944
if let Err ( err) = ops. add ( Events :: new ( & self . vcpus_exit_evt , EventSet :: IN ) ) {
824
945
error ! ( "Failed to register vmm exit event: {}" , err) ;
825
946
}
947
+
948
+ if let Some ( uffd_socket) = self . uffd_socket . as_ref ( ) {
949
+ if let Err ( err) = ops. add ( Events :: new ( uffd_socket, EventSet :: IN ) ) {
950
+ panic ! ( "Failed to register UFFD socket: {}" , err) ;
951
+ }
952
+ }
826
953
}
827
954
}
0 commit comments