@@ -115,7 +115,8 @@ pub mod vstate;
115
115
pub mod initrd;
116
116
117
117
use std:: collections:: HashMap ;
118
- use std:: io;
118
+ use std:: io:: { self , Read , Write } ;
119
+ use std:: os:: fd:: RawFd ;
119
120
use std:: os:: unix:: io:: AsRawFd ;
120
121
use std:: os:: unix:: net:: UnixStream ;
121
122
use std:: sync:: mpsc:: RecvTimeoutError ;
@@ -128,6 +129,7 @@ use devices::acpi::vmgenid::VmGenIdError;
128
129
use event_manager:: { EventManager as BaseEventManager , EventOps , Events , MutEventSubscriber } ;
129
130
use seccomp:: BpfProgram ;
130
131
use userfaultfd:: Uffd ;
132
+ use vm_memory:: GuestAddress ;
131
133
use vmm_sys_util:: epoll:: EventSet ;
132
134
use vmm_sys_util:: eventfd:: EventFd ;
133
135
use vmm_sys_util:: terminal:: Terminal ;
@@ -147,13 +149,16 @@ use crate::devices::virtio::block::device::Block;
147
149
use crate :: devices:: virtio:: net:: Net ;
148
150
use crate :: devices:: virtio:: { TYPE_BALLOON , TYPE_BLOCK , TYPE_NET } ;
149
151
use crate :: logger:: { METRICS , MetricsError , error, info, warn} ;
150
- use crate :: persist:: { MicrovmState , MicrovmStateError , VmInfo } ;
152
+ use crate :: persist:: { FaultReply , FaultRequest , MicrovmState , MicrovmStateError , VmInfo } ;
151
153
use crate :: rate_limiter:: BucketUpdate ;
152
154
use crate :: snapshot:: Persist ;
153
155
use crate :: vmm_config:: instance_info:: { InstanceInfo , VmState } ;
154
- use crate :: vstate:: memory:: { GuestMemory , GuestMemoryMmap , GuestMemoryRegion } ;
156
+ use crate :: vstate:: memory:: {
157
+ GuestMemory , GuestMemoryExtension , GuestMemoryMmap , GuestMemoryRegion ,
158
+ } ;
155
159
use crate :: vstate:: vcpu:: VcpuState ;
156
160
pub use crate :: vstate:: vcpu:: { Vcpu , VcpuConfig , VcpuEvent , VcpuHandle , VcpuResponse } ;
161
+ use crate :: vstate:: vm:: UserfaultData ;
157
162
pub use crate :: vstate:: vm:: Vm ;
158
163
159
164
/// Shorthand type for the EventManager flavour used by Firecracker.
@@ -800,6 +805,111 @@ impl Vmm {
800
805
self . shutdown_exit_code = Some ( exit_code) ;
801
806
}
802
807
808
+ fn process_vcpu_userfault ( & mut self , vcpu : u32 , userfault_data : UserfaultData ) {
809
+ let offset = self
810
+ . vm
811
+ . guest_memory ( )
812
+ . gpa_to_offset ( GuestAddress ( userfault_data. gpa ) )
813
+ . expect ( "Failed to convert GPA to offset" ) ;
814
+
815
+ let fault_request = FaultRequest {
816
+ vcpu,
817
+ offset,
818
+ flags : userfault_data. flags ,
819
+ token : None ,
820
+ } ;
821
+ let fault_request_json =
822
+ serde_json:: to_string ( & fault_request) . expect ( "Failed to serialize fault request" ) ;
823
+
824
+ let written = self
825
+ . uffd_socket
826
+ . as_ref ( )
827
+ . expect ( "Uffd socket is not set" )
828
+ . write ( fault_request_json. as_bytes ( ) )
829
+ . expect ( "Failed to write to uffd socket" ) ;
830
+
831
+ if written != fault_request_json. len ( ) {
832
+ panic ! (
833
+ "Failed to write the entire fault request to the uffd socket: expected {}, \
834
+ written {}",
835
+ fault_request_json. len( ) ,
836
+ written
837
+ ) ;
838
+ }
839
+ }
840
+
841
+ fn active_event_in_uffd_socket ( & self , source : RawFd , event_set : EventSet ) -> bool {
842
+ if let Some ( uffd_socket) = & self . uffd_socket {
843
+ uffd_socket. as_raw_fd ( ) == source && event_set == EventSet :: IN
844
+ } else {
845
+ false
846
+ }
847
+ }
848
+
849
+ fn process_uffd_socket ( & mut self ) {
850
+ const BUFFER_SIZE : usize = 4096 ;
851
+
852
+ let stream = self . uffd_socket . as_mut ( ) . expect ( "Uffd socket is not set" ) ;
853
+
854
+ let mut buffer = [ 0u8 ; BUFFER_SIZE ] ;
855
+ let mut current_pos = 0 ;
856
+
857
+ loop {
858
+ if current_pos < BUFFER_SIZE {
859
+ match stream. read ( & mut buffer[ current_pos..] ) {
860
+ Ok ( 0 ) => break ,
861
+ Ok ( n) => current_pos += n,
862
+ Err ( e) if e. kind ( ) == io:: ErrorKind :: WouldBlock => {
863
+ if current_pos == 0 {
864
+ break ;
865
+ }
866
+ }
867
+ Err ( e) => panic ! ( "Read error: {}" , e) ,
868
+ }
869
+ }
870
+
871
+ let mut parser = serde_json:: Deserializer :: from_slice ( & buffer[ ..current_pos] )
872
+ . into_iter :: < FaultReply > ( ) ;
873
+ let mut total_consumed = 0 ;
874
+ let mut needs_more = false ;
875
+
876
+ while let Some ( result) = parser. next ( ) {
877
+ match result {
878
+ Ok ( fault_reply) => {
879
+ let vcpu = fault_reply. vcpu . expect ( "vCPU must be set" ) ;
880
+
881
+ self . vcpus_handles
882
+ . get ( vcpu as usize )
883
+ . expect ( "Invalid vcpu index" )
884
+ . send_userfault_resolved ( ) ;
885
+
886
+ total_consumed = parser. byte_offset ( ) ;
887
+ }
888
+ Err ( e) if e. is_eof ( ) => {
889
+ needs_more = true ;
890
+ break ;
891
+ }
892
+ Err ( e) => {
893
+ println ! (
894
+ "Buffer content: {:?}" ,
895
+ std:: str :: from_utf8( & buffer[ ..current_pos] )
896
+ ) ;
897
+ panic ! ( "Invalid JSON: {}" , e) ;
898
+ }
899
+ }
900
+ }
901
+
902
+ if total_consumed > 0 {
903
+ buffer. copy_within ( total_consumed..current_pos, 0 ) ;
904
+ current_pos -= total_consumed;
905
+ }
906
+
907
+ if needs_more {
908
+ continue ;
909
+ }
910
+ }
911
+ }
912
+
803
913
/// Gets a reference to kvm-ioctls Vm
804
914
#[ cfg( feature = "gdb" ) ]
805
915
pub fn vm ( & self ) -> & Vm {
@@ -882,38 +992,55 @@ impl MutEventSubscriber for Vmm {
882
992
let event_set = event. event_set ( ) ;
883
993
884
994
if source == self . vcpus_exit_evt . as_raw_fd ( ) && event_set == EventSet :: IN {
885
- // Exit event handling should never do anything more than call 'self.stop()'.
886
995
let _ = self . vcpus_exit_evt . read ( ) ;
887
996
888
- let exit_code = ' exit_code : {
889
- // Query each vcpu for their exit_code.
890
- for handle in & self . vcpus_handles {
891
- // Drain all vcpu responses that are pending from this vcpu until we find an
892
- // exit status.
893
- for response in handle . response_receiver ( ) . try_iter ( ) {
894
- if let VcpuResponse :: Exited ( status ) = response {
895
- // It could be that some vcpus exited successfully while others
896
- // errored out. Thus make sure that error exits from one vcpu always
897
- // takes precedence over "ok" exits
997
+ let mut pending_userfaults = Vec :: with_capacity ( self . vcpus_handles . len ( ) ) ;
998
+ let mut should_exit = false ;
999
+ let mut final_exit_code = FcExitCode :: Ok ;
1000
+
1001
+ // First pass: collect all responses and determine exit status
1002
+ for ( handle , index ) in self . vcpus_handles . iter ( ) . zip ( 0u32 .. ) {
1003
+ for response in handle . response_receiver ( ) . try_iter ( ) {
1004
+ match response {
1005
+ VcpuResponse :: Exited ( status ) => {
1006
+ should_exit = true ;
898
1007
if status != FcExitCode :: Ok {
899
- break ' exit_code status;
1008
+ final_exit_code = status;
900
1009
}
901
1010
}
1011
+ VcpuResponse :: Userfault ( userfault_data) => {
1012
+ pending_userfaults. push ( ( index, userfault_data) ) ;
1013
+ }
1014
+ _ => panic ! ( "Unexpected response from vcpu: {:?}" , response) ,
902
1015
}
903
1016
}
1017
+ }
904
1018
905
- // No CPUs exited with error status code, report "Ok"
906
- FcExitCode :: Ok
907
- } ;
908
- self . stop ( exit_code) ;
909
- } else {
910
- error ! ( "Spurious EventManager event for handler: Vmm" ) ;
1019
+ // Process any pending userfaults
1020
+ for ( index, userfault_data) in pending_userfaults {
1021
+ self . process_vcpu_userfault ( index, userfault_data) ;
1022
+ }
1023
+
1024
+ // Stop if we received an exit event
1025
+ if should_exit {
1026
+ self . stop ( final_exit_code) ;
1027
+ }
1028
+ }
1029
+
1030
+ if self . active_event_in_uffd_socket ( source, event_set) {
1031
+ self . process_uffd_socket ( ) ;
911
1032
}
912
1033
}
913
1034
914
1035
fn init ( & mut self , ops : & mut EventOps ) {
915
1036
if let Err ( err) = ops. add ( Events :: new ( & self . vcpus_exit_evt , EventSet :: IN ) ) {
916
1037
error ! ( "Failed to register vmm exit event: {}" , err) ;
917
1038
}
1039
+
1040
+ if let Some ( uffd_socket) = self . uffd_socket . as_ref ( ) {
1041
+ if let Err ( err) = ops. add ( Events :: new ( uffd_socket, EventSet :: IN ) ) {
1042
+ panic ! ( "Failed to register UFFD socket: {}" , err) ;
1043
+ }
1044
+ }
918
1045
}
919
1046
}
0 commit comments