@@ -115,7 +115,8 @@ pub mod vstate;
115115pub mod initrd;
116116
117117use std:: collections:: HashMap ;
118- use std:: io;
118+ use std:: io:: { self , Read , Write } ;
119+ use std:: os:: fd:: RawFd ;
119120use std:: os:: unix:: io:: AsRawFd ;
120121use std:: os:: unix:: net:: UnixStream ;
121122use std:: sync:: mpsc:: RecvTimeoutError ;
@@ -128,6 +129,7 @@ use devices::acpi::vmgenid::VmGenIdError;
128129use event_manager:: { EventManager as BaseEventManager , EventOps , Events , MutEventSubscriber } ;
129130use seccomp:: BpfProgram ;
130131use userfaultfd:: Uffd ;
132+ use vm_memory:: GuestAddress ;
131133use vmm_sys_util:: epoll:: EventSet ;
132134use vmm_sys_util:: eventfd:: EventFd ;
133135use vmm_sys_util:: terminal:: Terminal ;
@@ -147,13 +149,16 @@ use crate::devices::virtio::block::device::Block;
147149use crate :: devices:: virtio:: net:: Net ;
148150use crate :: devices:: virtio:: { TYPE_BALLOON , TYPE_BLOCK , TYPE_NET } ;
149151use crate :: logger:: { METRICS , MetricsError , error, info, warn} ;
150- use crate :: persist:: { MicrovmState , MicrovmStateError , VmInfo } ;
152+ use crate :: persist:: { FaultReply , FaultRequest , MicrovmState , MicrovmStateError , VmInfo } ;
151153use crate :: rate_limiter:: BucketUpdate ;
152154use crate :: snapshot:: Persist ;
153155use crate :: vmm_config:: instance_info:: { InstanceInfo , VmState } ;
154- use crate :: vstate:: memory:: { GuestMemory , GuestMemoryMmap , GuestMemoryRegion } ;
156+ use crate :: vstate:: memory:: {
157+ GuestMemory , GuestMemoryExtension , GuestMemoryMmap , GuestMemoryRegion ,
158+ } ;
155159use crate :: vstate:: vcpu:: VcpuState ;
156160pub use crate :: vstate:: vcpu:: { Vcpu , VcpuConfig , VcpuEvent , VcpuHandle , VcpuResponse } ;
161+ use crate :: vstate:: vm:: UserfaultData ;
157162pub use crate :: vstate:: vm:: Vm ;
158163
159164/// Shorthand type for the EventManager flavour used by Firecracker.
@@ -800,6 +805,111 @@ impl Vmm {
800805 self . shutdown_exit_code = Some ( exit_code) ;
801806 }
802807
808+ fn process_vcpu_userfault ( & mut self , vcpu : usize , userfault_data : UserfaultData ) {
809+ let offset = self
810+ . vm
811+ . guest_memory ( )
812+ . gpa_to_offset ( GuestAddress ( userfault_data. gpa ) )
813+ . expect ( "Failed to convert GPA to offset" ) ;
814+
815+ let fault_request = FaultRequest {
816+ vcpu : vcpu. try_into ( ) . expect ( "Invalid vCPU index" ) ,
817+ offset,
818+ flags : userfault_data. flags ,
819+ token : None ,
820+ } ;
821+ let fault_request_json =
822+ serde_json:: to_string ( & fault_request) . expect ( "Failed to serialize fault request" ) ;
823+
824+ let written = self
825+ . uffd_socket
826+ . as_ref ( )
827+ . expect ( "Uffd socket is not set" )
828+ . write ( fault_request_json. as_bytes ( ) )
829+ . expect ( "Failed to write to uffd socket" ) ;
830+
831+ if written != fault_request_json. len ( ) {
832+ panic ! (
833+ "Failed to write the entire fault request to the uffd socket: expected {}, \
834+ written {}",
835+ fault_request_json. len( ) ,
836+ written
837+ ) ;
838+ }
839+ }
840+
841+ fn active_event_in_uffd_socket ( & self , source : RawFd , event_set : EventSet ) -> bool {
842+ if let Some ( uffd_socket) = & self . uffd_socket {
843+ uffd_socket. as_raw_fd ( ) == source && event_set == EventSet :: IN
844+ } else {
845+ false
846+ }
847+ }
848+
849+ fn process_uffd_socket ( & mut self ) {
850+ const BUFFER_SIZE : usize = 4096 ;
851+
852+ let stream = self . uffd_socket . as_mut ( ) . expect ( "Uffd socket is not set" ) ;
853+
854+ let mut buffer = [ 0u8 ; BUFFER_SIZE ] ;
855+ let mut current_pos = 0 ;
856+
857+ loop {
858+ if current_pos < BUFFER_SIZE {
859+ match stream. read ( & mut buffer[ current_pos..] ) {
860+ Ok ( 0 ) => break ,
861+ Ok ( n) => current_pos += n,
862+ Err ( e) if e. kind ( ) == io:: ErrorKind :: WouldBlock => {
863+ if current_pos == 0 {
864+ break ;
865+ }
866+ }
867+ Err ( e) => panic ! ( "Read error: {}" , e) ,
868+ }
869+ }
870+
871+ let mut parser = serde_json:: Deserializer :: from_slice ( & buffer[ ..current_pos] )
872+ . into_iter :: < FaultReply > ( ) ;
873+ let mut total_consumed = 0 ;
874+ let mut needs_more = false ;
875+
876+ while let Some ( result) = parser. next ( ) {
877+ match result {
878+ Ok ( fault_reply) => {
879+ let vcpu = fault_reply. vcpu . expect ( "vCPU must be set" ) ;
880+
881+ self . vcpus_handles
882+ . get ( vcpu as usize )
883+ . expect ( "Invalid vcpu index" )
884+ . send_userfault_resolved ( ) ;
885+
886+ total_consumed = parser. byte_offset ( ) ;
887+ }
888+ Err ( e) if e. is_eof ( ) => {
889+ needs_more = true ;
890+ break ;
891+ }
892+ Err ( e) => {
893+ println ! (
894+ "Buffer content: {:?}" ,
895+ std:: str :: from_utf8( & buffer[ ..current_pos] )
896+ ) ;
897+ panic ! ( "Invalid JSON: {}" , e) ;
898+ }
899+ }
900+ }
901+
902+ if total_consumed > 0 {
903+ buffer. copy_within ( total_consumed..current_pos, 0 ) ;
904+ current_pos -= total_consumed;
905+ }
906+
907+ if needs_more {
908+ continue ;
909+ }
910+ }
911+ }
912+
803913 /// Gets a reference to kvm-ioctls Vm
804914 #[ cfg( feature = "gdb" ) ]
805915 pub fn vm ( & self ) -> & Vm {
@@ -882,38 +992,55 @@ impl MutEventSubscriber for Vmm {
882992 let event_set = event. event_set ( ) ;
883993
884994 if source == self . vcpus_exit_evt . as_raw_fd ( ) && event_set == EventSet :: IN {
885- // Exit event handling should never do anything more than call 'self.stop()'.
886995 let _ = self . vcpus_exit_evt . read ( ) ;
887996
888- let exit_code = ' exit_code : {
889- // Query each vcpu for their exit_code.
890- for handle in & self . vcpus_handles {
891- // Drain all vcpu responses that are pending from this vcpu until we find an
892- // exit status.
893- for response in handle . response_receiver ( ) . try_iter ( ) {
894- if let VcpuResponse :: Exited ( status ) = response {
895- // It could be that some vcpus exited successfully while others
896- // errored out. Thus make sure that error exits from one vcpu always
897- // takes precedence over "ok" exits
997+ let mut pending_userfaults = Vec :: with_capacity ( self . vcpus_handles . len ( ) ) ;
998+ let mut should_exit = false ;
999+ let mut final_exit_code = FcExitCode :: Ok ;
1000+
1001+ // First pass: collect all responses and determine exit status
1002+ for ( index , handle ) in self . vcpus_handles . iter ( ) . enumerate ( ) {
1003+ for response in handle . response_receiver ( ) . try_iter ( ) {
1004+ match response {
1005+ VcpuResponse :: Exited ( status ) => {
1006+ should_exit = true ;
8981007 if status != FcExitCode :: Ok {
899- break ' exit_code status;
1008+ final_exit_code = status;
9001009 }
9011010 }
1011+ VcpuResponse :: Userfault ( userfault_data) => {
1012+ pending_userfaults. push ( ( index, userfault_data) ) ;
1013+ }
1014+ _ => panic ! ( "Unexpected response from vcpu: {:?}" , response) ,
9021015 }
9031016 }
1017+ }
9041018
905- // No CPUs exited with error status code, report "Ok"
906- FcExitCode :: Ok
907- } ;
908- self . stop ( exit_code) ;
909- } else {
910- error ! ( "Spurious EventManager event for handler: Vmm" ) ;
1019+ // Process any pending userfaults
1020+ for ( index, userfault_data) in pending_userfaults {
1021+ self . process_vcpu_userfault ( index, userfault_data) ;
1022+ }
1023+
1024+ // Stop if we received an exit event
1025+ if should_exit {
1026+ self . stop ( final_exit_code) ;
1027+ }
1028+ }
1029+
1030+ if self . active_event_in_uffd_socket ( source, event_set) {
1031+ self . process_uffd_socket ( ) ;
9111032 }
9121033 }
9131034
9141035 fn init ( & mut self , ops : & mut EventOps ) {
9151036 if let Err ( err) = ops. add ( Events :: new ( & self . vcpus_exit_evt , EventSet :: IN ) ) {
9161037 error ! ( "Failed to register vmm exit event: {}" , err) ;
9171038 }
1039+
1040+ if let Some ( uffd_socket) = self . uffd_socket . as_ref ( ) {
1041+ if let Err ( err) = ops. add ( Events :: new ( uffd_socket, EventSet :: IN ) ) {
1042+ panic ! ( "Failed to register UFFD socket: {}" , err) ;
1043+ }
1044+ }
9181045 }
9191046}
0 commit comments