@@ -29,6 +29,8 @@ use std::sync::Arc;
 use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
 
 use log::{LevelFilter, error};
+#[cfg(mshv3)]
+use mshv_bindings::MSHV_GPAP_ACCESS_OP_CLEAR;
 #[cfg(mshv2)]
 use mshv_bindings::hv_message;
 use mshv_bindings::{
@@ -76,6 +78,9 @@ use crate::sandbox::SandboxConfiguration;
 use crate::sandbox::uninitialized::SandboxRuntimeConfig;
 use crate::{Result, log_then_return, new_error};
 
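+// Flag passed to mshv2's get_dirty_log so dirty bits are cleared as the bitmap
+// is read; mshv3 exposes MSHV_GPAP_ACCESS_OP_CLEAR for the same purpose.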
+#[cfg(mshv2)]
+const CLEAR_DIRTY_BIT_FLAG: u64 = 0b100;
+
 #[cfg(gdb)]
 mod debug {
     use std::sync::{Arc, Mutex};
@@ -302,6 +307,7 @@ pub(crate) struct HypervLinuxDriver {
     vcpu_fd: VcpuFd,
     entrypoint: u64,
     mem_regions: Vec<MemoryRegion>,
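+    /// Number of memory regions the sandbox was created with; used by
+    /// get_and_clear_dirty_pages to check that the initial regions are contiguous.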
+    n_initial_regions: usize,
     orig_rsp: GuestPtr,
     interrupt_handle: Arc<LinuxInterruptHandle>,
 
@@ -351,6 +357,7 @@ impl HypervLinuxDriver {
             vm_fd.initialize()?;
             vm_fd
         };
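+        // Enable dirty page tracking so mshv maintains a per-page dirty bitmap
+        // that can later be fetched (and cleared) via get_dirty_log.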
+        vm_fd.enable_dirty_page_tracking()?;
 
         let mut vcpu_fd = vm_fd.create_vcpu(0)?;
 
@@ -391,13 +398,31 @@ impl HypervLinuxDriver {
             (None, None)
         };
 
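+        // Track the base guest PFN and total size of the initial mapping so the
+        // dirty page bitmap can be fetched and cleared for the whole range below.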
+        let mut base_pfn = u64::MAX;
+        let mut total_size: usize = 0;
+
         mem_regions.iter().try_for_each(|region| {
-            let mshv_region = region.to_owned().into();
+            let mshv_region: mshv_user_mem_region = region.to_owned().into();
+            if base_pfn == u64::MAX {
+                base_pfn = mshv_region.guest_pfn;
+            }
+            total_size += mshv_region.size as usize;
             vm_fd.map_user_memory(mshv_region)
         })?;
 
         Self::setup_initial_sregs(&mut vcpu_fd, pml4_ptr.absolute()?)?;
 
+        // Get/clear the dirty page bitmap; mshv sets all the bits dirty at initialization.
+        // If we don't clear them, we end up taking a complete snapshot of memory page by
+        // page, which gets progressively slower as the sandbox size increases.
+        // The downside of doing this here is that the call to get_dirty_log takes longer
+        // as the number of pages increases, but for larger sandboxes it's easily cheaper
+        // than copying all the pages.
+
+        #[cfg(mshv2)]
+        vm_fd.get_dirty_log(base_pfn, total_size, CLEAR_DIRTY_BIT_FLAG)?;
+        #[cfg(mshv3)]
+        vm_fd.get_dirty_log(base_pfn, total_size, MSHV_GPAP_ACCESS_OP_CLEAR as u8)?;
+
         let interrupt_handle = Arc::new(LinuxInterruptHandle {
             running: AtomicU64::new(0),
             cancel_requested: AtomicBool::new(false),
@@ -428,6 +453,7 @@ impl HypervLinuxDriver {
             page_size: 0,
             vm_fd,
             vcpu_fd,
+            n_initial_regions: mem_regions.len(),
             mem_regions,
             entrypoint: entrypoint_ptr.absolute()?,
             orig_rsp: rsp_ptr,
@@ -885,6 +911,69 @@ impl Hypervisor for HypervLinuxDriver {
         self.interrupt_handle.clone()
     }
 
+    // TODO: Implement getting additional host-mapped dirty pages.
+    fn get_and_clear_dirty_pages(&mut self) -> Result<Vec<u64>> {
+        let first_mshv_region: mshv_user_mem_region = self
+            .mem_regions
+            .first()
+            .ok_or(new_error!(
+                "tried to get dirty page bitmap of 0-sized region"
+            ))?
+            .to_owned()
+            .into();
+
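+        // The initial sandbox regions must form a single contiguous guest range
+        // so that one get_dirty_log call can cover all of them.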
+        let n_contiguous = self
+            .mem_regions
+            .windows(2)
+            .take_while(|window| window[0].guest_region.end == window[1].guest_region.start)
+            .count()
+            + 1; // +1 because windows(2) gives us n-1 pairs for n regions
+
+        if n_contiguous != self.n_initial_regions {
+            return Err(new_error!(
+                "get_and_clear_dirty_pages: not all regions are contiguous, expected {} but got {}",
+                self.n_initial_regions,
+                n_contiguous
+            ));
+        }
+
+        let sandbox_total_size = self
+            .mem_regions
+            .iter()
+            .take(n_contiguous)
+            .map(|r| r.guest_region.len())
+            .sum();
+
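+        // Each u64 block of the returned bitmap covers 64 guest pages, with the
+        // lowest bit corresponding to the lowest page in the block.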
+        let mut sandbox_dirty_pages = self.vm_fd.get_dirty_log(
+            first_mshv_region.guest_pfn,
+            sandbox_total_size,
+            #[cfg(mshv2)]
+            CLEAR_DIRTY_BIT_FLAG,
+            #[cfg(mshv3)]
+            (MSHV_GPAP_ACCESS_OP_CLEAR as u8),
+        )?;
+
+        // Sanitize bits beyond the sandbox.
+        //
+        // TODO: remove this once the bug in mshv is fixed. The bug makes it possible
+        // for non-mapped memory to be incorrectly marked dirty. To work around it, we
+        // zero out any bits that are not within the sandbox size.
+        let sandbox_pages = sandbox_total_size / self.page_size;
+        let last_block_idx = sandbox_dirty_pages.len().saturating_sub(1);
+        if let Some(last_block) = sandbox_dirty_pages.last_mut() {
+            let last_block_start_page = last_block_idx * 64;
+            let last_block_end_page = last_block_start_page + 64;
+
+            // If the last block extends beyond the sandbox, clear the invalid bits
+            if last_block_end_page > sandbox_pages {
+                let valid_bits_in_last_block = sandbox_pages - last_block_start_page;
+                let mask = (1u64 << valid_bits_in_last_block) - 1;
+                *last_block &= mask;
+            }
+        }
+        Ok(sandbox_dirty_pages)
+    }
+
     #[cfg(crashdump)]
     fn crashdump_context(&self) -> Result<Option<super::crashdump::CrashDumpContext>> {
         if self.rt_cfg.guest_core_dump {
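
A minimal sketch (not part of this commit) of how a caller might decode the bitmap
returned by get_and_clear_dirty_pages, assuming the layout implied by the masking
logic above: one bit per page, 64 pages per u64 block, lowest bit first. The helper
name dirty_page_indices is hypothetical:

    // Hypothetical helper: expand the dirty bitmap into page indices
    // relative to the start of the sandbox.
    fn dirty_page_indices(bitmap: &[u64]) -> Vec<usize> {
        let mut pages = Vec::new();
        for (block_idx, block) in bitmap.iter().enumerate() {
            let mut bits = *block;
            while bits != 0 {
                let bit = bits.trailing_zeros() as usize;
                pages.push(block_idx * 64 + bit);
                bits &= bits - 1; // clear the lowest set bit
            }
        }
        pages
    }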