@@ -29,6 +29,8 @@ use std::sync::Arc;
 use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
 
 use log::{LevelFilter, error};
+#[cfg(mshv3)]
+use mshv_bindings::MSHV_GPAP_ACCESS_OP_CLEAR;
 #[cfg(mshv2)]
 use mshv_bindings::hv_message;
 use mshv_bindings::{
@@ -76,6 +78,9 @@ use crate::sandbox::SandboxConfiguration;
 use crate::sandbox::uninitialized::SandboxRuntimeConfig;
 use crate::{Result, log_then_return, new_error};
 
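+// Flag passed to mshv2's get_dirty_log so dirty bits are cleared as the bitmap
+// is read; mshv3 exposes MSHV_GPAP_ACCESS_OP_CLEAR for the same purpose.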
+#[cfg(mshv2)]
+const CLEAR_DIRTY_BIT_FLAG: u64 = 0b100;
+
 #[cfg(gdb)]
 mod debug {
     use std::sync::{Arc, Mutex};
@@ -302,6 +307,7 @@ pub(crate) struct HypervLinuxDriver {
     vcpu_fd: VcpuFd,
     entrypoint: u64,
     mem_regions: Vec<MemoryRegion>,
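+    /// Number of memory regions the sandbox was created with; used by
+    /// get_and_clear_dirty_pages to check that the initial regions are contiguous.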
+    n_initial_regions: usize,
     orig_rsp: GuestPtr,
     interrupt_handle: Arc<LinuxInterruptHandle>,
 
@@ -351,6 +357,7 @@ impl HypervLinuxDriver {
             vm_fd.initialize()?;
             vm_fd
         };
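+        // Enable dirty page tracking so mshv maintains a per-page dirty bitmap
+        // that can later be fetched (and cleared) via get_dirty_log.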
+        vm_fd.enable_dirty_page_tracking()?;
 
         let mut vcpu_fd = vm_fd.create_vcpu(0)?;
 
@@ -391,13 +398,31 @@ impl HypervLinuxDriver {
             (None, None)
         };
 
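+        // Track the base guest PFN and total size of the initial mapping so the
+        // dirty page bitmap can be fetched and cleared for the whole range below.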
+        let mut base_pfn = u64::MAX;
+        let mut total_size: usize = 0;
+
         mem_regions.iter().try_for_each(|region| {
-            let mshv_region = region.to_owned().into();
+            let mshv_region: mshv_user_mem_region = region.to_owned().into();
+            if base_pfn == u64::MAX {
+                base_pfn = mshv_region.guest_pfn;
+            }
+            total_size += mshv_region.size as usize;
             vm_fd.map_user_memory(mshv_region)
         })?;
 
         Self::setup_initial_sregs(&mut vcpu_fd, pml4_ptr.absolute()?)?;
 
+        // Get/clear the dirty page bitmap; mshv sets all the bits dirty at initialization.
+        // If we don't clear them, we end up taking a complete snapshot of memory page by
+        // page, which gets progressively slower as the sandbox size increases.
+        // The downside of doing this here is that the call to get_dirty_log takes longer
+        // as the number of pages increases, but for larger sandboxes it's easily cheaper
+        // than copying all the pages.
+
+        #[cfg(mshv2)]
+        vm_fd.get_dirty_log(base_pfn, total_size, CLEAR_DIRTY_BIT_FLAG)?;
+        #[cfg(mshv3)]
+        vm_fd.get_dirty_log(base_pfn, total_size, MSHV_GPAP_ACCESS_OP_CLEAR as u8)?;
+
         let interrupt_handle = Arc::new(LinuxInterruptHandle {
             running: AtomicU64::new(0),
             cancel_requested: AtomicBool::new(false),
@@ -428,6 +453,7 @@ impl HypervLinuxDriver {
             page_size: 0,
             vm_fd,
             vcpu_fd,
+            n_initial_regions: mem_regions.len(),
             mem_regions,
             entrypoint: entrypoint_ptr.absolute()?,
             orig_rsp: rsp_ptr,
@@ -885,6 +911,69 @@ impl Hypervisor for HypervLinuxDriver {
         self.interrupt_handle.clone()
     }
 
+    // TODO: Implement getting additional host-mapped dirty pages.
+    fn get_and_clear_dirty_pages(&mut self) -> Result<Vec<u64>> {
+        let first_mshv_region: mshv_user_mem_region = self
+            .mem_regions
+            .first()
+            .ok_or(new_error!(
+                "tried to get dirty page bitmap of 0-sized region"
+            ))?
+            .to_owned()
+            .into();
+
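+        // The initial sandbox regions must form a single contiguous guest range
+        // so that one get_dirty_log call can cover all of them.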
+        let n_contiguous = self
+            .mem_regions
+            .windows(2)
+            .take_while(|window| window[0].guest_region.end == window[1].guest_region.start)
+            .count()
+            + 1; // +1 because windows(2) gives us n-1 pairs for n regions
+
+        if n_contiguous != self.n_initial_regions {
+            return Err(new_error!(
+                "get_and_clear_dirty_pages: not all regions are contiguous, expected {} but got {}",
+                self.n_initial_regions,
+                n_contiguous
+            ));
+        }
+
+        let sandbox_total_size = self
+            .mem_regions
+            .iter()
+            .take(n_contiguous)
+            .map(|r| r.guest_region.len())
+            .sum();
+
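+        // Each u64 block of the returned bitmap covers 64 guest pages, with the
+        // lowest bit corresponding to the lowest page in the block.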
+        let mut sandbox_dirty_pages = self.vm_fd.get_dirty_log(
+            first_mshv_region.guest_pfn,
+            sandbox_total_size,
+            #[cfg(mshv2)]
+            CLEAR_DIRTY_BIT_FLAG,
+            #[cfg(mshv3)]
+            (MSHV_GPAP_ACCESS_OP_CLEAR as u8),
+        )?;
+
+        // Sanitize bits beyond the sandbox.
+        //
+        // TODO: remove this once the bug in mshv is fixed. The bug makes it possible
+        // for non-mapped memory to be incorrectly marked dirty. To work around it, we
+        // zero out any bits that are not within the sandbox size.
+        let sandbox_pages = sandbox_total_size / self.page_size;
+        let last_block_idx = sandbox_dirty_pages.len().saturating_sub(1);
+        if let Some(last_block) = sandbox_dirty_pages.last_mut() {
+            let last_block_start_page = last_block_idx * 64;
+            let last_block_end_page = last_block_start_page + 64;
+
+            // If the last block extends beyond the sandbox, clear the invalid bits
+            if last_block_end_page > sandbox_pages {
+                let valid_bits_in_last_block = sandbox_pages - last_block_start_page;
+                let mask = (1u64 << valid_bits_in_last_block) - 1;
+                *last_block &= mask;
+            }
+        }
+        Ok(sandbox_dirty_pages)
+    }
+
     #[cfg(crashdump)]
     fn crashdump_context(&self) -> Result<Option<super::crashdump::CrashDumpContext>> {
         if self.rt_cfg.guest_core_dump {
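
A minimal sketch (not part of this commit) of how a caller might decode the bitmap
returned by get_and_clear_dirty_pages, assuming the layout implied by the masking
logic above: one bit per page, 64 pages per u64 block, lowest bit first. The helper
name dirty_page_indices is hypothetical:

    // Hypothetical helper: expand the dirty bitmap into page indices
    // relative to the start of the sandbox.
    fn dirty_page_indices(bitmap: &[u64]) -> Vec<usize> {
        let mut pages = Vec::new();
        for (block_idx, block) in bitmap.iter().enumerate() {
            let mut bits = *block;
            while bits != 0 {
                let bit = bits.trailing_zeros() as usize;
                pages.push(block_idx * 64 + bit);
                bits &= bits - 1; // clear the lowest set bit
            }
        }
        pages
    }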