
Commit f1a739d

Add API for getting VM's dirty pages, and add some bitmap utility functions.
Signed-off-by: Ludvig Liljenberg <[email protected]>
1 parent ab957f7 commit f1a739d


8 files changed (+321, -15 lines)


src/hyperlight_common/src/mem.rs

Lines changed: 2 additions & 0 deletions
@@ -17,6 +17,8 @@ limitations under the License.
 pub const PAGE_SHIFT: u64 = 12;
 pub const PAGE_SIZE: u64 = 1 << 12;
 pub const PAGE_SIZE_USIZE: usize = 1 << 12;
+// The number of pages in 1 "block". A single u64 can be used as a bitmap to keep track of all dirty pages in a block.
+pub const PAGES_IN_BLOCK: usize = 64;

 /// A memory region in the guest address space
 #[derive(Debug, Clone, Copy)]
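The PAGES_IN_BLOCK constant above encodes the bitmap layout used throughout this change: page N maps to bit N % 64 of block N / 64. A minimal standalone sketch of that mapping follows; the `page_is_dirty` helper is illustrative only and not part of this commit.

```rust
// The page-to-bitmap mapping implied by PAGES_IN_BLOCK = 64:
// page index N lives in block N / 64, at bit N % 64 of that block's u64.
const PAGES_IN_BLOCK: usize = 64;

// Hypothetical helper (not part of this commit): is a given page marked dirty?
fn page_is_dirty(bitmap: &[u64], page_idx: usize) -> bool {
    let block_idx = page_idx / PAGES_IN_BLOCK;
    let bit_idx = page_idx % PAGES_IN_BLOCK;
    bitmap
        .get(block_idx)
        .is_some_and(|block| block & (1u64 << bit_idx) != 0)
}

fn main() {
    // Two u64 blocks cover 128 pages; mark page 70 (block 1, bit 6) dirty.
    let mut bitmap = vec![0u64; 2];
    bitmap[70 / PAGES_IN_BLOCK] |= 1 << (70 % PAGES_IN_BLOCK);
    assert!(page_is_dirty(&bitmap, 70));
    assert!(!page_is_dirty(&bitmap, 71));
}
```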

src/hyperlight_host/src/hypervisor/hyperv_linux.rs

Lines changed: 71 additions & 6 deletions
@@ -29,6 +29,8 @@ use std::sync::Arc;
 use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};

 use log::{LevelFilter, error};
+#[cfg(mshv3)]
+use mshv_bindings::MSHV_GPAP_ACCESS_OP_CLEAR;
 #[cfg(mshv2)]
 use mshv_bindings::hv_message;
 use mshv_bindings::{
@@ -76,6 +78,9 @@ use crate::sandbox::SandboxConfiguration;
 use crate::sandbox::uninitialized::SandboxRuntimeConfig;
 use crate::{Result, log_then_return, new_error};

+#[cfg(mshv2)]
+const CLEAR_DIRTY_BIT_FLAG: u64 = 0b100;
+
 #[cfg(gdb)]
 mod debug {
     use std::sync::{Arc, Mutex};
@@ -301,7 +306,12 @@ pub(crate) struct HypervLinuxDriver {
     vm_fd: VmFd,
     vcpu_fd: VcpuFd,
     entrypoint: u64,
+    // Regions part of the original sandbox
     mem_regions: Vec<MemoryRegion>,
+    // Size of the (contiguous) sandbox mem_regions
+    mem_regions_size: usize,
+    // Regions that are mapped after sandbox creation
+    mmap_regions: Vec<MemoryRegion>,
     orig_rsp: GuestPtr,
     interrupt_handle: Arc<LinuxInterruptHandle>,
@@ -351,6 +361,7 @@ impl HypervLinuxDriver {
             vm_fd.initialize()?;
             vm_fd
         };
+        vm_fd.enable_dirty_page_tracking()?;

         let mut vcpu_fd = vm_fd.create_vcpu(0)?;

@@ -391,13 +402,31 @@ impl HypervLinuxDriver {
             (None, None)
         };

+        let mut base_pfn = u64::MAX;
+        let mut total_size: usize = 0;
+
         mem_regions.iter().try_for_each(|region| {
-            let mshv_region = region.to_owned().into();
+            let mshv_region: mshv_user_mem_region = region.to_owned().into();
+            if base_pfn == u64::MAX {
+                base_pfn = mshv_region.guest_pfn;
+            }
+            total_size += mshv_region.size as usize;
             vm_fd.map_user_memory(mshv_region)
         })?;

         Self::setup_initial_sregs(&mut vcpu_fd, pml4_ptr.absolute()?)?;

+        // Get/clear the dirty page bitmap; mshv sets all the bits dirty at initialization.
+        // If we don't clear them, we end up taking a complete snapshot of memory page by page, which gets
+        // progressively slower as the sandbox size increases.
+        // The downside of doing this here is that the call to get_dirty_log takes longer as the number of pages increases,
+        // but for larger sandboxes it's easily cheaper than copying all the pages.
+
+        #[cfg(mshv2)]
+        vm_fd.get_dirty_log(base_pfn, total_size, CLEAR_DIRTY_BIT_FLAG)?;
+        #[cfg(mshv3)]
+        vm_fd.get_dirty_log(base_pfn, total_size, MSHV_GPAP_ACCESS_OP_CLEAR as u8)?;
+
         let interrupt_handle = Arc::new(LinuxInterruptHandle {
             running: AtomicU64::new(0),
             cancel_requested: AtomicBool::new(false),
@@ -429,6 +458,8 @@ impl HypervLinuxDriver {
             vm_fd,
             vcpu_fd,
             mem_regions,
+            mem_regions_size: total_size,
+            mmap_regions: Vec::new(),
             entrypoint: entrypoint_ptr.absolute()?,
             orig_rsp: rsp_ptr,
             interrupt_handle: interrupt_handle.clone(),
@@ -597,16 +628,14 @@ impl Hypervisor for HypervLinuxDriver {
         }
         let mshv_region: mshv_user_mem_region = rgn.to_owned().into();
         self.vm_fd.map_user_memory(mshv_region)?;
-        self.mem_regions.push(rgn.to_owned());
+        self.mmap_regions.push(rgn.to_owned());
         Ok(())
     }

     #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
     unsafe fn unmap_regions(&mut self, n: u64) -> Result<()> {
-        for rgn in self
-            .mem_regions
-            .split_off(self.mem_regions.len() - n as usize)
-        {
+        let n_keep = self.mmap_regions.len() - n as usize;
+        for rgn in self.mmap_regions.split_off(n_keep) {
             let mshv_region: mshv_user_mem_region = rgn.to_owned().into();
             self.vm_fd.unmap_user_memory(mshv_region)?;
         }
@@ -885,6 +914,42 @@ impl Hypervisor for HypervLinuxDriver {
         self.interrupt_handle.clone()
     }

+    // TODO: Implement getting additional host-mapped dirty pages.
+    fn get_and_clear_dirty_pages(&mut self) -> Result<Vec<u64>> {
+        let first_mshv_region: mshv_user_mem_region = self
+            .mem_regions
+            .first()
+            .ok_or(new_error!(
+                "tried to get dirty page bitmap of 0-sized region"
+            ))?
+            .to_owned()
+            .into();
+
+        let mut sandbox_dirty_pages = self.vm_fd.get_dirty_log(
+            first_mshv_region.guest_pfn,
+            self.mem_regions_size,
+            #[cfg(mshv2)]
+            CLEAR_DIRTY_BIT_FLAG,
+            #[cfg(mshv3)]
+            (MSHV_GPAP_ACCESS_OP_CLEAR as u8),
+        )?;
+
+        // Sanitize bits beyond the sandbox
+        //
+        // TODO: remove this once the bug in mshv is fixed. The bug makes it possible
+        // for non-mapped memory to incorrectly be marked dirty. To fix this, we just zero out
+        // any bits that are not within the sandbox size.
+        let sandbox_pages = self.mem_regions_size / self.page_size;
+        if let Some(last_block) = sandbox_dirty_pages.last_mut() {
+            let mask = match sandbox_pages % 64 {
+                0 => u64::MAX,
+                tail_bits => (1u64 << tail_bits) - 1,
+            };
+            *last_block &= mask;
+        }
+        Ok(sandbox_dirty_pages)
+    }
+
     #[cfg(crashdump)]
     fn crashdump_context(&self) -> Result<Option<super::crashdump::CrashDumpContext>> {
         if self.rt_cfg.guest_core_dump {
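The tail-masking at the end of get_and_clear_dirty_pages keeps only bits that fall inside the sandbox. A standalone sketch of that mask logic follows; the function name `sanitize_tail` is mine, not from the commit.

```rust
// Standalone sketch of the tail-mask logic: for a sandbox of `sandbox_pages`
// pages, only the low `sandbox_pages % 64` bits of the final u64 block are
// meaningful, so everything above them is cleared.
fn sanitize_tail(bitmap: &mut [u64], sandbox_pages: usize) {
    if let Some(last_block) = bitmap.last_mut() {
        let mask = match sandbox_pages % 64 {
            0 => u64::MAX,                        // last block is fully covered
            tail_bits => (1u64 << tail_bits) - 1, // keep only the low tail_bits bits
        };
        *last_block &= mask;
    }
}

fn main() {
    // 70 pages span 2 blocks; only bits 0..6 of the second block are valid.
    let mut bitmap = vec![u64::MAX; 2];
    sanitize_tail(&mut bitmap, 70);
    assert_eq!(bitmap[0], u64::MAX);
    assert_eq!(bitmap[1], (1u64 << 6) - 1);
}
```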

src/hyperlight_host/src/hypervisor/hyperv_windows.rs

Lines changed: 10 additions & 1 deletion
@@ -58,6 +58,7 @@ use super::{
 use super::{HyperlightExit, Hypervisor, InterruptHandle, VirtualCPU};
 use crate::hypervisor::fpu::FP_CONTROL_WORD_DEFAULT;
 use crate::hypervisor::wrappers::WHvGeneralRegisters;
+use crate::mem::bitmap::new_page_bitmap;
 use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags};
 use crate::mem::ptr::{GuestPtr, RawPtr};
 #[cfg(crashdump)]
@@ -615,13 +616,21 @@ impl Hypervisor for HypervWindowsDriver {
         Ok(())
     }

+    fn get_and_clear_dirty_pages(&mut self) -> Result<Vec<u64>> {
+        // For now we just mark all pages dirty which is the equivalent of taking a full snapshot
+        let total_size = self.mem_regions.iter().map(|r| r.guest_region.len()).sum();
+        new_page_bitmap(total_size, true)
+    }
+
     #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
     unsafe fn map_region(&mut self, _rgn: &MemoryRegion) -> Result<()> {
-        log_then_return!("Mapping host memory into the guest not yet supported on this platform");
+        // TODO: when adding support, also update `get_and_clear_dirty_pages`, see kvm/mshv for details
+        log_then_return!("Mapping host memory into the guest not yet supported on this platform.");
     }

     #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
     unsafe fn unmap_regions(&mut self, n: u64) -> Result<()> {
+        // TODO: when adding support, also update `get_and_clear_dirty_pages`, see kvm/mshv for details
         if n > 0 {
             log_then_return!(
                 "Mapping host memory into the guest not yet supported on this platform"

src/hyperlight_host/src/hypervisor/kvm.rs

Lines changed: 54 additions & 6 deletions
@@ -21,6 +21,7 @@ use std::sync::Arc;
 use std::sync::Mutex;
 use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};

+use hyperlight_common::mem::{PAGE_SIZE_USIZE, PAGES_IN_BLOCK};
 use kvm_bindings::{kvm_fpu, kvm_regs, kvm_userspace_memory_region};
 use kvm_ioctls::Cap::UserMemory;
 use kvm_ioctls::{Kvm, VcpuExit, VcpuFd, VmFd};
@@ -43,7 +44,8 @@ use super::{
 use super::{HyperlightExit, Hypervisor, InterruptHandle, LinuxInterruptHandle, VirtualCPU};
 #[cfg(gdb)]
 use crate::HyperlightError;
-use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags};
+use crate::mem::bitmap::{bit_index_iterator, new_page_bitmap};
+use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags, MemoryRegionType};
 use crate::mem::ptr::{GuestPtr, RawPtr};
 use crate::sandbox::SandboxConfiguration;
 #[cfg(crashdump)]
@@ -289,7 +291,10 @@ pub(crate) struct KVMDriver {
     vcpu_fd: VcpuFd,
     entrypoint: u64,
     orig_rsp: GuestPtr,
+    // Regions part of the original sandbox
     mem_regions: Vec<MemoryRegion>,
+    // Regions that are mapped after sandbox creation
+    mmap_regions: Vec<MemoryRegion>,
     interrupt_handle: Arc<LinuxInterruptHandle>,

     #[cfg(gdb)]
@@ -373,6 +378,7 @@ impl KVMDriver {
             entrypoint,
             orig_rsp: rsp_gp,
             mem_regions,
+            mmap_regions: Vec::new(),
             interrupt_handle: interrupt_handle.clone(),
             #[cfg(gdb)]
             debug,
@@ -503,18 +509,19 @@ impl Hypervisor for KVMDriver {
         }

         let mut kvm_region: kvm_userspace_memory_region = region.clone().into();
-        kvm_region.slot = self.mem_regions.len() as u32;
+        kvm_region.slot = (self.mem_regions.len() + self.mmap_regions.len()) as u32;
         unsafe { self.vm_fd.set_user_memory_region(kvm_region) }?;
-        self.mem_regions.push(region.to_owned());
+        self.mmap_regions.push(region.to_owned());
         Ok(())
     }

     #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
     unsafe fn unmap_regions(&mut self, n: u64) -> Result<()> {
-        let n_keep = self.mem_regions.len() - n as usize;
-        for (k, region) in self.mem_regions.split_off(n_keep).iter().enumerate() {
+        let n_keep = self.mmap_regions.len() - n as usize;
+        let n_sandbox_regions = self.mem_regions.len();
+        for (k, region) in self.mmap_regions.split_off(n_keep).iter().enumerate() {
             let mut kvm_region: kvm_userspace_memory_region = region.clone().into();
-            kvm_region.slot = (n_keep + k) as u32;
+            kvm_region.slot = (n_sandbox_regions + n_keep + k) as u32;
             // Setting memory_size to 0 unmaps the slot's region
             // From https://docs.kernel.org/virt/kvm/api.html
             // > Deleting a slot is done by passing zero for memory_size.
@@ -750,6 +757,47 @@ impl Hypervisor for KVMDriver {
         self.interrupt_handle.clone()
     }

+    // TODO: Implement getting additional host-mapped dirty pages.
+    fn get_and_clear_dirty_pages(&mut self) -> Result<Vec<u64>> {
+        let mut page_indices = vec![];
+        let mut current_page = 0;
+
+        // Iterate over all memory regions and get the dirty pages for each region ignoring guard pages which cannot be dirty
+        for (i, mem_region) in self.mem_regions.iter().enumerate() {
+            let num_pages = mem_region.guest_region.len() / PAGE_SIZE_USIZE;
+            let bitmap = match mem_region.flags {
+                MemoryRegionFlags::READ => {
+                    // read-only page. It can never be dirty so return zero dirty pages.
+                    new_page_bitmap(mem_region.guest_region.len(), false)?
+                }
+                _ => {
+                    if mem_region.region_type == MemoryRegionType::GuardPage {
+                        // Trying to get dirty pages for a guard page region results in a VMMSysError(2)
+                        new_page_bitmap(mem_region.guest_region.len(), false)?
+                    } else {
+                        // Get the dirty bitmap for the memory region
+                        self.vm_fd
+                            .get_dirty_log(i as u32, mem_region.guest_region.len())?
+                    }
+                }
+            };
+            for page_idx in bit_index_iterator(&bitmap) {
+                page_indices.push(current_page + page_idx);
+            }
+            current_page += num_pages;
+        }
+
+        // convert vec of page indices to vec of blocks
+        let mut sandbox_dirty_pages = new_page_bitmap(current_page * PAGE_SIZE_USIZE, false)?;
+        for page_idx in page_indices {
+            let block_idx = page_idx / PAGES_IN_BLOCK;
+            let bit_idx = page_idx % PAGES_IN_BLOCK;
+            sandbox_dirty_pages[block_idx] |= 1 << bit_idx;
+        }
+
+        Ok(sandbox_dirty_pages)
+    }
+
     #[cfg(crashdump)]
     fn crashdump_context(&self) -> Result<Option<crashdump::CrashDumpContext>> {
         if self.rt_cfg.guest_core_dump {
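The KVM path leans on bit_index_iterator from crate::mem::bitmap, whose implementation is not shown in this diff. A plausible stand-in that yields the index of every set bit in a block bitmap could look like this (illustrative only).

```rust
// Sketch of an iterator over set-bit indices in a block bitmap, assuming the
// same layout as above: bit B of block K corresponds to page K * 64 + B.
const PAGES_IN_BLOCK: usize = 64;

fn set_bit_indices(bitmap: &[u64]) -> impl Iterator<Item = usize> + '_ {
    bitmap.iter().enumerate().flat_map(|(block_idx, &block)| {
        (0..PAGES_IN_BLOCK)
            .filter(move |bit| block & (1u64 << *bit) != 0)
            .map(move |bit| block_idx * PAGES_IN_BLOCK + bit)
    })
}

fn main() {
    // Block 0 has bits 0 and 3 set, block 1 has bit 1 set -> page indices 0, 3, 65.
    let bitmap = [0b1001u64, 0b10u64];
    let pages: Vec<usize> = set_bit_indices(&bitmap).collect();
    assert_eq!(pages, vec![0, 3, 65]);
}
```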

src/hyperlight_host/src/hypervisor/mod.rs

Lines changed: 6 additions & 0 deletions
@@ -196,6 +196,12 @@ pub(crate) trait Hypervisor: Debug + Sync + Send {
         None
     }

+    /// Get dirty pages as a bitmap (Vec<u64>).
+    /// Each bit in a u64 represents a page.
+    /// This also clears the bitflags, marking the pages as non-dirty.
+    /// TODO: Implement getting additional host-mapped dirty pages.
+    fn get_and_clear_dirty_pages(&mut self) -> Result<Vec<u64>>;
+
     /// Get InterruptHandle to underlying VM
     fn interrupt_handle(&self) -> Arc<dyn InterruptHandle>;
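For callers of the new trait method, the returned Vec<u64> can be consumed with ordinary bit operations. A hedged example follows; the `dirty_bytes` helper and the surrounding snapshot plumbing are illustrative and not part of this commit.

```rust
// Illustrative consumer of the Vec<u64> returned by get_and_clear_dirty_pages:
// count set bits to estimate how many pages a snapshot would need to copy.
const PAGE_SIZE_USIZE: usize = 4096;

fn dirty_bytes(dirty_bitmap: &[u64]) -> usize {
    let dirty_pages: u32 = dirty_bitmap.iter().map(|block| block.count_ones()).sum();
    dirty_pages as usize * PAGE_SIZE_USIZE
}

fn main() {
    // 3 dirty pages in the first block, 1 in the second -> 4 pages -> 16 KiB.
    let bitmap = vec![0b0000_0111u64, 0b1000u64];
    assert_eq!(dirty_bytes(&bitmap), 4 * PAGE_SIZE_USIZE);
}
```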

0 commit comments
