Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ and this project adheres to
- [#5175](https://github.com/firecracker-microvm/firecracker/pull/5175): Allow
including a custom cpu template directly in the json configuration file passed
to `--config-file` under the `cpu_config` key.
- [#5274](https://github.com/firecracker-microvm/firecracker/pull/5274): Allow
taking diff snapshots even if dirty page tracking is disabled, by using
`mincore(2)` to overapproximate the set of dirty pages. This only works if swap
is disabled on the host, because `mincore(2)` does not report swapped-out pages
as in core.

### Changed

Expand Down
3 changes: 3 additions & 0 deletions resources/seccomp/aarch64-unknown-linux-musl.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
{
"syscall": "write"
},
{
"syscall": "mincore"
},
{
"syscall": "writev",
"comment": "Used by the VirtIO net device to write to tap"
Expand Down
3 changes: 3 additions & 0 deletions resources/seccomp/x86_64-unknown-linux-musl.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
{
"syscall": "write"
},
{
"syscall": "mincore"
},
{
"syscall": "writev",
"comment": "Used by the VirtIO net device to write to tap"
Expand Down
4 changes: 2 additions & 2 deletions src/vmm/src/persist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ use crate::vstate::kvm::KvmState;
use crate::vstate::memory;
use crate::vstate::memory::{GuestMemoryState, GuestRegionMmap, MemoryError};
use crate::vstate::vcpu::{VcpuSendEventError, VcpuState};
use crate::vstate::vm::VmState;
use crate::vstate::vm::{VmError, VmState};
use crate::{EventManager, Vmm, vstate};

/// Holds information related to the VM that is not part of VmState.
Expand Down Expand Up @@ -134,7 +134,7 @@ pub enum MicrovmStateError {
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum CreateSnapshotError {
/// Cannot get dirty bitmap: {0}
DirtyBitmap(#[from] vmm_sys_util::errno::Error),
DirtyBitmap(#[from] VmError),
/// Cannot write memory file: {0}
Memory(#[from] MemoryError),
/// Cannot perform {0} on the memory backing file: {1}
Expand Down
7 changes: 0 additions & 7 deletions src/vmm/src/rpc_interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -747,13 +747,6 @@ impl RuntimeApiController {
) -> Result<VmmData, VmmActionError> {
if create_params.snapshot_type == SnapshotType::Diff {
log_dev_preview_warning("Virtual machine diff snapshots", None);

if !self.vm_resources.machine_config.track_dirty_pages {
return Err(VmmActionError::NotSupported(
"Diff snapshots are not allowed on uVMs with dirty page tracking disabled."
.to_string(),
));
}
}

let mut locked_vmm = self.vmm.lock().unwrap();
Expand Down
1 change: 0 additions & 1 deletion src/vmm/src/test_utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ pub fn default_vmm_no_boot(kernel_image: Option<&str>) -> (Arc<Mutex<Vmm>>, Even
create_vmm(kernel_image, false, false)
}

#[cfg(target_arch = "x86_64")]
/// Builds a test [`Vmm`] and its [`EventManager`] by calling `create_vmm` with both of its
/// boolean flags set to `true`.
///
/// `kernel_image` is forwarded to `create_vmm` unchanged.
///
/// NOTE(review): going by the function name, one of the two flags presumably enables dirty
/// page tracking; the meaning of the other flag is not visible here. Confirm against
/// `create_vmm`.
pub fn dirty_tracking_vmm(kernel_image: Option<&str>) -> (Arc<Mutex<Vmm>>, EventManager) {
create_vmm(kernel_image, true, true)
}
Expand Down
67 changes: 58 additions & 9 deletions src/vmm/src/vstate/vm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.

use std::collections::HashMap;
use std::fs::OpenOptions;
use std::io::Write;
use std::path::Path;
Expand All @@ -15,6 +14,7 @@ use kvm_bindings::{KVM_MEM_LOG_DIRTY_PAGES, kvm_userspace_memory_region};
use kvm_ioctls::VmFd;
use vmm_sys_util::eventfd::EventFd;

use crate::arch::host_page_size;
pub use crate::arch::{ArchVm as Vm, ArchVmError, VmState};
use crate::logger::info;
use crate::persist::CreateSnapshotError;
Expand Down Expand Up @@ -45,6 +45,8 @@ pub enum VmError {
SetUserMemoryRegion(kvm_ioctls::Error),
/// Failed to create VM: {0}
CreateVm(kvm_ioctls::Error),
/// Failed to get KVM's dirty log: {0}
GetDirtyLog(kvm_ioctls::Error),
/// {0}
Arch(#[from] ArchVmError),
/// Error during eventfd operations: {0}
Expand All @@ -55,6 +57,8 @@ pub enum VmError {
NotEnoughMemorySlots,
/// Memory Error: {0}
VmMemory(#[from] vm_memory::Error),
/// Error calling mincore: {0}
Mincore(vmm_sys_util::errno::Error),
}

/// Contains Vm functions that are usable across CPU architectures
Expand Down Expand Up @@ -196,17 +200,21 @@ impl Vm {
}

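/// Retrieves a dirty bitmap for each of the guest's memory regions: KVM's dirty log for
/// regions that have a bitmap, and a `mincore(2)`-based overapproximation for the others.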
pub fn get_dirty_bitmap(&self) -> Result<DirtyBitmap, vmm_sys_util::errno::Error> {
let mut bitmap: DirtyBitmap = HashMap::new();
pub fn get_dirty_bitmap(&self) -> Result<DirtyBitmap, VmError> {
self.guest_memory()
.iter()
.zip(0u32..)
.try_for_each(|(region, slot)| {
self.fd()
.get_dirty_log(slot, u64_to_usize(region.len()))
.map(|bitmap_region| _ = bitmap.insert(slot, bitmap_region))
})?;
Ok(bitmap)
.map(|(region, slot)| {
let bitmap = match region.bitmap() {
Some(_) => self
.fd()
.get_dirty_log(slot, u64_to_usize(region.len()))
.map_err(VmError::GetDirtyLog)?,
None => mincore_bitmap(region)?,
};
Ok((slot, bitmap))
})
.collect()
}

/// Takes a snapshot of the virtual machine running inside the given [`Vmm`] and saves it to
Expand Down Expand Up @@ -278,6 +286,47 @@ impl Vm {
}
}

/// Use `mincore(2)` to overapproximate the dirty bitmap for the given memslot. To be used
/// if a diff snapshot is requested, but dirty page tracking wasn't enabled.
///
/// The returned bitmap holds one bit per host page of `region`, packed into `u64` words:
/// page `i` is bit `i % 64` of word `i / 64`. A bit is set if `mincore(2)` reported the
/// corresponding page as being in core.
///
/// # Errors
///
/// Returns [`VmError::Mincore`] carrying the last OS error if the `mincore(2)` call fails.
fn mincore_bitmap(region: &GuestRegionMmap) -> Result<Vec<u64>, VmError> {
    // TODO: Once Host 5.10 goes out of support, we can make this more robust and work on
    // swap-enabled systems, by doing mlock2(MLOCK_ONFAULT)/munlock() in this function (to
    // force swapped-out pages to get paged in, so that mincore will consider them incore).
    // However, on AMD (m6a/m7a) 5.10, doing so introduces a 100%/30ms regression to snapshot
    // creation, even if swap is disabled, so currently it cannot be done.

    // Mincore always works at PAGE_SIZE granularity, even if the VMA we are dealing with
    // is a hugetlbfs VMA (e.g. to report a single hugepage as "present", mincore will
    // give us 512 4k markers with the lowest bit set).
    let page_size = host_page_size();
    let region_len = u64_to_usize(region.len());

    // mincore(2) writes one status byte for every page overlapping the range, i.e.
    // (length + PAGE_SIZE - 1) / PAGE_SIZE bytes. Round up so that a region whose length is
    // not a multiple of the page size can never make the kernel write past the buffer.
    let num_pages = region_len.div_ceil(page_size);
    let mut residency = vec![0u8; num_pages];

    // SAFETY: The safety invariants of GuestRegionMmap ensure that region.as_ptr() is a valid
    // userspace mapping of size region.len() bytes. The status buffer has one byte for each
    // page overlapping this userspace mapping. Note that mincore does not operate on bitmaps
    // like KVM_MEM_LOG_DIRTY_PAGES, but rather it uses 8 bits per page (i.e. 1 byte), setting
    // the least significant bit to 1 if the page corresponding to a byte is in core (available
    // in the page cache and resolvable via just a minor page fault).
    let r = unsafe {
        libc::mincore(
            region.as_ptr().cast::<libc::c_void>(),
            region_len,
            residency.as_mut_ptr(),
        )
    };

    if r != 0 {
        return Err(VmError::Mincore(vmm_sys_util::errno::Error::last()));
    }

    // Pack the per-page status bytes into 64-bit words, keeping only the "in core" bit of
    // each byte. `chunks(64)` yields exactly `num_pages.div_ceil(64)` words.
    Ok(residency
        .chunks(64)
        .map(|pages| {
            pages
                .iter()
                .enumerate()
                .fold(0u64, |word, (bit, &status)| {
                    word | (u64::from(status & 0x1) << bit)
                })
        })
        .collect())
}

#[cfg(test)]
pub(crate) mod tests {
use vm_memory::GuestAddress;
Expand Down
59 changes: 21 additions & 38 deletions src/vmm/tests/integration_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,47 +100,30 @@ fn test_pause_resume_microvm() {
vmm.lock().unwrap().stop(FcExitCode::Ok);
}

#[test]
fn test_dirty_bitmap_error() {
// Error case: dirty tracking disabled.
let (vmm, _) = default_vmm(None);

// The vmm will start with dirty page tracking = OFF.
// With dirty tracking disabled, the underlying KVM_GET_DIRTY_LOG ioctl will fail
// with errno 2 (ENOENT) because KVM can't find any guest memory regions with dirty
// page tracking enabled.
assert_eq!(
vmm.lock()
.unwrap()
.vm
.get_dirty_bitmap()
.unwrap_err()
.errno(),
2
);
vmm.lock().unwrap().stop(FcExitCode::Ok);
}

#[test]
#[cfg(target_arch = "x86_64")]
fn test_dirty_bitmap_success() {
// The vmm will start with dirty page tracking = ON.
let (vmm, _) = vmm::test_utils::dirty_tracking_vmm(Some(NOISY_KERNEL_IMAGE));

// Let it churn for a while and dirty some pages...
thread::sleep(Duration::from_millis(100));
let bitmap = vmm.lock().unwrap().vm.get_dirty_bitmap().unwrap();
let num_dirty_pages: u32 = bitmap
.values()
.map(|bitmap_per_region| {
// Gently coerce to u32
let num_dirty_pages_per_region: u32 =
bitmap_per_region.iter().map(|n| n.count_ones()).sum();
num_dirty_pages_per_region
})
.sum();
assert!(num_dirty_pages > 0);
vmm.lock().unwrap().stop(FcExitCode::Ok);
let vmms = [
vmm::test_utils::dirty_tracking_vmm(Some(NOISY_KERNEL_IMAGE)),
default_vmm(Some(NOISY_KERNEL_IMAGE)),
];

for (vmm, _) in vmms {
// Let it churn for a while and dirty some pages...
thread::sleep(Duration::from_millis(100));
let bitmap = vmm.lock().unwrap().vm.get_dirty_bitmap().unwrap();
let num_dirty_pages: u32 = bitmap
.values()
.map(|bitmap_per_region| {
// Gently coerce to u32
let num_dirty_pages_per_region: u32 =
bitmap_per_region.iter().map(|n| n.count_ones()).sum();
num_dirty_pages_per_region
})
.sum();
assert!(num_dirty_pages > 0);
vmm.lock().unwrap().stop(FcExitCode::Ok);
}
}

#[test]
Expand Down
16 changes: 1 addition & 15 deletions tests/integration_tests/functional/test_snapshot_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def test_resume(uvm_nano, microvm_factory, resume_at_restore):
assert restored_vm.state == "Paused"
restored_vm.resume()
assert restored_vm.state == "Running"
restored_vm.ssh.check_output("true")


def test_snapshot_current_version(uvm_nano):
Expand Down Expand Up @@ -390,21 +391,6 @@ def test_negative_snapshot_create(uvm_nano):
mem_file_path="memfile", snapshot_path="statefile", snapshot_type="Full"
)

vm.api.vm.patch(state="Paused")

# Try diff with dirty pages tracking disabled.
expected_msg = (
"Diff snapshots are not allowed on uVMs with dirty page tracking disabled"
)
with pytest.raises(RuntimeError, match=expected_msg):
vm.api.snapshot_create.put(
mem_file_path="memfile", snapshot_path="statefile", snapshot_type="Diff"
)
assert not os.path.exists("statefile")
assert not os.path.exists("memfile")

vm.kill()


def test_create_large_diff_snapshot(uvm_plain):
"""
Expand Down