Skip to content

Commit 5d15d22

Browse files
committed
Use mincore(2) to create diff snapshots without dirty page tracking
Currently, Firecracker only supports creation of diff snapshots if dirty page tracking is explicitly enabled. Allow creation of diff snapshots even if it is not enabled, through the use of mincore(2). The mincore(2) syscall determines which pages of a VMA are "in core". For anonymous mappings (as used by booted VMs without vhost-user devices), this refers to all pages that are currently faulted in. For memfd (as used by booted VMs with vhost-user devices), this means all pages that have been allocated into the memfd, regardless of whether they were allocated through the VMA on which mincore(2) was called (meaning creation of mincore-diff-snapshots will correctly account for pages that were only touched by the vhost-user backend, but not by Firecracker or KVM). For restored VMs, this means all pages of the underlying snapshot file that have been faulted in. Note that this only works if swap has been disabled, as pages currently swapped to disk do not count as "in-core", yet obviously should be included in a diff snapshot. If swap is used, dirty page tracking MUST be enabled for diff snapshots to work correctly. Compared to diff snapshots based on dirty page tracking, mincore-based diff snapshots will be slightly larger. This is because dirty page tracked diff snapshots only include pages that were actually written to, while mincore-based snapshots will contain all pages that were accessed at all, e.g. even if only for reading. Signed-off-by: Patrick Roy <[email protected]>
1 parent c8eb16f commit 5d15d22

File tree

6 files changed

+46
-7
lines changed

6 files changed

+46
-7
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ and this project adheres to
1616
- [#5175](https://github.com/firecracker-microvm/firecracker/pull/5175): Allow
1717
including a custom cpu template directly in the json configuration file passed
1818
to `--config-file` under the `cpu_config` key.
19+
- [#????](...): Allow taking diff snapshots even if dirty page tracking is
20+
disabled, by using `mincore(2)` to overapproximate the set of dirty pages.
21+
Only works if swap is disabled.
1922

2023
### Changed
2124

resources/seccomp/aarch64-unknown-linux-musl.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@
2828
{
2929
"syscall": "write"
3030
},
31+
{
32+
"syscall": "mincore"
33+
},
3134
{
3235
"syscall": "writev",
3336
"comment": "Used by the VirtIO net device to write to tap"

resources/seccomp/x86_64-unknown-linux-musl.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@
2828
{
2929
"syscall": "write"
3030
},
31+
{
32+
"syscall": "mincore"
33+
},
3134
{
3235
"syscall": "writev",
3336
"comment": "Used by the VirtIO net device to write to tap"

src/vmm/src/rpc_interface.rs

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -747,13 +747,6 @@ impl RuntimeApiController {
747747
) -> Result<VmmData, VmmActionError> {
748748
if create_params.snapshot_type == SnapshotType::Diff {
749749
log_dev_preview_warning("Virtual machine diff snapshots", None);
750-
751-
if !self.vm_resources.machine_config.track_dirty_pages {
752-
return Err(VmmActionError::NotSupported(
753-
"Diff snapshots are not allowed on uVMs with dirty page tracking disabled."
754-
.to_string(),
755-
));
756-
}
757750
}
758751

759752
let mut locked_vmm = self.vmm.lock().unwrap();

src/vmm/src/vstate/vm.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use kvm_bindings::{KVM_MEM_LOG_DIRTY_PAGES, kvm_userspace_memory_region};
1515
use kvm_ioctls::VmFd;
1616
use vmm_sys_util::eventfd::EventFd;
1717

18+
use crate::arch::host_page_size;
1819
pub use crate::arch::{ArchVm as Vm, ArchVmError, VmState};
1920
use crate::logger::info;
2021
use crate::persist::CreateSnapshotError;
@@ -204,6 +205,9 @@ impl Vm {
204205
.try_for_each(|(region, slot)| {
205206
self.fd()
206207
.get_dirty_log(slot, u64_to_usize(region.len()))
208+
// Getting the dirty log failed. This is probably because dirty page tracking
209+
// was disabled. Fall back to mincore in this case.
210+
.or_else(|_| mincore_bitmap(region))
207211
.map(|bitmap_region| _ = bitmap.insert(slot, bitmap_region))
208212
})?;
209213
Ok(bitmap)
@@ -278,6 +282,38 @@ impl Vm {
278282
}
279283
}
280284

285+
/// Use `mincore(2)` to overapproximate the dirty bitmap for the given memslot. To be used
286+
/// if a diff snapshot is requested, but dirty page tracking wasn't enabled.
287+
fn mincore_bitmap(region: &GuestRegionMmap) -> Result<Vec<u64>, vmm_sys_util::errno::Error> {
288+
let page_size = host_page_size();
289+
let mut mincore_bitmap = vec![0u8; u64_to_usize(region.len()) / page_size];
290+
let mut bitmap = vec![0u64; u64_to_usize(region.len()) / page_size];
291+
292+
// SAFETY: The safety invariants of GuestRegionMmap ensure that region.as_ptr() is a valid
293+
// userspace mapping of size region.len() bytes. The bitmap has exactly one byte for each
294+
// page in this userspace mapping. Note that mincore does not operate on bitmaps like
295+
// KVM_MEM_LOG_DIRTY_PAGES, but rather it uses 8 bits per page (e.g. 1 byte), setting the
296+
// least significant bit to 1 if the page corresponding to a byte is in core (available in
297+
// the page cache and resolvable via just a minor page fault).
298+
let r = unsafe {
299+
libc::mincore(
300+
region.as_ptr().cast::<libc::c_void>(),
301+
u64_to_usize(region.len()),
302+
mincore_bitmap.as_mut_ptr(),
303+
)
304+
};
305+
306+
if r != 0 {
307+
return vmm_sys_util::errno::errno_result();
308+
}
309+
310+
for (page_idx, b) in mincore_bitmap.iter().enumerate() {
311+
bitmap[page_idx / 64] |= (*b as u64 & 0x1) << (page_idx as u64 % 64);
312+
}
313+
314+
Ok(bitmap)
315+
}
316+
281317
#[cfg(test)]
282318
pub(crate) mod tests {
283319
use vm_memory::GuestAddress;

tests/integration_tests/functional/test_snapshot_basic.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ def test_resume(uvm_nano, microvm_factory, resume_at_restore):
7676
assert restored_vm.state == "Paused"
7777
restored_vm.resume()
7878
assert restored_vm.state == "Running"
79+
restored_vm.ssh.check_output("true")
7980

8081

8182
def test_snapshot_current_version(uvm_nano):

0 commit comments

Comments
 (0)