
Commit 0d0cab9

JackThomson2, RomanKovtyukh, and pb8o committed
Make diff snapshots transactional
Store the KVM dirty bitmap inside Firecracker's internal bitmap so no data is lost in case of an error.

Signed-off-by: Jack Thomson <[email protected]>
Co-authored-by: Roman Kovtyukh <[email protected]>
Co-authored-by: Pablo Barbáchano <[email protected]>
1 parent 8b15271 · commit 0d0cab9

3 files changed: 102 additions, 9 deletions
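The change works by folding KVM's per-slot dirty bitmap into Firecracker's internal per-region bitmap before anything is written, writing the diff from the internal bitmap, and resetting that bitmap only once the whole write has succeeded. Below is a minimal, hypothetical sketch of that ordering; the Memory, merge_kvm_bitmap, write_dirty, and dump_dirty names are illustrative stand-ins, not Firecracker's actual types.

// Hypothetical sketch (not Firecracker's real types) of the transactional ordering:
// merge KVM's dirty words into an internal bitmap, write, reset only on success.
struct Memory {
    dirty: Vec<bool>, // internal per-page dirty bitmap
}

impl Memory {
    // Step 1: fold KVM's 64-bit dirty words into the internal bitmap first,
    // so the dirty information survives a failed write.
    fn merge_kvm_bitmap(&mut self, kvm_bitmap: &[u64]) {
        for (i, word) in kvm_bitmap.iter().enumerate() {
            for j in 0..64 {
                if (word >> j) & 1 != 0 {
                    self.dirty[i * 64 + j] = true;
                }
            }
        }
    }

    // Step 2: write every page the internal bitmap marks dirty; errors propagate.
    fn write_dirty(&self) -> std::io::Result<()> {
        // ... write the dirty pages to the snapshot file here ...
        Ok(())
    }

    // Step 3: clear the bitmap only after the whole write succeeded, so a
    // failed snapshot can simply be retried without losing dirty pages.
    fn dump_dirty(&mut self, kvm_bitmap: &[u64]) -> std::io::Result<()> {
        self.merge_kvm_bitmap(kvm_bitmap);
        self.write_dirty()?; // on error, `dirty` is left untouched
        self.dirty.iter_mut().for_each(|d| *d = false);
        Ok(())
    }
}

fn main() -> std::io::Result<()> {
    let mut mem = Memory { dirty: vec![false; 128] };
    // One KVM word with bits 0 and 3 set marks pages 0 and 3 dirty.
    mem.dump_dirty(&[0b1001])
}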

src/vmm/src/vstate/memory.rs

Lines changed: 24 additions & 7 deletions
@@ -301,6 +301,23 @@ impl GuestMemoryExtension for GuestMemoryMmap {
         let mut writer_offset = 0;
         let page_size = get_page_size().map_err(MemoryError::PageSize)?;
 
+        self.iter().enumerate().for_each(|(slot, region)| {
+            let kvm_bitmap = dirty_bitmap.get(&slot).unwrap();
+            let firecracker_bitmap = region.bitmap();
+
+            for (i, v) in kvm_bitmap.iter().enumerate() {
+                for j in 0..64 {
+                    let is_kvm_page_dirty = ((v >> j) & 1u64) != 0u64;
+
+                    if is_kvm_page_dirty {
+                        let page_offset = ((i * 64) + j) * page_size;
+
+                        firecracker_bitmap.mark_dirty(page_offset, 1)
+                    }
+                }
+            }
+        });
+
         self.iter()
             .enumerate()
             .try_for_each(|(slot, region)| {
@@ -309,12 +326,11 @@ impl GuestMemoryExtension for GuestMemoryMmap {
                 let mut write_size = 0;
                 let mut dirty_batch_start: u64 = 0;
 
-                for (i, v) in kvm_bitmap.iter().enumerate() {
+                for i in 0..kvm_bitmap.len() {
                     for j in 0..64 {
-                        let is_kvm_page_dirty = ((v >> j) & 1u64) != 0u64;
                         let page_offset = ((i * 64) + j) * page_size;
                         let is_firecracker_page_dirty = firecracker_bitmap.dirty_at(page_offset);
-                        if is_kvm_page_dirty || is_firecracker_page_dirty {
+                        if is_firecracker_page_dirty {
                             // We are at the start of a new batch of dirty pages.
                             if write_size == 0 {
                                 // Seek forward over the unmodified pages.
@@ -344,13 +360,14 @@ impl GuestMemoryExtension for GuestMemoryMmap {
                     )?;
                 }
                 writer_offset += region.len();
-                if let Some(bitmap) = firecracker_bitmap {
-                    bitmap.reset();
-                }
 
                 Ok(())
             })
-            .map_err(MemoryError::WriteMemory)
+            .map_err(MemoryError::WriteMemory)?;
+
+        self.reset_dirty();
+
+        Ok(())
     }
 
     /// Resets all the memory region bitmaps
tests/framework/microvm.py

Lines changed: 8 additions & 2 deletions
@@ -214,6 +214,7 @@ def __init__(
         self.log_file = None
         self.metrics_file = None
         self._spawned = False
+        self._killed = False
 
         # device dictionaries
         self.iface = {}
@@ -238,6 +239,9 @@ def __repr__(self):
     def kill(self):
         """All clean up associated with this microVM should go here."""
         # pylint: disable=subprocess-run-check
+        # if it was already killed, return
+        if self._killed:
+            return
 
         # Stop any registered monitors
         for monitor in self.monitors:
@@ -287,6 +291,7 @@ def kill(self):
 
         # Mark the microVM as not spawned, so we avoid trying to kill twice.
        self._spawned = False
+        self._killed = True
 
         if self.time_api_requests:
             self._validate_api_response_times()
@@ -1014,8 +1019,9 @@ def kill(self):
         for vm in self.vms:
             vm.kill()
             vm.jailer.cleanup()
-            if len(vm.jailer.jailer_id) > 0:
-                shutil.rmtree(vm.jailer.chroot_base_with_id())
+            chroot_base_with_id = vm.jailer.chroot_base_with_id()
+            if len(vm.jailer.jailer_id) > 0 and chroot_base_with_id.exists():
+                shutil.rmtree(chroot_base_with_id)
             vm.netns.cleanup()
 
         self.vms.clear()
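The microvm.py change above makes kill() safe to call more than once by remembering whether cleanup has already run. A hypothetical Rust rendering of the same guard, only to illustrate the pattern (Microvm and kill here are stand-ins, not Firecracker code):

// Hypothetical sketch of an idempotent cleanup guard, mirroring the
// `self._killed` flag added to the Python kill() above.
struct Microvm {
    killed: bool,
}

impl Microvm {
    fn kill(&mut self) {
        if self.killed {
            return; // cleanup already ran; a second call is a no-op
        }
        // ... stop monitors, terminate the VMM process, remove resources ...
        self.killed = true;
    }
}

fn main() {
    let mut vm = Microvm { killed: false };
    vm.kill();
    vm.kill(); // safe: the guard makes the second call a no-op
}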
Lines changed: 70 additions & 0 deletions
@@ -0,0 +1,70 @@
+# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Test that no dirty pages are lost in case of an error during snapshot creation."""
+
+import subprocess
+from pathlib import Path
+
+import pytest
+
+
+@pytest.fixture
+def mount_tmpfs_small(worker_id):
+    """Mount a small tmpfs and return its path"""
+    mnt_path = Path(f"/mnt/{worker_id}")
+    mnt_path.mkdir(parents=True)
+    subprocess.check_call(
+        ["mount", "-o", "size=512M", "-t", "tmpfs", "none", str(mnt_path)]
+    )
+    try:
+        yield mnt_path
+    finally:
+        subprocess.check_call(["umount", mnt_path])
+        mnt_path.rmdir()
+
+
+def test_diff_snapshot_works_after_error(
+    microvm_factory, guest_kernel_linux_5_10, rootfs_ubuntu_22, mount_tmpfs_small
+):
+    """
+    Test that if a partial snapshot errors it will work after and not lose data
+    """
+    uvm = microvm_factory.build(
+        guest_kernel_linux_5_10,
+        rootfs_ubuntu_22,
+        jailer_kwargs={"chroot_base": mount_tmpfs_small},
+    )
+
+    vm_mem_size = 128
+    uvm.spawn()
+    uvm.basic_config(mem_size_mib=vm_mem_size, track_dirty_pages=True)
+    uvm.add_net_iface()
+    uvm.start()
+    uvm.wait_for_up()
+
+    chroot = Path(uvm.chroot())
+
+    # Create a large file, so we run out of space (ENOSPC) during the snapshot
+    # Assumes a Docker /srv tmpfs of 1G, derived by trial and error
+    fill = chroot / "fill"
+    subprocess.check_call(f"fallocate -l 330M {fill}", shell=True)
+
+    try:
+        uvm.snapshot_diff()
+    except RuntimeError:
+        msg = "No space left on device"
+        uvm.check_log_message(msg)
+    else:
+        assert False, "This should fail"
+
+    fill.unlink()
+
+    # Now there is enough space for it to work
+    snap2 = uvm.snapshot_diff()
+
+    vm2 = microvm_factory.build()
+    vm2.spawn()
+    vm2.restore_from_snapshot(snap2, resume=True)
+    vm2.wait_for_up()
+
+    uvm.kill()
