Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 17 additions & 16 deletions tests/framework/microvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,9 @@ def kill(self):
stderr == "" and "firecracker" not in stdout
), f"Firecracker reported its pid {self.firecracker_pid}, which was killed, but there still exist processes using the supposedly dead Firecracker's jailer_id: {stdout}"

if self.uffd_handler and self.uffd_handler.is_running():
self.uffd_handler.kill()

# Mark the microVM as not spawned, so we avoid trying to kill twice.
self._spawned = False
self._killed = True
Expand Down Expand Up @@ -975,19 +978,22 @@ def snapshot_full(self, *, mem_path: str = "mem", vmstate_path="vmstate"):

def restore_from_snapshot(
self,
snapshot: Snapshot = None,
snapshot: Snapshot,
resume: bool = False,
rename_interfaces: dict = None,
*,
uffd_handler_name: str = None,
):
"""Restore a snapshot"""
if self.uffd_handler is None:
assert (
snapshot is not None
), "snapshot file must be provided if no uffd handler is attached!"

jailed_snapshot = snapshot.copy_to_chroot(Path(self.chroot()))
else:
jailed_snapshot = self.uffd_handler.snapshot
jailed_snapshot = snapshot.copy_to_chroot(Path(self.chroot()))

if uffd_handler_name:
self.uffd_handler = spawn_pf_handler(
self,
uffd_handler(uffd_handler_name, binary_dir=self.fc_binary_path.parent),
jailed_snapshot,
)

jailed_mem = Path("/") / jailed_snapshot.mem.name
jailed_vmstate = Path("/") / jailed_snapshot.vmstate.name
Expand Down Expand Up @@ -1180,14 +1186,9 @@ def build_n_from_snapshot(
microvm = self.build()
microvm.spawn()

if uffd_handler_name is not None:
spawn_pf_handler(
microvm,
uffd_handler(uffd_handler_name, binary_dir=self.binary_path),
current_snapshot,
)

snapshot_copy = microvm.restore_from_snapshot(current_snapshot, resume=True)
snapshot_copy = microvm.restore_from_snapshot(
current_snapshot, resume=True, uffd_handler_name=uffd_handler_name
)

yield microvm

Expand Down
20 changes: 15 additions & 5 deletions tests/framework/utils_uffd.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,16 +77,27 @@ def log_data(self):
return ""
return self.log_file.read_text(encoding="utf-8")

def kill(self):
    """Terminate the uffd handler process.

    Asserts that the handler is still running before killing it, so
    calling this on a handler that is not running raises AssertionError
    instead of being silently ignored.
    """
    handler_alive = self.is_running()
    assert handler_alive

    self.proc.kill()

def mark_killed(self):
    """Record that the uffd handler process is already dead.

    Asserts that the handler is no longer running, then clears the stored
    process reference by setting ``_proc`` to None.
    """
    handler_alive = self.is_running()
    assert not handler_alive

    self._proc = None

def __del__(self):
"""Tear down the UFFD handler process."""
if self.proc is not None:
self.proc.kill()
if self.is_running():
self.kill()


def spawn_pf_handler(vm, handler_path, snapshot):
def spawn_pf_handler(vm, handler_path, jailed_snapshot):
"""Spawn page fault handler process."""
# Copy snapshot memory file into chroot of microVM.
jailed_snapshot = snapshot.copy_to_chroot(Path(vm.chroot()))
# Copy the valid page fault binary into chroot of microVM.
jailed_handler = vm.create_jailed_resource(handler_path)
handler_name = os.path.basename(jailed_handler)
Expand All @@ -95,7 +106,6 @@ def spawn_pf_handler(vm, handler_path, snapshot):
handler_name, SOCKET_PATH, jailed_snapshot, vm.chroot(), "uffd.log"
)
uffd_handler.spawn(vm.jailer.uid, vm.jailer.gid)
vm.uffd_handler = uffd_handler

return uffd_handler

Expand Down
11 changes: 9 additions & 2 deletions tests/host_tools/fcmetrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import datetime
import json
import logging
import math
import platform
import time
Expand Down Expand Up @@ -544,8 +545,14 @@ def stop(self):
# this should also avoid any race condition leading to
# uploading the same metrics twice
self.join()
self.vm.api.actions.put(action_type="FlushMetrics")
self._flush_metrics()
try:
self.vm.api.actions.put(action_type="FlushMetrics")
except: # pylint: disable=bare-except
# if this doesn't work, ignore the failure. This function is called during teardown,
# and if it fails there, then the resulting exception hides the actual test failure.
logging.error("Failed to flush Firecracker metrics!")
finally:
self._flush_metrics()

def run(self):
self.running = True
Expand Down
16 changes: 5 additions & 11 deletions tests/integration_tests/functional/test_uffd.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import requests

from framework.utils import Timeout, check_output
from framework.utils_uffd import spawn_pf_handler, uffd_handler


@pytest.fixture(scope="function", name="snapshot")
Expand Down Expand Up @@ -90,11 +89,7 @@ def test_valid_handler(uvm_plain, snapshot):
vm = uvm_plain
vm.memory_monitor = None
vm.spawn()

# Spawn page fault handler process.
spawn_pf_handler(vm, uffd_handler("on_demand"), snapshot)

vm.restore_from_snapshot(resume=True)
vm.restore_from_snapshot(snapshot, resume=True, uffd_handler_name="on_demand")

# Inflate balloon.
vm.api.balloon.patch(amount_mib=200)
Expand Down Expand Up @@ -124,14 +119,13 @@ def test_malicious_handler(uvm_plain, snapshot):
vm.memory_monitor = None
vm.spawn()

# Spawn page fault handler process.
spawn_pf_handler(vm, uffd_handler("malicious"), snapshot)

# We expect Firecracker to freeze while resuming from a snapshot
# due to the malicious handler's unavailability.
try:
with Timeout(seconds=30):
vm.restore_from_snapshot(resume=True)
vm.restore_from_snapshot(
snapshot, resume=True, uffd_handler_name="malicious"
)
assert False, "Firecracker should freeze"
except (TimeoutError, requests.exceptions.ReadTimeout):
pass
vm.uffd_handler.mark_killed()
20 changes: 5 additions & 15 deletions tests/integration_tests/performance/test_huge_pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from framework.microvm import HugePagesConfig
from framework.properties import global_props
from framework.utils_ftrace import ftrace_events
from framework.utils_uffd import spawn_pf_handler, uffd_handler


def check_hugetlbfs_in_use(pid: int, allocation_name: str):
Expand Down Expand Up @@ -91,11 +90,7 @@ def test_hugetlbfs_snapshot(microvm_factory, guest_kernel_linux_5_10, rootfs):
### Restore Snapshot ###
vm = microvm_factory.build()
vm.spawn()

# Spawn page fault handler process.
spawn_pf_handler(vm, uffd_handler("on_demand"), snapshot)

vm.restore_from_snapshot(resume=True)
vm.restore_from_snapshot(snapshot, resume=True, uffd_handler_name="on_demand")

check_hugetlbfs_in_use(vm.firecracker_pid, "/anon_hugepage")

Expand Down Expand Up @@ -133,11 +128,9 @@ def test_hugetlbfs_diff_snapshot(microvm_factory, uvm_plain):

vm = microvm_factory.build()
vm.spawn()

# Spawn page fault handler process.
spawn_pf_handler(vm, uffd_handler("on_demand"), snapshot_merged)

vm.restore_from_snapshot(resume=True)
vm.restore_from_snapshot(
snapshot_merged, resume=True, uffd_handler_name="on_demand"
)

# Verify if the restored microvm works.

Expand Down Expand Up @@ -192,11 +185,8 @@ def test_ept_violation_count(
vm.jailer.extra_args.update({"no-seccomp": None})
vm.spawn()

# Spawn page fault handler process.
spawn_pf_handler(vm, uffd_handler("fault_all"), snapshot)

with ftrace_events("kvm:*"):
vm.restore_from_snapshot(resume=True)
vm.restore_from_snapshot(snapshot, resume=True, uffd_handler_name="fault_all")

# Verify if guest can run commands, and also wake up the fast page fault helper to trigger page faults.
vm.ssh.check_output(f"kill -s {signal.SIGUSR1} {pid}")
Expand Down