Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to
[Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## \[Unreleased\]

### Fixed

- [#4836](https://github.com/firecracker-microvm/firecracker/pull/4836): Fixed
Vsock not notifying the guest about the `TRANSPORT_RESET_EVENT` event after
snapshot restore. This resulted in the guest waiting indefinitely on a
connection which was reset during snapshot creation.

## \[1.9.1\]

### Fixed
Expand Down
12 changes: 11 additions & 1 deletion src/vmm/src/device_manager/mmio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
use crate::devices::virtio::mmio::MmioTransport;
use crate::devices::virtio::net::Net;
use crate::devices::virtio::rng::Entropy;
use crate::devices::virtio::vsock::TYPE_VSOCK;
use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend, TYPE_VSOCK};
use crate::devices::virtio::{TYPE_BALLOON, TYPE_BLOCK, TYPE_NET, TYPE_RNG};
use crate::devices::BusDevice;
#[cfg(target_arch = "x86_64")]
Expand Down Expand Up @@ -486,6 +486,16 @@
// so for Vsock we don't support connection persistence through snapshot.
// Any in-flight packets or events are simply lost.
// Vsock is restored 'empty'.
// The only reason we still `kick` it is to make the guest process the
// `TRANSPORT_RESET_EVENT` event we sent during snapshot creation.
let vsock = virtio
.as_mut_any()
.downcast_mut::<Vsock<VsockUnixBackend>>()
.unwrap();
if vsock.is_activated() {
info!("kick vsock {id}.");
vsock.signal_used_queue().unwrap();
}

Check warning on line 498 in src/vmm/src/device_manager/mmio.rs

View check run for this annotation

Codecov / codecov/patch

src/vmm/src/device_manager/mmio.rs#L491-L498

Added lines #L491 - L498 were not covered by tests
}
TYPE_RNG => {
let entropy = virtio.as_mut_any().downcast_mut::<Entropy>().unwrap();
Expand Down
12 changes: 7 additions & 5 deletions src/vmm/src/device_manager/persist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -365,11 +365,6 @@ impl<'a> Persist<'a> for MMIODeviceManager {
.downcast_mut::<Vsock<VsockUnixBackend>>()
.unwrap();

let vsock_state = VsockState {
backend: vsock.backend().save(),
frontend: vsock.save(),
};

// Send Transport event to reset connections if device
// is activated.
if vsock.is_activated() {
Expand All @@ -378,6 +373,13 @@ impl<'a> Persist<'a> for MMIODeviceManager {
});
}

// Save the state only after potentially notifying the guest, so that any
// changes the notification makes to the queue are included in the saved state.
let vsock_state = VsockState {
backend: vsock.backend().save(),
frontend: vsock.save(),
};

states.vsock_device = Some(ConnectedVsockState {
device_id: devid.clone(),
device_state: vsock_state,
Expand Down
8 changes: 8 additions & 0 deletions tests/framework/microvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,8 @@ def spawn(
# and leave 0.2 delay between them.
if "no-api" not in self.jailer.extra_args:
self._wait_create()
if "config-file" in self.jailer.extra_args and self.iface:
self.wait_for_up()
if self.log_file and log_level in ("Trace", "Debug", "Info"):
self.check_log_message("Running Firecracker")

Expand Down Expand Up @@ -876,6 +878,9 @@ def start(self):
# Check that the VM has started
assert self.state == "Running"

if self.iface:
self.wait_for_up()

def pause(self):
"""Pauses the microVM"""
self.api.vm.patch(state="Paused")
Expand Down Expand Up @@ -956,6 +961,9 @@ def restore_from_snapshot(
enable_diff_snapshots=snapshot.is_diff,
resume_vm=resume,
)
# This is not a "wait for boot", but rather a "VM still works after restoration"
if snapshot.net_ifaces and resume:
self.wait_for_up()
return jailed_snapshot

def enable_entropy_device(self):
Expand Down
58 changes: 58 additions & 0 deletions tests/host_tools/udp_offload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""
A utility for sending a UDP message with UDP offload enabled.

Inspired by the "TUN_F_CSUM is a must" chapter
in https://blog.cloudflare.com/fr-fr/virtual-networking-101-understanding-tap/
by Cloudflare.
"""

import socket
import sys


def eprint(*args, **kwargs):
    """Print to stderr; takes the same arguments as the built-in print()."""
    print(*args, file=sys.stderr, **kwargs)


# Define SOL_UDP and UDP_SEGMENT if the socket module does not export them
try:
    from socket import SOL_UDP, UDP_SEGMENT
except ImportError:
    SOL_UDP = 17  # Protocol number for UDP
    UDP_SEGMENT = 103  # Option code for UDP segmentation (Linux-specific)

# Get the IP and port from command-line arguments
if len(sys.argv) != 3:
    eprint("Usage: python3 udp_offload.py <ip_address> <port>")
    sys.exit(1)

ip_address = sys.argv[1]
try:
    port = int(sys.argv[2])
except ValueError:
    # Report a bad port the same way as any other usage error instead of
    # dying with a traceback.
    eprint(f"Invalid port: {sys.argv[2]}")
    sys.exit(1)

# Segment size requested through the UDP_SEGMENT option, in bytes
OPTVAL = 1400

# Destination address and payload
servaddr = (ip_address, port)
MESSAGE = b"x"

# The context manager closes the socket on every exit path, including the
# sys.exit() calls below (SystemExit propagates through the `with`).
with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sockfd:
    # Enable UDP segmentation offload on the socket.
    try:
        sockfd.setsockopt(SOL_UDP, UDP_SEGMENT, OPTVAL)
    except (AttributeError, OSError):
        # OSError covers PermissionError as well as kernels that reject the
        # option outright.
        eprint("Unable to set UDP_SEGMENT option")
        sys.exit(1)

    # Send the message to the destination address
    try:
        sockfd.sendto(MESSAGE, servaddr)
        print("Message sent successfully")
    except (OSError, OverflowError) as e:
        # OverflowError is raised for a port outside 0-65535.
        eprint(f"Error sending message: {e}")
        sys.exit(1)
2 changes: 0 additions & 2 deletions tests/integration_tests/functional/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ def test_drive_io_engine(uvm_plain):
test_microvm.api.drive.put(**kwargs)

test_microvm.start()
test_microvm.wait_for_up()

assert test_microvm.api.vm_config.get().json()["drives"][0]["io_engine"] == "Sync"

Expand Down Expand Up @@ -1166,7 +1165,6 @@ def test_get_full_config_after_restoring_snapshot(microvm_factory, uvm_nano):
}
]

uvm_nano.wait_for_up()
snapshot = uvm_nano.snapshot_full()
uvm2 = microvm_factory.build()
uvm2.spawn()
Expand Down
40 changes: 13 additions & 27 deletions tests/integration_tests/functional/test_balloon.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,8 @@ def make_guest_dirty_memory(ssh_connection, amount_mib=32):
logger.error("while running: %s", cmd)
logger.error("stdout: %s", stdout)
logger.error("stderr: %s", stderr)

cmd = "cat /tmp/fillmem_output.txt"
except TimeoutExpired:
# It's ok if this expires. Some times the SSH connection
# It's ok if this expires. Sometimes the SSH connection
# gets killed by the OOM killer *after* the fillmem program
# started. As a result, we can ignore timeouts here.
pass
Expand Down Expand Up @@ -198,7 +196,6 @@ def test_deflate_on_oom(uvm_plain_any, deflate_on_oom):

# Start the microvm.
test_microvm.start()
test_microvm.wait_for_up()
firecracker_pid = test_microvm.firecracker_pid

# We get an initial reading of the RSS, then calculate the amount
Expand Down Expand Up @@ -243,7 +240,6 @@ def test_reinflate_balloon(uvm_plain_any):

# Start the microvm.
test_microvm.start()
test_microvm.wait_for_up()
firecracker_pid = test_microvm.firecracker_pid

# First inflate the balloon to free up the uncertain amount of memory
Expand Down Expand Up @@ -340,16 +336,27 @@ def test_stats(uvm_plain_any):

# Add a memory balloon with stats enabled.
test_microvm.api.balloon.put(
amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=1
amount_mib=0,
deflate_on_oom=True,
stats_polling_interval_s=STATS_POLLING_INTERVAL_S,
)

# Start the microvm.
test_microvm.start()
firecracker_pid = test_microvm.firecracker_pid

# Give Firecracker enough time to poll the stats at least once post-boot
time.sleep(STATS_POLLING_INTERVAL_S * 2)

# Get an initial reading of the stats.
initial_stats = test_microvm.api.balloon_stats.get().json()

# Major faults happen when a page fault has to be satisfied from disk. They are not
# triggered by our `make_guest_dirty_memory` workload, as it uses MAP_ANONYMOUS, which
# only triggers minor faults. However, during the boot process, things are read from the
# rootfs, so we should at least see a non-zero number of major faults.
assert initial_stats["major_faults"] > 0

# Dirty 10MB of pages.
make_guest_dirty_memory(test_microvm.ssh, amount_mib=10)
time.sleep(1)
Expand All @@ -359,7 +366,6 @@ def test_stats(uvm_plain_any):
# Make sure that the stats catch the page faults.
after_workload_stats = test_microvm.api.balloon_stats.get().json()
assert initial_stats.get("minor_faults", 0) < after_workload_stats["minor_faults"]
assert initial_stats.get("major_faults", 0) < after_workload_stats["major_faults"]

# Now inflate the balloon with 10MB of pages.
test_microvm.api.balloon.patch(amount_mib=10)
Expand Down Expand Up @@ -482,8 +488,6 @@ def test_balloon_snapshot(microvm_factory, guest_kernel, rootfs):
microvm.spawn()
microvm.restore_from_snapshot(snapshot, resume=True)

microvm.wait_for_up()

# Get the firecracker from snapshot pid, and open an ssh connection.
firecracker_pid = microvm.firecracker_pid

Expand Down Expand Up @@ -520,24 +524,6 @@ def test_balloon_snapshot(microvm_factory, guest_kernel, rootfs):
assert stats_after_snap["available_memory"] > latest_stats["available_memory"]


def test_snapshot_compatibility(microvm_factory, guest_kernel, rootfs):
"""
Test that the balloon serializes correctly.
"""
vm = microvm_factory.build(guest_kernel, rootfs)
vm.spawn()
vm.basic_config(
vcpu_count=2,
mem_size_mib=256,
)

# Add a memory balloon with stats enabled.
vm.api.balloon.put(amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=1)

vm.start()
vm.snapshot_full()


def test_memory_scrub(microvm_factory, guest_kernel, rootfs):
"""
Test that the memory is zeroed after deflate.
Expand Down
8 changes: 2 additions & 6 deletions tests/integration_tests/functional/test_cmd_line_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import platform
import re
import shutil
import time
from pathlib import Path

import pytest
Expand Down Expand Up @@ -164,7 +163,6 @@ def test_config_start_no_api_exit(uvm_plain, vm_config_file):
test_microvm.jailer.extra_args.update({"no-api": None})

test_microvm.spawn() # Start Firecracker and MicroVM
time.sleep(3) # Wait for startup
test_microvm.ssh.run("reboot") # Exit

test_microvm.mark_killed() # waits for process to terminate
Expand Down Expand Up @@ -266,7 +264,7 @@ def test_config_start_with_limit(uvm_plain, vm_config_file):
response += '{ "error": "Request payload with size 260 is larger than '
response += "the limit of 250 allowed by server.\n"
response += 'All previous unanswered requests will be dropped." }'
_, stdout, _stderr = utils.check_output(cmd)
_, stdout, _ = utils.check_output(cmd)
assert stdout.encode("utf-8") == response.encode("utf-8")


Expand Down Expand Up @@ -421,8 +419,6 @@ def test_config_start_and_mmds_with_api(uvm_plain, vm_config_file):
# Network namespace has already been created.
test_microvm.spawn()

assert test_microvm.state == "Running"

data_store = {
"latest": {
"meta-data": {"ami-id": "ami-12345678", "reservation-id": "r-fea54097"}
Expand All @@ -434,7 +430,7 @@ def test_config_start_and_mmds_with_api(uvm_plain, vm_config_file):
assert response.json() == {}

# Populate MMDS with data.
response = test_microvm.api.mmds.put(**data_store)
test_microvm.api.mmds.put(**data_store)

# Ensure the MMDS contents have been successfully updated.
response = test_microvm.api.mmds.get()
Expand Down
1 change: 0 additions & 1 deletion tests/integration_tests/functional/test_concurrency.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ def launch1():
microvm.basic_config(vcpu_count=1, mem_size_mib=128)
microvm.add_net_iface()
microvm.start()
microvm.wait_for_up()

with ThreadPoolExecutor(max_workers=NO_OF_MICROVMS) as tpe:
for _ in range(NO_OF_MICROVMS):
Expand Down
1 change: 0 additions & 1 deletion tests/integration_tests/functional/test_cpu_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,7 +672,6 @@ def test_cpu_template(uvm_plain_any, cpu_template, microvm_factory):
restored_vm = microvm_factory.build()
restored_vm.spawn()
restored_vm.restore_from_snapshot(snapshot, resume=True)
restored_vm.wait_for_up()
check_masked_features(restored_vm, cpu_template)
check_enabled_features(restored_vm, cpu_template)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ def test_cpu_features_with_static_template(
restored_vm = microvm_factory.build()
restored_vm.spawn()
restored_vm.restore_from_snapshot(snapshot, resume=True)
restored_vm.wait_for_up()
_check_cpu_features_arm(restored_vm, guest_kv, "v1n1")


Expand Down Expand Up @@ -143,5 +142,4 @@ def test_cpu_features_with_custom_template(
restored_vm = microvm_factory.build()
restored_vm.spawn()
restored_vm.restore_from_snapshot(snapshot, resume=True)
restored_vm.wait_for_up()
_check_cpu_features_arm(restored_vm, guest_kv, custom_cpu_template["name"])
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ def test_dirty_pages_after_full_snapshot(uvm_plain):
uvm.basic_config(mem_size_mib=vm_mem_size, track_dirty_pages=True)
uvm.add_net_iface()
uvm.start()
uvm.wait_for_up()

snap_full = uvm.snapshot_full(vmstate_path="vmstate_full", mem_path="mem_full")
snap_diff = uvm.snapshot_diff(vmstate_path="vmstate_diff", mem_path="mem_diff")
Expand Down
5 changes: 0 additions & 5 deletions tests/integration_tests/functional/test_drive_vhost_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ def test_vhost_user_block(microvm_factory, guest_kernel, rootfs_ubuntu_22):
"vhost_user_block", 1, aggr_supported=False
)
vm.start()
vm.wait_for_up()

# Now check that vhost-user-block with rw is last.
# 1-0 means line 1, column 0.
Expand Down Expand Up @@ -91,7 +90,6 @@ def test_vhost_user_block_read_write(microvm_factory, guest_kernel, rootfs_ubunt
vm.add_vhost_user_drive("rootfs", rootfs_rw, is_root_device=True)
vm.add_net_iface()
vm.start()
vm.wait_for_up()

# Now check that vhost-user-block with rw is last.
# 1-0 means line 1, column 0.
Expand Down Expand Up @@ -120,7 +118,6 @@ def test_vhost_user_block_disconnect(microvm_factory, guest_kernel, rootfs_ubunt
)
vm.add_net_iface()
vm.start()
vm.wait_for_up()

# Killing the backend
vm.disks_vhost_user["rootfs"].kill()
Expand Down Expand Up @@ -231,7 +228,6 @@ def test_partuuid_boot(
)
vm.add_net_iface()
vm.start()
vm.wait_for_up()

# Now check that vhost-user-block with rw is last.
# 1-0 means line 1, column 0.
Expand Down Expand Up @@ -275,7 +271,6 @@ def test_partuuid_update(microvm_factory, guest_kernel, rootfs_ubuntu_22):
"vhost_user_block", 1, aggr_supported=False
)
vm.start()
vm.wait_for_up()

# Now check that vhost-user-block with rw is last.
# 1-0 means line 1, column 0.
Expand Down
1 change: 0 additions & 1 deletion tests/integration_tests/functional/test_mmds.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ def _validate_mmds_snapshot(
microvm = microvm_factory.build(**kwargs)
microvm.spawn()
microvm.restore_from_snapshot(snapshot, resume=True)
microvm.wait_for_up()

ssh_connection = microvm.ssh

Expand Down
Loading
Loading