firecracker-microvm · bchalios · Oct 8, 2024 · Sep 3, 2024 · Sep 3, 2024 · Sep 3, 2024
@@ -6,6 +6,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to
 [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## \[Unreleased\]
+
+### Fixed
+
+- [#4836](https://github.com/firecracker-microvm/firecracker/pull/4836): Fixed
+  Vsock not notifying the guest about `TRANSPORT_RESET_EVENT` after snapshot
+  restore. This left the guest waiting indefinitely on a connection that was
+  reset during snapshot creation.
+
 ## \[1.9.1\]
 
 ### Fixed

diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs
@@ -33,7 +33,7 @@
 use crate::devices::virtio::mmio::MmioTransport;
 use crate::devices::virtio::net::Net;
 use crate::devices::virtio::rng::Entropy;
-use crate::devices::virtio::vsock::TYPE_VSOCK;
+use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend, TYPE_VSOCK};
 use crate::devices::virtio::{TYPE_BALLOON, TYPE_BLOCK, TYPE_NET, TYPE_RNG};
 use crate::devices::BusDevice;
 #[cfg(target_arch = "x86_64")]
@@ -486,6 +486,16 @@
                         // so for Vsock we don't support connection persistence through snapshot.
                         // Any in-flight packets or events are simply lost.
                         // Vsock is restored 'empty'.
+                        // The only reason we still `kick` it is to make the guest process the
+                        // `TRANSPORT_RESET_EVENT` we sent during snapshot creation.
+                        let vsock = virtio
+                            .as_mut_any()
+                            .downcast_mut::<Vsock<VsockUnixBackend>>()
+                            .unwrap();
+                        if vsock.is_activated() {
+                            info!("kick vsock {id}.");
+                            vsock.signal_used_queue().unwrap();
+                        }
                     }
                     TYPE_RNG => {
                         let entropy = virtio.as_mut_any().downcast_mut::<Entropy>().unwrap();

diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs
@@ -365,11 +365,6 @@ impl<'a> Persist<'a> for MMIODeviceManager {
                         .downcast_mut::<Vsock<VsockUnixBackend>>()
                         .unwrap();
 
-                    let vsock_state = VsockState {
-                        backend: vsock.backend().save(),
-                        frontend: vsock.save(),
-                    };
-
                     // Send Transport event to reset connections if device
                     // is activated.
                     if vsock.is_activated() {
@@ -378,6 +373,13 @@ impl<'a> Persist<'a> for MMIODeviceManager {
                         });
                     }
 
+                    // Save state after potential notification to the guest. This
+                    // way we save changes to the queue the notification can cause.
+                    let vsock_state = VsockState {
+                        backend: vsock.backend().save(),
+                        frontend: vsock.save(),
+                    };
+
                     states.vsock_device = Some(ConnectedVsockState {
                         device_id: devid.clone(),
                         device_state: vsock_state,

diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py
@@ -653,6 +653,8 @@ def spawn(
         # and leave 0.2 delay between them.
         if "no-api" not in self.jailer.extra_args:
             self._wait_create()
+        if "config-file" in self.jailer.extra_args and self.iface:
+            self.wait_for_up()
         if self.log_file and log_level in ("Trace", "Debug", "Info"):
             self.check_log_message("Running Firecracker")
 
@@ -876,6 +878,9 @@ def start(self):
         # Check that the VM has started
         assert self.state == "Running"
 
+        if self.iface:
+            self.wait_for_up()
+
     def pause(self):
         """Pauses the microVM"""
         self.api.vm.patch(state="Paused")
@@ -956,6 +961,9 @@ def restore_from_snapshot(
             enable_diff_snapshots=snapshot.is_diff,
             resume_vm=resume,
         )
+        # This is not a "wait for boot", but rather a "VM still works after restoration"
+        if snapshot.net_ifaces and resume:
+            self.wait_for_up()
         return jailed_snapshot
 
     def enable_entropy_device(self):

diff --git a/tests/host_tools/udp_offload.py b/tests/host_tools/udp_offload.py
@@ -0,0 +1,58 @@
+# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""
+A utility for sending a UDP message with UDP offload enabled.
+
+Inspired by the "TUN_F_CSUM is a must" chapter
+in https://blog.cloudflare.com/fr-fr/virtual-networking-101-understanding-tap/
+by Cloudflare.
+"""
+
+import socket
+import sys
+
+
+def eprint(*args, **kwargs):
+    """Print to stderr"""
+    print(*args, file=sys.stderr, **kwargs)
+
+
+# Define SOL_UDP and UDP_SEGMENT if not defined in the system headers
+try:
+    from socket import SOL_UDP, UDP_SEGMENT
+except ImportError:
+    SOL_UDP = 17  # Protocol number for UDP
+    UDP_SEGMENT = 103  # Option code for UDP segmentation (non-standard)
+
+# Get the IP and port from command-line arguments
+if len(sys.argv) != 3:
+    eprint("Usage: python3 udp_offload.py <ip_address> <port>")
+    sys.exit(1)
+
+ip_address = sys.argv[1]
+port = int(sys.argv[2])
+
+# Create a UDP socket
+sockfd = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+
+# Set the UDP segmentation option (UDP_SEGMENT) to 1400 bytes
+OPTVAL = 1400
+try:
+    sockfd.setsockopt(SOL_UDP, UDP_SEGMENT, OPTVAL)
+except (AttributeError, PermissionError):
+    eprint("Unable to set UDP_SEGMENT option")
+    sys.exit(1)
+
+# Set the destination address and port
+servaddr = (ip_address, port)
+
+# Send the message to the destination address
+MESSAGE = b"x"
+try:
+    sockfd.sendto(MESSAGE, servaddr)
+    print("Message sent successfully")
+except socket.error as e:
+    eprint(f"Error sending message: {e}")
+    sys.exit(1)
+
+sockfd.close()
diff --git a/tests/integration_tests/functional/test_api.py b/tests/integration_tests/functional/test_api.py
@@ -83,7 +83,6 @@ def test_drive_io_engine(uvm_plain):
         test_microvm.api.drive.put(**kwargs)
 
     test_microvm.start()
-    test_microvm.wait_for_up()
 
     assert test_microvm.api.vm_config.get().json()["drives"][0]["io_engine"] == "Sync"
 
@@ -1166,7 +1165,6 @@ def test_get_full_config_after_restoring_snapshot(microvm_factory, uvm_nano):
         }
     ]
 
-    uvm_nano.wait_for_up()
     snapshot = uvm_nano.snapshot_full()
     uvm2 = microvm_factory.build()
     uvm2.spawn()

diff --git a/tests/integration_tests/functional/test_balloon.py b/tests/integration_tests/functional/test_balloon.py
@@ -74,10 +74,8 @@ def make_guest_dirty_memory(ssh_connection, amount_mib=32):
             logger.error("while running: %s", cmd)
             logger.error("stdout: %s", stdout)
             logger.error("stderr: %s", stderr)
-
-        cmd = "cat /tmp/fillmem_output.txt"
     except TimeoutExpired:
-        # It's ok if this expires. Some times the SSH connection
+        # It's ok if this expires. Sometimes the SSH connection
         # gets killed by the OOM killer *after* the fillmem program
         # started. As a result, we can ignore timeouts here.
         pass
@@ -198,7 +196,6 @@ def test_deflate_on_oom(uvm_plain_any, deflate_on_oom):
 
     # Start the microvm.
     test_microvm.start()
-    test_microvm.wait_for_up()
     firecracker_pid = test_microvm.firecracker_pid
 
     # We get an initial reading of the RSS, then calculate the amount
@@ -243,7 +240,6 @@ def test_reinflate_balloon(uvm_plain_any):
 
     # Start the microvm.
     test_microvm.start()
-    test_microvm.wait_for_up()
     firecracker_pid = test_microvm.firecracker_pid
 
     # First inflate the balloon to free up the uncertain amount of memory
@@ -340,16 +336,27 @@ def test_stats(uvm_plain_any):
 
     # Add a memory balloon with stats enabled.
     test_microvm.api.balloon.put(
-        amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=1
+        amount_mib=0,
+        deflate_on_oom=True,
+        stats_polling_interval_s=STATS_POLLING_INTERVAL_S,
     )
 
     # Start the microvm.
     test_microvm.start()
     firecracker_pid = test_microvm.firecracker_pid
 
+    # Give Firecracker enough time to poll the stats at least once post-boot
+    time.sleep(STATS_POLLING_INTERVAL_S * 2)
+
     # Get an initial reading of the stats.
     initial_stats = test_microvm.api.balloon_stats.get().json()
 
+    # Major faults happen when a page fault has to be satisfied from disk. They are not
+    # triggered by our `make_guest_dirty_memory` workload, as it uses MAP_ANONYMOUS, which
+    # only triggers minor faults. However, during the boot process, things are read from the
+    # rootfs, so we should at least see a non-zero number of major faults.
+    assert initial_stats["major_faults"] > 0
+
     # Dirty 10MB of pages.
     make_guest_dirty_memory(test_microvm.ssh, amount_mib=10)
     time.sleep(1)
@@ -359,7 +366,6 @@ def test_stats(uvm_plain_any):
     # Make sure that the stats catch the page faults.
     after_workload_stats = test_microvm.api.balloon_stats.get().json()
     assert initial_stats.get("minor_faults", 0) < after_workload_stats["minor_faults"]
-    assert initial_stats.get("major_faults", 0) < after_workload_stats["major_faults"]
 
     # Now inflate the balloon with 10MB of pages.
     test_microvm.api.balloon.patch(amount_mib=10)
@@ -482,8 +488,6 @@ def test_balloon_snapshot(microvm_factory, guest_kernel, rootfs):
     microvm.spawn()
     microvm.restore_from_snapshot(snapshot, resume=True)
 
-    microvm.wait_for_up()
-
     # Get the firecracker from snapshot pid, and open an ssh connection.
     firecracker_pid = microvm.firecracker_pid
 
@@ -520,24 +524,6 @@ def test_balloon_snapshot(microvm_factory, guest_kernel, rootfs):
     assert stats_after_snap["available_memory"] > latest_stats["available_memory"]
 
 
-def test_snapshot_compatibility(microvm_factory, guest_kernel, rootfs):
-    """
-    Test that the balloon serializes correctly.
-    """
-    vm = microvm_factory.build(guest_kernel, rootfs)
-    vm.spawn()
-    vm.basic_config(
-        vcpu_count=2,
-        mem_size_mib=256,
-    )
-
-    # Add a memory balloon with stats enabled.
-    vm.api.balloon.put(amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=1)
-
-    vm.start()
-    vm.snapshot_full()
-
-
 def test_memory_scrub(microvm_factory, guest_kernel, rootfs):
     """
     Test that the memory is zeroed after deflate.

diff --git a/tests/integration_tests/functional/test_cmd_line_start.py b/tests/integration_tests/functional/test_cmd_line_start.py
@@ -7,7 +7,6 @@
 import platform
 import re
 import shutil
-import time
 from pathlib import Path
 
 import pytest
@@ -164,7 +163,6 @@ def test_config_start_no_api_exit(uvm_plain, vm_config_file):
     test_microvm.jailer.extra_args.update({"no-api": None})
 
     test_microvm.spawn()  # Start Firecracker and MicroVM
-    time.sleep(3)  # Wait for startup
     test_microvm.ssh.run("reboot")  # Exit
 
     test_microvm.mark_killed()  # waits for process to terminate
@@ -266,7 +264,7 @@ def test_config_start_with_limit(uvm_plain, vm_config_file):
     response += '{ "error": "Request payload with size 260 is larger than '
     response += "the limit of 250 allowed by server.\n"
     response += 'All previous unanswered requests will be dropped." }'
-    _, stdout, _stderr = utils.check_output(cmd)
+    _, stdout, _ = utils.check_output(cmd)
     assert stdout.encode("utf-8") == response.encode("utf-8")
 
 
@@ -421,8 +419,6 @@ def test_config_start_and_mmds_with_api(uvm_plain, vm_config_file):
     # Network namespace has already been created.
     test_microvm.spawn()
 
-    assert test_microvm.state == "Running"
-
     data_store = {
         "latest": {
             "meta-data": {"ami-id": "ami-12345678", "reservation-id": "r-fea54097"}
@@ -434,7 +430,7 @@ def test_config_start_and_mmds_with_api(uvm_plain, vm_config_file):
     assert response.json() == {}
 
     # Populate MMDS with data.
-    response = test_microvm.api.mmds.put(**data_store)
+    test_microvm.api.mmds.put(**data_store)
 
     # Ensure the MMDS contents have been successfully updated.
     response = test_microvm.api.mmds.get()

diff --git a/tests/integration_tests/functional/test_concurrency.py b/tests/integration_tests/functional/test_concurrency.py
@@ -19,7 +19,6 @@ def launch1():
         microvm.basic_config(vcpu_count=1, mem_size_mib=128)
         microvm.add_net_iface()
         microvm.start()
-        microvm.wait_for_up()
 
     with ThreadPoolExecutor(max_workers=NO_OF_MICROVMS) as tpe:
         for _ in range(NO_OF_MICROVMS):

diff --git a/tests/integration_tests/functional/test_cpu_features.py b/tests/integration_tests/functional/test_cpu_features.py
@@ -672,7 +672,6 @@ def test_cpu_template(uvm_plain_any, cpu_template, microvm_factory):
     restored_vm = microvm_factory.build()
     restored_vm.spawn()
     restored_vm.restore_from_snapshot(snapshot, resume=True)
-    restored_vm.wait_for_up()
     check_masked_features(restored_vm, cpu_template)
     check_enabled_features(restored_vm, cpu_template)
 

diff --git a/tests/integration_tests/functional/test_cpu_features_aarch64.py b/tests/integration_tests/functional/test_cpu_features_aarch64.py
@@ -113,7 +113,6 @@ def test_cpu_features_with_static_template(
     restored_vm = microvm_factory.build()
     restored_vm.spawn()
     restored_vm.restore_from_snapshot(snapshot, resume=True)
-    restored_vm.wait_for_up()
     _check_cpu_features_arm(restored_vm, guest_kv, "v1n1")
 
 
@@ -143,5 +142,4 @@ def test_cpu_features_with_custom_template(
     restored_vm = microvm_factory.build()
     restored_vm.spawn()
     restored_vm.restore_from_snapshot(snapshot, resume=True)
-    restored_vm.wait_for_up()
     _check_cpu_features_arm(restored_vm, guest_kv, custom_cpu_template["name"])
diff --git a/tests/integration_tests/functional/test_dirty_pages_in_full_snapshot.py b/tests/integration_tests/functional/test_dirty_pages_in_full_snapshot.py
@@ -14,7 +14,6 @@ def test_dirty_pages_after_full_snapshot(uvm_plain):
     uvm.basic_config(mem_size_mib=vm_mem_size, track_dirty_pages=True)
     uvm.add_net_iface()
     uvm.start()
-    uvm.wait_for_up()
 
     snap_full = uvm.snapshot_full(vmstate_path="vmstate_full", mem_path="mem_full")
     snap_diff = uvm.snapshot_diff(vmstate_path="vmstate_diff", mem_path="mem_diff")

diff --git a/tests/integration_tests/functional/test_drive_vhost_user.py b/tests/integration_tests/functional/test_drive_vhost_user.py
@@ -56,7 +56,6 @@ def test_vhost_user_block(microvm_factory, guest_kernel, rootfs_ubuntu_22):
         "vhost_user_block", 1, aggr_supported=False
     )
     vm.start()
-    vm.wait_for_up()
 
     # Now check that vhost-user-block with rw is last.
     # 1-0 means line 1, column 0.
@@ -91,7 +90,6 @@ def test_vhost_user_block_read_write(microvm_factory, guest_kernel, rootfs_ubunt
     vm.add_vhost_user_drive("rootfs", rootfs_rw, is_root_device=True)
     vm.add_net_iface()
     vm.start()
-    vm.wait_for_up()
 
     # Now check that vhost-user-block with rw is last.
     # 1-0 means line 1, column 0.
@@ -120,7 +118,6 @@ def test_vhost_user_block_disconnect(microvm_factory, guest_kernel, rootfs_ubunt
     )
     vm.add_net_iface()
     vm.start()
-    vm.wait_for_up()
 
     # Killing the backend
     vm.disks_vhost_user["rootfs"].kill()
@@ -231,7 +228,6 @@ def test_partuuid_boot(
     )
     vm.add_net_iface()
     vm.start()
-    vm.wait_for_up()
 
     # Now check that vhost-user-block with rw is last.
     # 1-0 means line 1, column 0.
@@ -275,7 +271,6 @@ def test_partuuid_update(microvm_factory, guest_kernel, rootfs_ubuntu_22):
         "vhost_user_block", 1, aggr_supported=False
     )
     vm.start()
-    vm.wait_for_up()
 
     # Now check that vhost-user-block with rw is last.
     # 1-0 means line 1, column 0.

diff --git a/tests/integration_tests/functional/test_mmds.py b/tests/integration_tests/functional/test_mmds.py
@@ -94,7 +94,6 @@ def _validate_mmds_snapshot(
     microvm = microvm_factory.build(**kwargs)
     microvm.spawn()
     microvm.restore_from_snapshot(snapshot, resume=True)
-    microvm.wait_for_up()
 
     ssh_connection = microvm.ssh