From 673001a1c1aab1a71269dca672feb85f9b0d2773 Mon Sep 17 00:00:00 2001
From: Ilias Stamatis <ilstam@amazon.com>
Date: Wed, 7 Jan 2026 12:00:04 +0000
Subject: [PATCH 1/2] refactor: Eliminate double call to Vmm::stop()

Commit 79c5f79c6f44e ("Shutdown VCPU threads so they can
thread::join()") introduced a Vmm::stop() function that should run in
the main VMM thread when a vCPU thread writes to the vcpus_exit_evt
eventfd. Vmm::stop() sends a termination event to the vCPU threads and
joins them. That commit claims that there was a cyclical dependency
where vCPU objects referenced the Vmm objects and therefore Vmm::stop()
could not be called from Vmm:drop().

Later, commit 1d45fcc810be2 ("vmm: fix drop logic for Vmm") introduced a
second call to Vmm::stop(), this time from Vmm::drop().

As a result, when killing the VMM today (by issuing a reboot in the
guest) Vmm::stop() is called twice and the "Vmm is stopping" message
appears twice in the firecracker logs.

Additionally, the documentation in Vmm:stop() is incorrect. Not all
teardown paths call vcpu.exit(). In fact, the most common teardown path
which is the guest asking for a CPU reset via the i8042 controller
writes to the corresponding eventfd directly and vcpu.exit() is never
called.

Today, the vCPU threads do not hold references to the Vmm object and
therefore it's fine to join the vCPU threads in Vmm::drop().

Eliminate the double call to Vmm::stop() (and the duplicate log message)
and move the vCPU thread termination and join logic to Vmm::drop().
Vmm::stop() now simply sets Vmm::shutdown_exit_code which causes the VMM
to break out of its main event loop and start the termination process.
Additionally, remove part of the documentation that was already
incorrect or becomes incorrect after this change.

Signed-off-by: Ilias Stamatis <ilstam@amazon.com>
---
 src/vmm/src/lib.rs         | 62 +++++++-------------------------------
 src/vmm/src/vstate/vcpu.rs | 20 ++----------
 2 files changed, 13 insertions(+), 69 deletions(-)

diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs
index 845ec3efd55..213f01b84f4 100644
--- a/src/vmm/src/lib.rs
+++ b/src/vmm/src/lib.rs
@@ -651,39 +651,8 @@ impl Vmm {
 
     /// Signals Vmm to stop and exit.
     pub fn stop(&mut self, exit_code: FcExitCode) {
-        // To avoid cycles, all teardown paths take the following route:
-        //   +------------------------+----------------------------+------------------------+
-        //   |        Vmm             |           Action           |           Vcpu         |
-        //   +------------------------+----------------------------+------------------------+
-        // 1 |                        |                            | vcpu.exit(exit_code)   |
-        // 2 |                        |                            | vcpu.exit_evt.write(1) |
-        // 3 |                        | <--- EventFd::exit_evt --- |                        |
-        // 4 | vmm.stop()             |                            |                        |
-        // 5 |                        | --- VcpuEvent::Finish ---> |                        |
-        // 6 |                        |                            | StateMachine::finish() |
-        // 7 | VcpuHandle::join()     |                            |                        |
-        // 8 | vmm.shutdown_exit_code becomes Some(exit_code) breaking the main event loop  |
-        //   +------------------------+----------------------------+------------------------+
-        // Vcpu initiated teardown starts from `fn Vcpu::exit()` (step 1).
-        // Vmm initiated teardown starts from `pub fn Vmm::stop()` (step 4).
-        // Once `vmm.shutdown_exit_code` becomes `Some(exit_code)`, it is the upper layer's
-        // responsibility to break main event loop and propagate the exit code value.
         info!("Vmm is stopping.");
 
-        // We send a "Finish" event.  If a VCPU has already exited, this is the only
-        // message it will accept... but running and paused will take it as well.
-        // It breaks out of the state machine loop so that the thread can be joined.
-        for (idx, handle) in self.vcpus_handles.iter_mut().enumerate() {
-            if let Err(err) = handle.send_event(VcpuEvent::Finish) {
-                error!("Failed to send VcpuEvent::Finish to vCPU {}: {}", idx, err);
-            }
-        }
-        // The actual thread::join() that runs to release the thread's resource is done in
-        // the VcpuHandle's Drop trait.  We can trigger that to happen now by clearing the
-        // list of handles. Do it here instead of Vmm::Drop to avoid dependency cycles.
-        // (Vmm's Drop will also check if this list is empty).
-        self.vcpus_handles.clear();
-
         // Break the main event loop, propagating the Vmm exit-code.
         self.shutdown_exit_code = Some(exit_code);
     }
@@ -722,26 +691,17 @@ fn construct_kvm_mpidrs(vcpu_states: &[VcpuState]) -> Vec<u64> {
 
 impl Drop for Vmm {
     fn drop(&mut self) {
-        // There are two cases when `drop()` is called:
-        // 1) before the Vmm has been mutexed and subscribed to the event manager, or
-        // 2) after the Vmm has been registered as a subscriber to the event manager.
-        //
-        // The first scenario is bound to happen if an error is raised during
-        // Vmm creation (for example, during snapshot load), before the Vmm has
-        // been subscribed to the event manager. If that happens, the `drop()`
-        // function is called right before propagating the error. In order to
-        // be able to gracefully exit Firecracker with the correct fault
-        // message, we need to prepare the Vmm contents for the tear down
-        // (join the vcpu threads). Explicitly calling `stop()` allows the
-        // Vmm to be successfully dropped and firecracker to propagate the
-        // error.
-        //
-        // In the second case, before dropping the Vmm object, the event
-        // manager calls `stop()`, which sends a `Finish` event to the vcpus
-        // and joins the vcpu threads. The Vmm is dropped after everything is
-        // ready to be teared down. The line below is a no-op, because the Vmm
-        // has already been stopped by the event manager at this point.
-        self.stop(self.shutdown_exit_code.unwrap_or(FcExitCode::Ok));
+        info!("Killing vCPU threads");
+
+        // Send a "Finish" event to the vCPU threads so that they terminate.
+        for (idx, handle) in self.vcpus_handles.iter_mut().enumerate() {
+            if let Err(err) = handle.send_event(VcpuEvent::Finish) {
+                error!("Failed to send VcpuEvent::Finish to vCPU {}: {}", idx, err);
+            }
+        }
+
+        // Join the vCPU threads by running VcpuHandle::drop().
+        self.vcpus_handles.clear();
 
         if let Err(err) = std::io::stdin().lock().set_canon_mode() {
             warn!("Cannot set canonical mode for the terminal. {:?}", err);
diff --git a/src/vmm/src/vstate/vcpu.rs b/src/vmm/src/vstate/vcpu.rs
index 1efeee58a8d..3f844dc3644 100644
--- a/src/vmm/src/vstate/vcpu.rs
+++ b/src/vmm/src/vstate/vcpu.rs
@@ -362,25 +362,9 @@ impl Vcpu {
     }
 
     // Transition to the exited state and finish on command.
+    // Note that this function isn't called when the guest asks for a CPU
+    // reset via the i8042 controller on x86.
     fn exit(&mut self, exit_code: FcExitCode) -> StateMachine<Self> {
-        // To avoid cycles, all teardown paths take the following route:
-        //   +------------------------+----------------------------+------------------------+
-        //   |        Vmm             |           Action           |           Vcpu         |
-        //   +------------------------+----------------------------+------------------------+
-        // 1 |                        |                            | vcpu.exit(exit_code)   |
-        // 2 |                        |                            | vcpu.exit_evt.write(1) |
-        // 3 |                        | <--- EventFd::exit_evt --- |                        |
-        // 4 | vmm.stop()             |                            |                        |
-        // 5 |                        | --- VcpuEvent::Finish ---> |                        |
-        // 6 |                        |                            | StateMachine::finish() |
-        // 7 | VcpuHandle::join()     |                            |                        |
-        // 8 | vmm.shutdown_exit_code becomes Some(exit_code) breaking the main event loop  |
-        //   +------------------------+----------------------------+------------------------+
-        // Vcpu initiated teardown starts from `fn Vcpu::exit()` (step 1).
-        // Vmm initiated teardown starts from `pub fn Vmm::stop()` (step 4).
-        // Once `vmm.shutdown_exit_code` becomes `Some(exit_code)`, it is the upper layer's
-        // responsibility to break main event loop and propagate the exit code value.
-        // Signal Vmm of Vcpu exit.
         if let Err(err) = self.exit_evt.write(1) {
             METRICS.vcpu.failures.inc();
             error!("Failed signaling vcpu exit event: {}", err);

From ad9f5bdb089d48425634c7c0c126aaf32bb7de91 Mon Sep 17 00:00:00 2001
From: Ilias Stamatis <ilstam@amazon.com>
Date: Wed, 7 Jan 2026 15:15:19 +0000
Subject: [PATCH 2/2] fix(vcpu): Don't treat KVM_EXIT_{SHUTDOWN,HLT} as
 successful termination

This is almost a pure revert of commit 3a9a1ac0b8c0 ("exit with success
code on certain KVM_EXIT events") which added code that treats
KVM_EXIT_SHUTDOWN and KVM_EXIT_HLT as successful VM terminations.

KVM_EXIT_SHUTDOWN is an exit code that KVM uses when an x86 CPU triple
faults.

KVM_EXIT_HLT is the exit code that KVM uses when the guest executes a
HALT x86 instruction and KVM doesn't emulate the irqchip. Since we're
using the in-kernel irqchip we should never see a KVM_EXIT_HLT exit, as
HALT instructions are emulated by KVM and do not cause userspace exits.

Do not return Ok(VcpuEmulation::Stopped) for these exit types since that
ends up propagating an FcExitCode::Ok code to the main thread, even
though these are abnormal terminations (especially the triple-fault
one).

Remove special handling for these x86-specific exit reasons and treat
them as any other unexpected exit reason.

Also, replace an incorrect comment that says that vCPUs exit with
KVM_EXIT_SHUTDOWN or KVM_EXIT_HLT when the guest issues a reboot. On x86
the guest asks for a CPU reset via the i8042 controller which
Firecracker intercepts and kills the VM. On ARM KVM exits to userspace
with the reason KVM_EXIT_SYSTEM_EVENT (which we already handle
correctly).

Since we're in the neighbourhood get rid of a stale (8 year old) TODO
comment questioning whether we should kill the VM when we get an
unexpected KVM exit reason. Terminating the VM as we have been doing is
completely reasonable.

Fixes: 3a9a1ac0b8c0 ("exit with success code on certain KVM_EXIT events")
Signed-off-by: Ilias Stamatis <ilstam@amazon.com>
---
 src/vmm/src/arch/x86_64/vcpu.rs |  2 --
 src/vmm/src/vstate/vcpu.rs      | 25 +++++++++++--------------
 2 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/src/vmm/src/arch/x86_64/vcpu.rs b/src/vmm/src/arch/x86_64/vcpu.rs
index c2c5352d5e8..64bff7fb0d2 100644
--- a/src/vmm/src/arch/x86_64/vcpu.rs
+++ b/src/vmm/src/arch/x86_64/vcpu.rs
@@ -746,8 +746,6 @@ impl Peripherals {
             }
             unexpected_exit => {
                 METRICS.vcpu.failures.inc();
-                // TODO: Are we sure we want to finish running a vcpu upon
-                // receiving a vm exit that is not necessarily an error?
                 error!("Unexpected exit reason on vcpu run: {:?}", unexpected_exit);
                 Err(VcpuError::UnhandledKvmExit(format!(
                     "{:?}",
diff --git a/src/vmm/src/vstate/vcpu.rs b/src/vmm/src/vstate/vcpu.rs
index 3f844dc3644..8380396b0e9 100644
--- a/src/vmm/src/vstate/vcpu.rs
+++ b/src/vmm/src/vstate/vcpu.rs
@@ -226,10 +226,9 @@ impl Vcpu {
                 Ok(VcpuEmulation::Handled) => (),
                 // Emulation was interrupted, check external events.
                 Ok(VcpuEmulation::Interrupted) => break,
-                // If the guest was rebooted or halted:
-                // - vCPU0 will always exit out of `KVM_RUN` with KVM_EXIT_SHUTDOWN or KVM_EXIT_HLT.
-                // - the other vCPUs won't ever exit out of `KVM_RUN`, but they won't consume CPU.
-                // So we pause vCPU0 and send a signal to the emulation thread to stop the VMM.
+                // The guest requested a SHUTDOWN or RESET. This is ARM
+                // specific. On x86 the i8042 emulation signals the main thread
+                // directly without calling Vcpu::exit().
                 Ok(VcpuEmulation::Stopped) => return self.exit(FcExitCode::Ok),
                 // If the emulation requests a pause lets do this
                 #[cfg(feature = "gdb")]
@@ -440,14 +439,6 @@ fn handle_kvm_exit(
                 }
                 Ok(VcpuEmulation::Handled)
             }
-            VcpuExit::Hlt => {
-                info!("Received KVM_EXIT_HLT signal");
-                Ok(VcpuEmulation::Stopped)
-            }
-            VcpuExit::Shutdown => {
-                info!("Received KVM_EXIT_SHUTDOWN signal");
-                Ok(VcpuEmulation::Stopped)
-            }
             // Documentation specifies that below kvm exits are considered
             // errors.
             VcpuExit::FailEntry(hardware_entry_failure_reason, cpu) => {
@@ -697,10 +688,16 @@ pub(crate) mod tests {
     fn test_handle_kvm_exit() {
         let (_, _, mut vcpu) = setup_vcpu(0x1000);
         let res = handle_kvm_exit(&mut vcpu.kvm_vcpu.peripherals, Ok(VcpuExit::Hlt));
-        assert_eq!(res.unwrap(), VcpuEmulation::Stopped);
+        assert!(matches!(
+            res,
+            Err(EmulationError::UnhandledKvmExit(s)) if s == "Hlt",
+        ));
 
         let res = handle_kvm_exit(&mut vcpu.kvm_vcpu.peripherals, Ok(VcpuExit::Shutdown));
-        assert_eq!(res.unwrap(), VcpuEmulation::Stopped);
+        assert!(matches!(
+            res,
+            Err(EmulationError::UnhandledKvmExit(s)) if s == "Shutdown",
+        ));
 
         let res = handle_kvm_exit(
             &mut vcpu.kvm_vcpu.peripherals,