Skip to content

Commit 21b1d44

Browse files
odesenfans and aliel authored
Fix: Graceful QEMU shutdown escalation to prevent disk corruption (#925)
* Fix: Graceful QEMU shutdown escalation to prevent disk corruption

  QemuVM.stop() previously sent an ACPI powerdown and returned immediately, leaving the 30s systemd SIGKILL as the only fallback. A SIGKILL terminates QEMU without flushing disk caches, which can corrupt qcow2 metadata and guest filesystems (e.g. missing kernel files after an in-guest apt upgrade).

  The new shutdown sequence:
    t=0s   ACPI system_powerdown (guest handles clean shutdown)
    t=50s  QMP "quit" (QEMU flushes block device caches and exits)
    t=60s  systemd SIGKILL (last resort)

* qemu: wait for process exit after QMP quit before returning from stop()

---------

Co-authored-by: Ali EL BROUDI <ali.elb@gmail.com>
1 parent a0bdae7 commit 21b1d44

2 files changed

Lines changed: 65 additions & 7 deletions

File tree

packaging/aleph-vm/etc/systemd/system/aleph-vm-controller@.service

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ Environment=PYTHONPATH=/opt/aleph-vm/:$PYTHONPATH
1212
ExecStart=/usr/bin/python3 -m aleph.vm.controllers --config=/var/lib/aleph/vm/%i-controller.json
1313
Restart=on-failure
1414
# KillMode=mixed is used so that initially only the Python controller process receives the SIGTERM signal.
15-
# The controller catches it and sends a QEMU command to shut down the Guest VM, allowing it to clean up
16-
# properly and avoid disk corruption.
17-
# After 30s (TimeoutStopSec), if the process is still running, both the controller and subprocesses receive SIGKILL.
15+
# The controller catches it and sends an ACPI powerdown to the Guest VM, giving it time to shut down
16+
# cleanly. If the guest does not respond within 50s, the controller sends QMP "quit" which makes
17+
# QEMU flush its disk caches before exiting. The 60s SIGKILL is a last resort to avoid hung processes.
1818
KillMode=mixed
19-
TimeoutStopSec=30
19+
TimeoutStopSec=60
2020

2121
[Install]
2222
WantedBy=multi-user.target

src/aleph/vm/hypervisors/qemu/qemuvm.py

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@
1212

1313
logger = logging.getLogger(__name__)
1414

15+
# Seconds to wait for guest ACPI shutdown before escalating to QMP quit.
16+
# Must be shorter than systemd TimeoutStopSec (60s) to leave time for
17+
# QEMU to flush disk caches via "quit" before the SIGKILL deadline.
18+
GRACEFUL_SHUTDOWN_TIMEOUT = 50
19+
1520

1621
@dataclass
1722
class HostVolume:
@@ -229,11 +234,64 @@ def send_shutdown_message(self):
229234
logger.info("Shutdown message sent to %s", self.vm_hash)
230235
client.close()
231236

232-
async def stop(self):
233-
"""Stop the VM."""
234-
self.send_shutdown_message()
237+
def _send_qmp_quit(self):
    """Ask QEMU to terminate itself through the QMP ``quit`` command.

    Used as the escalation step when the guest ignored the ACPI powerdown:
    ``quit`` lets QEMU flush its disk caches before exiting, which a
    SIGKILL would not.

    Best effort: nothing is sent when no QMP client is available, and a
    failing command is logged as a warning instead of being raised. The
    client is closed whether or not the command succeeded.
    """
    qmp = self._get_qmpclient()
    if not qmp:
        # No client to talk to; nothing to send.
        return

    try:
        qmp.command("quit")
        logger.info("Sent QMP quit to VM %s", self.vm_hash)
    except Exception:
        # Deliberately broad: a failed quit must not abort the stop sequence.
        logger.warning("QMP quit failed for VM %s", self.vm_hash, exc_info=True)
    finally:
        qmp.close()
235248

249+
def _close_journals(self):
    """Close the stdout and stderr journal streams, when they are real handles.

    A stream is left alone when it is unset (falsy) or when it is the
    ``asyncio.subprocess.DEVNULL`` sentinel rather than an open stream.
    """
    for stream in (self.journal_stdout, self.journal_stderr):
        if stream and stream != asyncio.subprocess.DEVNULL:
            stream.close()
254+
255+
async def stop(self):
    """Stop the VM, escalating from ACPI powerdown to QMP ``quit``.

    Sequence:

    1. Send an ACPI powerdown request and wait up to
       ``GRACEFUL_SHUTDOWN_TIMEOUT`` seconds for the QEMU process to exit.
    2. If it is still running, send QMP ``quit`` so that QEMU flushes its
       disk caches and exits (avoiding the qcow2 corruption a SIGKILL can
       cause), then wait for the time left before the systemd stop deadline.
    3. If QEMU is still alive after that, log a warning and return; the
       systemd SIGKILL is the last resort.

    The journal streams are closed on every exit path, including when this
    coroutine is cancelled while waiting or when one of the steps raises.
    """
    # Must match TimeoutStopSec in aleph-vm-controller@.service: it is the
    # moment systemd sends SIGKILL to the controller and to QEMU.
    systemd_stop_timeout = 60

    try:
        self.send_shutdown_message()

        if self.qemu_process:
            try:
                await asyncio.wait_for(self.qemu_process.wait(), timeout=GRACEFUL_SHUTDOWN_TIMEOUT)
            except asyncio.TimeoutError:
                logger.warning(
                    "VM %s did not shut down within %ds, sending QMP quit",
                    self.vm_hash,
                    GRACEFUL_SHUTDOWN_TIMEOUT,
                )
            else:
                logger.info("VM %s shut down gracefully after ACPI powerdown", self.vm_hash)
                return

        # Guest didn't respond to ACPI — tell QEMU to exit cleanly.
        # Unlike SIGKILL, "quit" flushes block device caches first.
        # NOTE(review): when there is no process handle this runs right
        # after the ACPI request, with no grace period — confirm intended.
        self._send_qmp_quit()

        # Wait for QEMU to finish flushing and exit before returning,
        # so callers that tear down network/tap/nftables after stop()
        # don't race with a still-running qemu process.
        if self.qemu_process:
            # Clamp at zero so a misconfigured graceful timeout cannot
            # produce a negative wait (and a negative value in the log).
            remaining = max(systemd_stop_timeout - GRACEFUL_SHUTDOWN_TIMEOUT, 0)
            try:
                await asyncio.wait_for(self.qemu_process.wait(), timeout=remaining)
            except asyncio.TimeoutError:
                logger.warning(
                    "VM %s still running %ds after QMP quit, systemd SIGKILL will handle it",
                    self.vm_hash,
                    remaining,
                )
            else:
                logger.info("VM %s exited after QMP quit", self.vm_hash)
    finally:
        # Runs on normal return, on error and on cancellation alike, so the
        # journal handles are never leaked.
        self._close_journals()

0 commit comments

Comments
 (0)