Skip to content

Commit 6732053

Browse files
committed
Merge remote-tracking branch 'remotes/philmd-gitlab/tags/python-next-20200714' into staging
Python patches for 5.1 - Reduce race conditions on QEMUMachine::shutdown() 1. Remove the "bare except" pattern in the existing shutdown code, which can mask problems and make debugging difficult. 2. Ensure that post-shutdown cleanup is always performed, even when graceful termination fails. 3. Unify cleanup paths such that no matter how the VM is terminated, the same functions and steps are always taken to reset the object state. 4. Rewrite shutdown() such that any error encountered when attempting a graceful shutdown will be raised as an AbnormalShutdown exception. The pythonic idiom is to allow the caller to decide if this is a problem or not. - Modify part of the python/qemu library to comply with: . mypy --strict . pylint . flake8 - Script for the TCG Continuous Benchmarking project that uses callgrind to dissect QEMU execution into three main phases: . code generation . JIT execution . helpers execution CI jobs results: . https://cirrus-ci.com/build/5421349961203712 . https://gitlab.com/philmd/qemu/-/pipelines/166556001 . https://travis-ci.org/github/philmd/qemu/builds/708102347 # gpg: Signature made Tue 14 Jul 2020 21:40:05 BST # gpg: using RSA key FAABE75E12917221DCFD6BB2E3E32C2CDEADC0DE # gpg: Good signature from "Philippe Mathieu-Daudé (F4BUG) <[email protected]>" [full] # Primary key fingerprint: FAAB E75E 1291 7221 DCFD 6BB2 E3E3 2C2C DEAD C0DE * remotes/philmd-gitlab/tags/python-next-20200714: python/qmp.py: add QMPProtocolError python/qmp.py: add casts to JSON deserialization python/qmp.py: Do not return None from cmd_obj python/qmp.py: re-absorb MonitorResponseError iotests.py: use qemu.qmp type aliases python/qmp.py: Define common types python/machine.py: change default wait timeout to 3 seconds python/machine.py: re-add sigkill warning suppression python/machine.py: split shutdown into hard and soft flavors tests/acceptance: Don't test reboot on cubieboard tests/acceptance: wait() instead of shutdown() where appropriate python/machine.py: Make wait() call shutdown() python/machine.py: Add a configurable timeout to shutdown() python/machine.py: Prohibit multiple shutdown() calls python/machine.py: Perform early cleanup for wait() calls, too python/machine.py: Add _early_cleanup hook python/machine.py: Close QMP socket in cleanup python/machine.py: consolidate _post_shutdown() scripts/performance: Add dissect.py script Signed-off-by: Peter Maydell <[email protected]>
2 parents 3a9163a + 84dcdf0 commit 6732053

File tree

7 files changed

+368
-71
lines changed

7 files changed

+368
-71
lines changed

python/qemu/machine.py

Lines changed: 128 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import os
2323
import subprocess
2424
import shutil
25+
import signal
2526
import socket
2627
import tempfile
2728
from typing import Optional, Type
@@ -49,17 +50,10 @@ class QEMUMachineAddDeviceError(QEMUMachineError):
4950
"""
5051

5152

52-
class MonitorResponseError(qmp.QMPError):
53+
class AbnormalShutdown(QEMUMachineError):
5354
"""
54-
Represents erroneous QMP monitor reply
55+
Exception raised when a graceful shutdown was requested, but not performed.
5556
"""
56-
def __init__(self, reply):
57-
try:
58-
desc = reply["error"]["desc"]
59-
except KeyError:
60-
desc = reply
61-
super().__init__(desc)
62-
self.reply = reply
6357

6458

6559
class QEMUMachine:
@@ -127,6 +121,7 @@ def __init__(self, binary, args=None, wrapper=None, name=None,
127121
self._console_address = None
128122
self._console_socket = None
129123
self._remove_files = []
124+
self._user_killed = False
130125
self._console_log_path = console_log
131126
if self._console_log_path:
132127
# In order to log the console, buffering needs to be enabled.
@@ -294,6 +289,19 @@ def _post_launch(self):
294289
self._qmp.accept()
295290

296291
def _post_shutdown(self):
292+
"""
293+
Called to cleanup the VM instance after the process has exited.
294+
May also be called after a failed launch.
295+
"""
296+
# Comprehensive reset for the failed launch case:
297+
self._early_cleanup()
298+
299+
if self._qmp:
300+
self._qmp.close()
301+
self._qmp = None
302+
303+
self._load_io_log()
304+
297305
if self._qemu_log_file is not None:
298306
self._qemu_log_file.close()
299307
self._qemu_log_file = None
@@ -307,6 +315,19 @@ def _post_shutdown(self):
307315
while len(self._remove_files) > 0:
308316
self._remove_if_exists(self._remove_files.pop())
309317

318+
exitcode = self.exitcode()
319+
if (exitcode is not None and exitcode < 0
320+
and not (self._user_killed and exitcode == -signal.SIGKILL)):
321+
msg = 'qemu received signal %i; command: "%s"'
322+
if self._qemu_full_args:
323+
command = ' '.join(self._qemu_full_args)
324+
else:
325+
command = ''
326+
LOG.warning(msg, -int(exitcode), command)
327+
328+
self._user_killed = False
329+
self._launched = False
330+
310331
def launch(self):
311332
"""
312333
Launch the VM and make sure we cleanup and expose the
@@ -322,7 +343,7 @@ def launch(self):
322343
self._launch()
323344
self._launched = True
324345
except:
325-
self.shutdown()
346+
self._post_shutdown()
326347

327348
LOG.debug('Error launching VM')
328349
if self._qemu_full_args:
@@ -348,19 +369,12 @@ def _launch(self):
348369
close_fds=False)
349370
self._post_launch()
350371

351-
def wait(self):
352-
"""
353-
Wait for the VM to power off
372+
def _early_cleanup(self) -> None:
354373
"""
355-
self._popen.wait()
356-
if self._qmp:
357-
self._qmp.close()
358-
self._load_io_log()
359-
self._post_shutdown()
374+
Perform any cleanup that needs to happen before the VM exits.
360375
361-
def shutdown(self, has_quit=False, hard=False):
362-
"""
363-
Terminate the VM and clean up
376+
May be invoked by both soft and hard shutdown in failover scenarios.
377+
Called additionally by _post_shutdown for comprehensive cleanup.
364378
"""
365379
# If we keep the console socket open, we may deadlock waiting
366380
# for QEMU to exit, while QEMU is waiting for the socket to
@@ -369,37 +383,105 @@ def shutdown(self, has_quit=False, hard=False):
369383
self._console_socket.close()
370384
self._console_socket = None
371385

372-
if self.is_running():
373-
if hard:
374-
self._popen.kill()
375-
elif self._qmp:
376-
try:
377-
if not has_quit:
378-
self._qmp.cmd('quit')
379-
self._qmp.close()
380-
self._popen.wait(timeout=3)
381-
except:
382-
self._popen.kill()
383-
self._popen.wait()
386+
def _hard_shutdown(self) -> None:
387+
"""
388+
Perform early cleanup, kill the VM, and wait for it to terminate.
384389
385-
self._load_io_log()
386-
self._post_shutdown()
390+
:raise subprocess.Timeout: When timeout is exceeds 60 seconds
391+
waiting for the QEMU process to terminate.
392+
"""
393+
self._early_cleanup()
394+
self._popen.kill()
395+
self._popen.wait(timeout=60)
387396

388-
exitcode = self.exitcode()
389-
if exitcode is not None and exitcode < 0 and \
390-
not (exitcode == -9 and hard):
391-
msg = 'qemu received signal %i: %s'
392-
if self._qemu_full_args:
393-
command = ' '.join(self._qemu_full_args)
394-
else:
395-
command = ''
396-
LOG.warning(msg, -int(exitcode), command)
397+
def _soft_shutdown(self, has_quit: bool = False,
398+
timeout: Optional[int] = 3) -> None:
399+
"""
400+
Perform early cleanup, attempt to gracefully shut down the VM, and wait
401+
for it to terminate.
397402
398-
self._launched = False
403+
:param has_quit: When True, don't attempt to issue 'quit' QMP command
404+
:param timeout: Optional timeout in seconds for graceful shutdown.
405+
Default 3 seconds, A value of None is an infinite wait.
406+
407+
:raise ConnectionReset: On QMP communication errors
408+
:raise subprocess.TimeoutExpired: When timeout is exceeded waiting for
409+
the QEMU process to terminate.
410+
"""
411+
self._early_cleanup()
412+
413+
if self._qmp is not None:
414+
if not has_quit:
415+
# Might raise ConnectionReset
416+
self._qmp.cmd('quit')
417+
418+
# May raise subprocess.TimeoutExpired
419+
self._popen.wait(timeout=timeout)
420+
421+
def _do_shutdown(self, has_quit: bool = False,
422+
timeout: Optional[int] = 3) -> None:
423+
"""
424+
Attempt to shutdown the VM gracefully; fallback to a hard shutdown.
425+
426+
:param has_quit: When True, don't attempt to issue 'quit' QMP command
427+
:param timeout: Optional timeout in seconds for graceful shutdown.
428+
Default 3 seconds, A value of None is an infinite wait.
429+
430+
:raise AbnormalShutdown: When the VM could not be shut down gracefully.
431+
The inner exception will likely be ConnectionReset or
432+
subprocess.TimeoutExpired. In rare cases, non-graceful termination
433+
may result in its own exceptions, likely subprocess.TimeoutExpired.
434+
"""
435+
try:
436+
self._soft_shutdown(has_quit, timeout)
437+
except Exception as exc:
438+
self._hard_shutdown()
439+
raise AbnormalShutdown("Could not perform graceful shutdown") \
440+
from exc
441+
442+
def shutdown(self, has_quit: bool = False,
443+
hard: bool = False,
444+
timeout: Optional[int] = 3) -> None:
445+
"""
446+
Terminate the VM (gracefully if possible) and perform cleanup.
447+
Cleanup will always be performed.
448+
449+
If the VM has not yet been launched, or shutdown(), wait(), or kill()
450+
have already been called, this method does nothing.
451+
452+
:param has_quit: When true, do not attempt to issue 'quit' QMP command.
453+
:param hard: When true, do not attempt graceful shutdown, and
454+
suppress the SIGKILL warning log message.
455+
:param timeout: Optional timeout in seconds for graceful shutdown.
456+
Default 3 seconds, A value of None is an infinite wait.
457+
"""
458+
if not self._launched:
459+
return
460+
461+
try:
462+
if hard:
463+
self._user_killed = True
464+
self._hard_shutdown()
465+
else:
466+
self._do_shutdown(has_quit, timeout=timeout)
467+
finally:
468+
self._post_shutdown()
399469

400470
def kill(self):
471+
"""
472+
Terminate the VM forcefully, wait for it to exit, and perform cleanup.
473+
"""
401474
self.shutdown(hard=True)
402475

476+
def wait(self, timeout: Optional[int] = 3) -> None:
477+
"""
478+
Wait for the VM to power off and perform post-shutdown cleanup.
479+
480+
:param timeout: Optional timeout in seconds.
481+
Default 3 seconds, A value of None is an infinite wait.
482+
"""
483+
self.shutdown(has_quit=True, timeout=timeout)
484+
403485
def set_qmp_monitor(self, enabled=True):
404486
"""
405487
Set the QMP monitor.
@@ -438,7 +520,7 @@ def command(self, cmd, conv_keys=True, **args):
438520
if reply is None:
439521
raise qmp.QMPError("Monitor is closed")
440522
if "error" in reply:
441-
raise MonitorResponseError(reply)
523+
raise qmp.QMPResponseError(reply)
442524
return reply["return"]
443525

444526
def get_qmp_event(self, wait=False):

python/qemu/qmp.py

Lines changed: 54 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,32 @@
1212
import socket
1313
import logging
1414
from typing import (
15+
Any,
16+
cast,
17+
Dict,
1518
Optional,
1619
TextIO,
1720
Type,
21+
Tuple,
22+
Union,
1823
)
1924
from types import TracebackType
2025

2126

27+
# QMPMessage is a QMP Message of any kind.
28+
# e.g. {'yee': 'haw'}
29+
#
30+
# QMPReturnValue is the inner value of return values only.
31+
# {'return': {}} is the QMPMessage,
32+
# {} is the QMPReturnValue.
33+
QMPMessage = Dict[str, Any]
34+
QMPReturnValue = Dict[str, Any]
35+
36+
InternetAddrT = Tuple[str, str]
37+
UnixAddrT = str
38+
SocketAddrT = Union[InternetAddrT, UnixAddrT]
39+
40+
2241
class QMPError(Exception):
2342
"""
2443
QMP base exception
@@ -43,6 +62,25 @@ class QMPTimeoutError(QMPError):
4362
"""
4463

4564

65+
class QMPProtocolError(QMPError):
66+
"""
67+
QMP protocol error; unexpected response
68+
"""
69+
70+
71+
class QMPResponseError(QMPError):
72+
"""
73+
Represents erroneous QMP monitor reply
74+
"""
75+
def __init__(self, reply: QMPMessage):
76+
try:
77+
desc = reply['error']['desc']
78+
except KeyError:
79+
desc = reply
80+
super().__init__(desc)
81+
self.reply = reply
82+
83+
4684
class QEMUMonitorProtocol:
4785
"""
4886
Provide an API to connect to QEMU via QEMU Monitor Protocol (QMP) and then
@@ -99,7 +137,10 @@ def __json_read(self, only_event=False):
99137
data = self.__sockfile.readline()
100138
if not data:
101139
return None
102-
resp = json.loads(data)
140+
# By definition, any JSON received from QMP is a QMPMessage,
141+
# and we are asserting only at static analysis time that it
142+
# has a particular shape.
143+
resp: QMPMessage = json.loads(data)
103144
if 'event' in resp:
104145
self.logger.debug("<<< %s", resp)
105146
self.__events.append(resp)
@@ -194,22 +235,18 @@ def accept(self, timeout=15.0):
194235
self.__sockfile = self.__sock.makefile(mode='r')
195236
return self.__negotiate_capabilities()
196237

197-
def cmd_obj(self, qmp_cmd):
238+
def cmd_obj(self, qmp_cmd: QMPMessage) -> QMPMessage:
198239
"""
199240
Send a QMP command to the QMP Monitor.
200241
201242
@param qmp_cmd: QMP command to be sent as a Python dict
202-
@return QMP response as a Python dict or None if the connection has
203-
been closed
243+
@return QMP response as a Python dict
204244
"""
205245
self.logger.debug(">>> %s", qmp_cmd)
206-
try:
207-
self.__sock.sendall(json.dumps(qmp_cmd).encode('utf-8'))
208-
except OSError as err:
209-
if err.errno == errno.EPIPE:
210-
return None
211-
raise err
246+
self.__sock.sendall(json.dumps(qmp_cmd).encode('utf-8'))
212247
resp = self.__json_read()
248+
if resp is None:
249+
raise QMPConnectError("Unexpected empty reply from server")
213250
self.logger.debug("<<< %s", resp)
214251
return resp
215252

@@ -233,9 +270,13 @@ def command(self, cmd, **kwds):
233270
Build and send a QMP command to the monitor, report errors if any
234271
"""
235272
ret = self.cmd(cmd, kwds)
236-
if "error" in ret:
237-
raise Exception(ret['error']['desc'])
238-
return ret['return']
273+
if 'error' in ret:
274+
raise QMPResponseError(ret)
275+
if 'return' not in ret:
276+
raise QMPProtocolError(
277+
"'return' key not found in QMP response '{}'".format(str(ret))
278+
)
279+
return cast(QMPReturnValue, ret['return'])
239280

240281
def pull_event(self, wait=False):
241282
"""

0 commit comments

Comments
 (0)