diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 03f90905843..fa9dea79b82 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -301,7 +301,7 @@ def mark_killed(self): self._killed = True - def kill(self): + def kill(self, might_be_dead=False): """All clean up associated with this microVM should go here.""" # pylint: disable=subprocess-run-check # if it was already killed, return @@ -314,7 +314,7 @@ def kill(self): # Kill all background SSH connections for connection in self._connections: - connection.close() + connection.close(strict=not might_be_dead) # We start with vhost-user backends, # because if we stop Firecracker first, the backend will want @@ -325,8 +325,10 @@ def kill(self): assert ( "Shutting down VM after intercepting signal" not in self.log_data + or might_be_dead ), self.log_data + # pylint: disable=bare-except try: if self.firecracker_pid: os.kill(self.firecracker_pid, signal.SIGKILL) @@ -334,15 +336,16 @@ def kill(self): if self.screen_pid: os.kill(self.screen_pid, signal.SIGKILL) except: - msg = ( - "Failed to kill Firecracker Process. Did it already die (or did the UFFD handler process die and take it down)?" - if self.uffd_handler - else "Failed to kill Firecracker Process. Did it already die?" - ) + if not might_be_dead: + msg = ( + "Failed to kill Firecracker Process. Did it already die (or did the UFFD handler process die and take it down)?" + if self.uffd_handler + else "Failed to kill Firecracker Process. Did it already die?" + ) - self._dump_debug_information(msg) + self._dump_debug_information(msg) - raise + raise # if microvm was spawned then check if it gets killed if self._spawned: diff --git a/tests/host_tools/network.py b/tests/host_tools/network.py index 93cdb323c50..c8f45ed1898 100644 --- a/tests/host_tools/network.py +++ b/tests/host_tools/network.py @@ -145,11 +145,21 @@ def _init_connection(self): self.close() raise - def _check_liveness(self) -> int: - """Checks whether the ControlPersist connection is still alive""" + def _check_liveness(self, strict=True) -> int | None: + """Checks whether the ControlPersist connection is still alive + + It will return the pid of the ControlMaster if it is still running, + otherwise None + """ check_cmd = ["ssh", "-O", "check", *self.options, self.user_host] - _, _, stderr = self._exec(check_cmd, check=True) + try: + _, _, stderr = self._exec(check_cmd, check=True) + except ChildProcessError: + if strict: + raise + + return None pid_match = re.match(r"Master running \(pid=(\d+)\)", stderr) @@ -157,9 +167,11 @@ def _check_liveness(self) -> int: return int(pid_match.group(1)) - def close(self): + def close(self, strict=True): """Closes the ControlPersist connection""" - master_pid = self._check_liveness() + master_pid = self._check_liveness(strict) + if master_pid is None: + return stop_cmd = ["ssh", "-O", "stop", *self.options, self.user_host] @@ -182,7 +194,7 @@ def run(self, cmd_string, timeout=100, *, check=False, debug=False): If `debug` is set, pass `-vvv` to `ssh`. Note that this will clobber stderr. """ - self._check_liveness() + self._check_liveness(True) command = ["ssh", *self.options, self.user_host, cmd_string] diff --git a/tests/integration_tests/functional/test_balloon.py b/tests/integration_tests/functional/test_balloon.py index 314cd9b5afd..f8960bedb6d 100644 --- a/tests/integration_tests/functional/test_balloon.py +++ b/tests/integration_tests/functional/test_balloon.py @@ -228,6 +228,9 @@ def test_deflate_on_oom(uvm_plain_any, deflate_on_oom): assert balloon_size_after < balloon_size_before, "Balloon did not deflate" else: assert balloon_size_after >= balloon_size_before, "Balloon deflated" + # Kill it here, letting the infrastructure know that the process might + # be dead already. + test_microvm.kill(might_be_dead=True) # pylint: disable=C0103