Skip to content

Commit df82a41

Browse files
roypatpb8o
authored andcommitted
Combine SSHConnection._init_connection and Microvm.wait_for_ssh_up
We use `Microvm.wait_for_ssh_up` to wait for a booted guest's userland to be initialized enough that it can process incoming SSH connections. However, it turns out that we werent waiting the intended 10s, we were waiting 3s. The reason for this is that before the `true` command in `wait_for_ssh_up` is even sent to the guest, we go through the `Microvm.ssh` property, which calls the `Microvm.ssh_face` function, which constructs an `SSHConnection` object, whose construtor calls `SSHConnection._init_connection`. And inside `_init_connection`, we _already_ try to send a `true` command to the guest, to ascertain it is up and running. However, here we do it in a retry loop (needed because if port 22 is still closed, `ssh` exits with non-zero code and "connection refuses") that does 20 retries at 0.15s intervals. Or tries to 3 seconds. Most importantly, the retries in `_init_connection` are done without timeouts, so even _if_ we actually hung during connection establishment, `wait_for_ssh_up` wouldn't abort after 10 seconds, because it would get stuck inside of `_init_connection`, which doesn't know anything about a 10s timeout. Fix all this up by merging `_init_connection` and `wait_for_ssh_up`. Skip sending the `true` command in `wait_for_ssh_up`, and just call `ssh_iface(0)` directly. Increment the retry time in `_init_connection` to actually be 10s. And add a 10s timeout to the commands in `_init_connection`. Signed-off-by: Patrick Roy <[email protected]>
1 parent 82eec3e commit df82a41

File tree

3 files changed

+13
-21
lines changed

3 files changed

+13
-21
lines changed

tests/framework/microvm.py

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import select
1919
import shutil
2020
import signal
21-
import subprocess
2221
import time
2322
import uuid
2423
from collections import namedtuple
@@ -1003,26 +1002,19 @@ def thread_backtraces(self):
10031002
)
10041003
return "\n".join(backtraces)
10051004

1006-
def wait_for_ssh_up(self, timeout=10):
1007-
"""Wait for guest running inside the microVM to come up and respond.
1008-
1009-
:param timeout: seconds to wait.
1010-
"""
1005+
def wait_for_ssh_up(self):
1006+
"""Wait for guest running inside the microVM to come up and respond."""
10111007
try:
1012-
rc, stdout, stderr = self.ssh.run("true", timeout)
1013-
except subprocess.TimeoutExpired:
1008+
# Ensure that we have an initialized SSH connection to the guest that can
1009+
# run commands. The actual connection retry loop happens in SSHConnection._init_connection
1010+
self.ssh_iface(0)
1011+
except Exception as exc:
10141012
print(
1015-
f"Remote command did not respond within {timeout}s\n\n"
1013+
f"Failed to establish SSH connection to guest: {exc}\n\n"
10161014
f"Firecracker logs:\n{self.log_data}\n"
10171015
f"Thread backtraces:\n{self.thread_backtraces}"
10181016
)
10191017
raise
1020-
assert rc == 0, (
1021-
f"Remote command exited with non-0 status code\n\n"
1022-
f"{rc=}\n{stdout=}\n{stderr=}\n\n"
1023-
f"Firecracker logs:\n{self.log_data}\n"
1024-
f"Thread backtraces:\n{self.thread_backtraces}"
1025-
)
10261018

10271019

10281020
class MicroVMFactory:

tests/host_tools/network.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def scp_get(self, remote_path, local_path, recursive=False):
7878

7979
@retry(
8080
retry=retry_if_exception_type(ChildProcessError),
81-
wait=wait_fixed(0.15),
81+
wait=wait_fixed(0.5),
8282
stop=stop_after_attempt(20),
8383
reraise=True,
8484
)
@@ -90,7 +90,7 @@ def _init_connection(self):
9090
We'll keep trying to execute a remote command that can't fail
9191
(`/bin/true`), until we get a successful (0) exit code.
9292
"""
93-
self.check_output("true")
93+
self.check_output("true", timeout=10)
9494

9595
def run(self, cmd_string, timeout=None, *, check=False):
9696
"""Execute the command passed as a string in the ssh context."""

tests/integration_tests/functional/test_pause_resume.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,16 +49,16 @@ def test_pause_resume(uvm_nano):
4949
microvm.flush_metrics()
5050

5151
# Verify guest is no longer active.
52-
with pytest.raises(AssertionError):
53-
microvm.wait_for_ssh_up()
52+
with pytest.raises(ChildProcessError):
53+
microvm.ssh.check_output("true")
5454

5555
# Verify emulation was indeed paused and no events from either
5656
# guest or host side were handled.
5757
verify_net_emulation_paused(microvm.flush_metrics())
5858

5959
# Verify guest is no longer active.
60-
with pytest.raises(AssertionError):
61-
microvm.wait_for_ssh_up()
60+
with pytest.raises(ChildProcessError):
61+
microvm.ssh.check_output("true")
6262

6363
# Pausing the microVM when it is already `Paused` is allowed
6464
# (microVM remains in `Paused` state).

0 commit comments

Comments
 (0)