diff --git a/tests/framework/http_api.py b/tests/framework/http_api.py index ea8efd3df4f..7534fe829d2 100644 --- a/tests/framework/http_api.py +++ b/tests/framework/http_api.py @@ -58,7 +58,12 @@ def __init__(self, api, resource, id_field=None): def get(self): """Make a GET request""" url = self._api.endpoint + self.resource - res = self._api.session.get(url) + try: + res = self._api.session.get(url) + except Exception as e: + if self._api.error_callback: + self._api.error_callback("GET", self.resource, str(e)) + raise assert res.status_code == HTTPStatus.OK, res.json() return res @@ -66,7 +71,12 @@ def request(self, method, path, **kwargs): """Make an HTTP request""" kwargs = {key: val for key, val in kwargs.items() if val is not None} url = self._api.endpoint + path - res = self._api.session.request(method, url, json=kwargs) + try: + res = self._api.session.request(method, url, json=kwargs) + except Exception as e: + if self._api.error_callback: + self._api.error_callback(method, path, str(e)) + raise if res.status_code != HTTPStatus.NO_CONTENT: json = res.json() msg = res.content @@ -95,7 +105,8 @@ def patch(self, **kwargs): class Api: """A simple HTTP client for the Firecracker API""" - def __init__(self, api_usocket_full_name): + def __init__(self, api_usocket_full_name, *, on_error=None): + self.error_callback = on_error self.socket = api_usocket_full_name url_encoded_path = urllib.parse.quote_plus(api_usocket_full_name) self.endpoint = DEFAULT_SCHEME + url_encoded_path diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 7f67a5f9754..a6582515491 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -615,7 +615,12 @@ def spawn( # pylint: disable=subprocess-run-check # pylint: disable=too-many-branches self.jailer.setup() - self.api = Api(self.jailer.api_socket_path()) + self.api = Api( + self.jailer.api_socket_path(), + on_error=lambda verb, uri, err_msg: self._dump_debug_information( + f"Error during {verb} {uri}: {err_msg}" + ), + ) if log_file is not None: self.log_file = Path(self.path) / log_file @@ -683,21 +688,33 @@ def spawn( if emit_metrics: self.monitors.append(FCMetricsMonitor(self)) - # Wait for the jailer to create resources needed, and Firecracker to - # create its API socket. - # We expect the jailer to start within 80 ms. However, we wait for - # 1 sec since we are rechecking the existence of the socket 5 times - # and leave 0.2 delay between them. - if "no-api" not in self.jailer.extra_args: - self._wait_create() + # Ensure Firecracker is in as good a state as possible wrts guest + # responsiveness / API availability. + # If we are using a config file and it has a network device specified, + # use SSH to wait until guest userspace is available. If we are + # using the API, wait until the log message indicating the API server + # has finished initializing is printed (if logging is enabled), or + # until the API socket file has been created. + # If none of these apply, do a last ditch effort to make sure the + # Firecracker process itself at least came up by checking + # for the startup log message. Otherwise, you're on your own kid. if "config-file" in self.jailer.extra_args and self.iface: self.wait_for_ssh_up() - if self.log_file and log_level in ("Trace", "Debug", "Info"): + elif "no-api" not in self.jailer.extra_args: + if self.log_file and log_level in ("Trace", "Debug", "Info"): + self.check_log_message("API server started.") + else: + self._wait_for_api_socket() + elif self.log_file and log_level in ("Trace", "Debug", "Info"): self.check_log_message("Running Firecracker") @retry(wait=wait_fixed(0.2), stop=stop_after_attempt(5), reraise=True) - def _wait_create(self): + def _wait_for_api_socket(self): """Wait until the API socket and chroot folder are available.""" + + # We expect the jailer to start within 80 ms. However, we wait for + # 1 sec since we are rechecking the existence of the socket 5 times + # and leave 0.2 delay between them. os.stat(self.jailer.api_socket_path()) @retry(wait=wait_fixed(0.2), stop=stop_after_attempt(5), reraise=True) @@ -1096,10 +1113,12 @@ def thread_backtraces(self): backtraces = [] for thread_name, thread_pids in utils.get_threads(self.firecracker_pid).items(): for pid in thread_pids: - backtraces.append( - f"{thread_name} ({pid=}):\n" - f"{utils.check_output(f'cat /proc/{pid}/stack').stdout}" - ) + try: + stack = Path(f"/proc/{pid}/stack").read_text("UTF-8") + except FileNotFoundError: + continue # process might've gone away between get_threads() call and here + + backtraces.append(f"{thread_name} ({pid=}):\n{stack}") return "\n".join(backtraces) def _dump_debug_information(self, what: str): diff --git a/tests/framework/utils.py b/tests/framework/utils.py index f047fbe2284..f35819236ad 100644 --- a/tests/framework/utils.py +++ b/tests/framework/utils.py @@ -34,11 +34,15 @@ def get_threads(pid: int) -> dict: """Return dict consisting of child threads.""" - threads_map = defaultdict(list) - proc = psutil.Process(pid) - for thread in proc.threads(): - threads_map[psutil.Process(thread.id).name()].append(thread.id) - return threads_map + try: + proc = psutil.Process(pid) + + threads_map = defaultdict(list) + for thread in proc.threads(): + threads_map[psutil.Process(thread.id).name()].append(thread.id) + return threads_map + except psutil.NoSuchProcess: + return {} def get_cpu_affinity(pid: int) -> list: diff --git a/tests/integration_tests/functional/test_api_server.py b/tests/integration_tests/functional/test_api_server.py index ee72d2043a3..b502dff7e6d 100644 --- a/tests/integration_tests/functional/test_api_server.py +++ b/tests/integration_tests/functional/test_api_server.py @@ -23,7 +23,7 @@ def test_api_socket_in_use(uvm_plain): sock = socket.socket(socket.AF_UNIX) sock.bind(microvm.jailer.api_socket_path()) - microvm.spawn() + microvm.spawn(log_level="warn") msg = "Failed to open the API socket at: /run/firecracker.socket. Check that it is not already used." microvm.check_log_message(msg) diff --git a/tests/integration_tests/functional/test_cmd_line_parameters.py b/tests/integration_tests/functional/test_cmd_line_parameters.py index 79af938b1f7..9c2c79d9552 100644 --- a/tests/integration_tests/functional/test_cmd_line_parameters.py +++ b/tests/integration_tests/functional/test_cmd_line_parameters.py @@ -28,7 +28,8 @@ def test_describe_snapshot_all_versions( fc_binary_path=firecracker_release.path, jailer_binary_path=firecracker_release.jailer, ) - vm.spawn() + # FIXME: Once only FC versions >= 1.12 are supported, drop log_level="warn" + vm.spawn(log_level="warn") vm.basic_config(track_dirty_pages=True) vm.start() snapshot = vm.snapshot_diff() diff --git a/tests/integration_tests/functional/test_cmd_line_start.py b/tests/integration_tests/functional/test_cmd_line_start.py index 84b954e7038..dd0e1e0227e 100644 --- a/tests/integration_tests/functional/test_cmd_line_start.py +++ b/tests/integration_tests/functional/test_cmd_line_start.py @@ -371,7 +371,7 @@ def test_start_with_missing_metadata(uvm_plain): try: test_microvm.spawn() - except FileNotFoundError: + except: # pylint: disable=bare-except pass finally: test_microvm.check_log_message( @@ -394,7 +394,7 @@ def test_start_with_invalid_metadata(uvm_plain): try: test_microvm.spawn() - except FileNotFoundError: + except: # pylint: disable=bare-except pass finally: test_microvm.check_log_message("MMDS error: metadata provided not valid json")