Skip to content

Commit 4a58521

Browse files
authored
Merge branch 'main' into ext_topo_leaf
2 parents 2cdd96f + fbf24fe commit 4a58521

File tree

8 files changed

+81
-35
lines changed

8 files changed

+81
-35
lines changed

tests/framework/http_api.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,25 @@ def __init__(self, api, resource, id_field=None):
5858
def get(self):
5959
"""Make a GET request"""
6060
url = self._api.endpoint + self.resource
61-
res = self._api.session.get(url)
61+
try:
62+
res = self._api.session.get(url)
63+
except Exception as e:
64+
if self._api.error_callback:
65+
self._api.error_callback("GET", self.resource, str(e))
66+
raise
6267
assert res.status_code == HTTPStatus.OK, res.json()
6368
return res
6469

6570
def request(self, method, path, **kwargs):
6671
"""Make an HTTP request"""
6772
kwargs = {key: val for key, val in kwargs.items() if val is not None}
6873
url = self._api.endpoint + path
69-
res = self._api.session.request(method, url, json=kwargs)
74+
try:
75+
res = self._api.session.request(method, url, json=kwargs)
76+
except Exception as e:
77+
if self._api.error_callback:
78+
self._api.error_callback(method, path, str(e))
79+
raise
7080
if res.status_code != HTTPStatus.NO_CONTENT:
7181
json = res.json()
7282
msg = res.content
@@ -95,7 +105,8 @@ def patch(self, **kwargs):
95105
class Api:
96106
"""A simple HTTP client for the Firecracker API"""
97107

98-
def __init__(self, api_usocket_full_name):
108+
def __init__(self, api_usocket_full_name, *, on_error=None):
109+
self.error_callback = on_error
99110
self.socket = api_usocket_full_name
100111
url_encoded_path = urllib.parse.quote_plus(api_usocket_full_name)
101112
self.endpoint = DEFAULT_SCHEME + url_encoded_path

tests/framework/microvm.py

Lines changed: 48 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -333,13 +333,23 @@ def kill(self):
333333
# https://github.com/firecracker-microvm/firecracker/pull/4442/commits/d63eb7a65ffaaae0409d15ed55d99ecbd29bc572
334334

335335
# filter ps results for the jailer's unique id
336-
_, stdout, stderr = utils.check_output(
337-
f"ps ax -o cmd -ww | grep {self.jailer.jailer_id}"
336+
_, stdout, stderr = utils.run_cmd(
337+
f"ps ax -o pid,cmd -ww | grep {self.jailer.jailer_id}"
338338
)
339+
340+
assert not stderr, f"error querying processes using `ps`: {stderr}"
341+
342+
offenders = []
343+
for proc in stdout.splitlines():
344+
_, cmd = proc.lower().split(maxsplit=1)
345+
if "firecracker" in proc and not cmd.startswith("screen"):
346+
offenders.append(proc)
347+
339348
# make sure firecracker was killed
340-
assert (
341-
stderr == "" and "firecracker" not in stdout
342-
), f"Firecracker reported its pid {self.firecracker_pid}, which was killed, but there still exist processes using the supposedly dead Firecracker's jailer_id: {stdout}"
349+
assert not offenders, (
350+
f"Firecracker reported its pid {self.firecracker_pid}, which was killed, but there still exist processes using the supposedly dead Firecracker's jailer_id: \n"
351+
+ "\n".join(offenders)
352+
)
343353

344354
if self.uffd_handler and self.uffd_handler.is_running():
345355
self.uffd_handler.kill()
@@ -605,7 +615,12 @@ def spawn(
605615
# pylint: disable=subprocess-run-check
606616
# pylint: disable=too-many-branches
607617
self.jailer.setup()
608-
self.api = Api(self.jailer.api_socket_path())
618+
self.api = Api(
619+
self.jailer.api_socket_path(),
620+
on_error=lambda verb, uri, err_msg: self._dump_debug_information(
621+
f"Error during {verb} {uri}: {err_msg}"
622+
),
623+
)
609624

610625
if log_file is not None:
611626
self.log_file = Path(self.path) / log_file
@@ -673,21 +688,33 @@ def spawn(
673688
if emit_metrics:
674689
self.monitors.append(FCMetricsMonitor(self))
675690

676-
# Wait for the jailer to create resources needed, and Firecracker to
677-
# create its API socket.
678-
# We expect the jailer to start within 80 ms. However, we wait for
679-
# 1 sec since we are rechecking the existence of the socket 5 times
680-
# and leave 0.2 delay between them.
681-
if "no-api" not in self.jailer.extra_args:
682-
self._wait_create()
691+
# Ensure Firecracker is in as good a state as possible wrts guest
692+
# responsiveness / API availability.
693+
# If we are using a config file and it has a network device specified,
694+
# use SSH to wait until guest userspace is available. If we are
695+
# using the API, wait until the log message indicating the API server
696+
# has finished initializing is printed (if logging is enabled), or
697+
# until the API socket file has been created.
698+
# If none of these apply, do a last ditch effort to make sure the
699+
# Firecracker process itself at least came up by checking
700+
# for the startup log message. Otherwise, you're on your own kid.
683701
if "config-file" in self.jailer.extra_args and self.iface:
684702
self.wait_for_ssh_up()
685-
if self.log_file and log_level in ("Trace", "Debug", "Info"):
703+
elif "no-api" not in self.jailer.extra_args:
704+
if self.log_file and log_level in ("Trace", "Debug", "Info"):
705+
self.check_log_message("API server started.")
706+
else:
707+
self._wait_for_api_socket()
708+
elif self.log_file and log_level in ("Trace", "Debug", "Info"):
686709
self.check_log_message("Running Firecracker")
687710

688711
@retry(wait=wait_fixed(0.2), stop=stop_after_attempt(5), reraise=True)
689-
def _wait_create(self):
712+
def _wait_for_api_socket(self):
690713
"""Wait until the API socket and chroot folder are available."""
714+
715+
# We expect the jailer to start within 80 ms. However, we wait for
716+
# 1 sec since we are rechecking the existence of the socket 5 times
717+
# and leave 0.2 delay between them.
691718
os.stat(self.jailer.api_socket_path())
692719

693720
@retry(wait=wait_fixed(0.2), stop=stop_after_attempt(5), reraise=True)
@@ -1086,10 +1113,12 @@ def thread_backtraces(self):
10861113
backtraces = []
10871114
for thread_name, thread_pids in utils.get_threads(self.firecracker_pid).items():
10881115
for pid in thread_pids:
1089-
backtraces.append(
1090-
f"{thread_name} ({pid=}):\n"
1091-
f"{utils.check_output(f'cat /proc/{pid}/stack').stdout}"
1092-
)
1116+
try:
1117+
stack = Path(f"/proc/{pid}/stack").read_text("UTF-8")
1118+
except FileNotFoundError:
1119+
continue # process might've gone away between get_threads() call and here
1120+
1121+
backtraces.append(f"{thread_name} ({pid=}):\n{stack}")
10931122
return "\n".join(backtraces)
10941123

10951124
def _dump_debug_information(self, what: str):

tests/framework/utils.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,15 @@
3434

3535
def get_threads(pid: int) -> dict:
3636
"""Return dict consisting of child threads."""
37-
threads_map = defaultdict(list)
38-
proc = psutil.Process(pid)
39-
for thread in proc.threads():
40-
threads_map[psutil.Process(thread.id).name()].append(thread.id)
41-
return threads_map
37+
try:
38+
proc = psutil.Process(pid)
39+
40+
threads_map = defaultdict(list)
41+
for thread in proc.threads():
42+
threads_map[psutil.Process(thread.id).name()].append(thread.id)
43+
return threads_map
44+
except psutil.NoSuchProcess:
45+
return {}
4246

4347

4448
def get_cpu_affinity(pid: int) -> list:

tests/integration_tests/functional/test_api_server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def test_api_socket_in_use(uvm_plain):
2323

2424
sock = socket.socket(socket.AF_UNIX)
2525
sock.bind(microvm.jailer.api_socket_path())
26-
microvm.spawn()
26+
microvm.spawn(log_level="warn")
2727
msg = "Failed to open the API socket at: /run/firecracker.socket. Check that it is not already used."
2828
microvm.check_log_message(msg)
2929

tests/integration_tests/functional/test_cmd_line_parameters.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ def test_describe_snapshot_all_versions(
2828
fc_binary_path=firecracker_release.path,
2929
jailer_binary_path=firecracker_release.jailer,
3030
)
31-
vm.spawn()
31+
# FIXME: Once only FC versions >= 1.12 are supported, drop log_level="warn"
32+
vm.spawn(log_level="warn")
3233
vm.basic_config(track_dirty_pages=True)
3334
vm.start()
3435
snapshot = vm.snapshot_diff()

tests/integration_tests/functional/test_cmd_line_start.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ def test_start_with_missing_metadata(uvm_plain):
371371

372372
try:
373373
test_microvm.spawn()
374-
except FileNotFoundError:
374+
except: # pylint: disable=bare-except
375375
pass
376376
finally:
377377
test_microvm.check_log_message(
@@ -394,7 +394,7 @@ def test_start_with_invalid_metadata(uvm_plain):
394394

395395
try:
396396
test_microvm.spawn()
397-
except FileNotFoundError:
397+
except: # pylint: disable=bare-except
398398
pass
399399
finally:
400400
test_microvm.check_log_message("MMDS error: metadata provided not valid json")

tests/integration_tests/functional/test_cpu_features_x86_64.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,7 @@ def test_cpu_cpuid_snapshot(microvm_factory, guest_kernel, rootfs, cpu_template_
541541
"""
542542
cpu_template_name = get_cpu_template_name(cpu_template_any)
543543
if cpu_template_name not in MSR_SUPPORTED_TEMPLATES:
544-
pytest.skip("This test does not support {cpu_template_name} template.")
544+
pytest.skip(f"This test does not support {cpu_template_name} template.")
545545

546546
shared_names = SNAPSHOT_RESTORE_SHARED_NAMES
547547

@@ -611,7 +611,7 @@ def test_cpu_cpuid_restore(microvm_factory, guest_kernel, cpu_template_any):
611611
"""
612612
cpu_template_name = get_cpu_template_name(cpu_template_any)
613613
if cpu_template_name not in MSR_SUPPORTED_TEMPLATES:
614-
pytest.skip("This test does not support {cpu_template_name} template.")
614+
pytest.skip(f"This test does not support {cpu_template_name} template.")
615615

616616
shared_names = SNAPSHOT_RESTORE_SHARED_NAMES
617617
snapshot_artifacts_dir = (
@@ -646,8 +646,8 @@ def test_cpu_template(uvm_plain_any, cpu_template_any, microvm_factory):
646646
supported CPU templates.
647647
"""
648648
cpu_template_name = get_cpu_template_name(cpu_template_any)
649-
if cpu_template_name not in ["T2", "T2S", "SPR_TO_T2_5.10", "SPR_TO_T2_6.1" "C3"]:
650-
pytest.skip("This test does not support {cpu_template_name} template.")
649+
if cpu_template_name not in ["T2", "T2S", "SPR_TO_T2_5.10", "SPR_TO_T2_6.1", "C3"]:
650+
pytest.skip(f"This test does not support {cpu_template_name} template.")
651651

652652
test_microvm = uvm_plain_any
653653
test_microvm.spawn()

tests/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,5 @@ disable = [
5555
"duplicate-code",
5656
"too-many-positional-arguments",
5757
"too-few-public-methods",
58+
"too-many-branches",
5859
]

0 commit comments

Comments
 (0)