Skip to content

Commit 5122c4d

Browse files
committed
test: ab: operate on directories instead of commit SHAs
The tools/ab_test.py script relies on precompiled binaries existing in well-defined locations, and refuses to compile firecracker itself if they're missing. It derives these locations from commit SHAs, so conceptually, it makes more sense to cut out this middle step and just directly pass in directories instead of SHAs (with the expectation that the directories contain firecracker and jailer binaries). However, the commit SHAs were still used to print the commit ranges over which A/B-tests were done. It turns out that this was not working in the vast majority of cases though, as the commit log printing logic did not contain all the resolution logic that goes into compilation step (e.g. for parsing revisions and such). So just remove that part. In the EMF metrics, we now tag each report with the directory path instead of the commit SHA. For the post-merge runs, this makes no difference, as the SHA is part of the path. Signed-off-by: Patrick Roy <[email protected]>
1 parent 2c0333f commit 5122c4d

File tree

2 files changed

+21
-34
lines changed

2 files changed

+21
-34
lines changed

.buildkite/pipeline_perf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@
9898
pytest_opts = ""
9999
if REVISION_A:
100100
devtool_opts += " --ab"
101-
pytest_opts = f"{ab_opts} run {REVISION_A} {REVISION_B} --test {test_path}"
101+
pytest_opts = f"{ab_opts} run build/{REVISION_A}/build/cargo_target/$(uname -m)-unknown-linux-musl/release build/{REVISION_B}/build/cargo_target/$(uname -m)-unknown-linux-musl/release --test {test_path}"
102102
else:
103103
# Passing `-m ''` below instructs pytest to collect tests regardless of
104104
# their markers (e.g. it will collect both tests marked as nonci, and

tools/ab_test.py

Lines changed: 20 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,8 @@
3232
sys.path.append(str(Path(__file__).parent.parent / "tests"))
3333

3434
# pylint:disable=wrong-import-position
35-
from framework import utils
36-
from framework.ab_test import check_regression, git_ab_test
35+
from framework.ab_test import binary_ab_test, check_regression
3736
from framework.properties import global_props
38-
from host_tools.cargo_build import get_binary
3937
from host_tools.metrics import (
4038
emit_raw_emf,
4139
format_with_reduced_unit,
@@ -107,7 +105,7 @@ def find_unit(emf: dict, metric: str):
107105
return metrics.get(metric, "None")
108106

109107

110-
def load_data_series(report_path: Path, revision: str = None, *, reemit: bool = False):
108+
def load_data_series(report_path: Path, tag=None, *, reemit: bool = False):
111109
"""Loads the data series relevant for A/B-testing from test_results/test-report.json
112110
into a dictionary mapping each message's cloudwatch dimensions to a dictionary of
113111
its list-valued properties/metrics.
@@ -126,10 +124,9 @@ def load_data_series(report_path: Path, revision: str = None, *, reemit: bool =
126124
emf = json.loads(line)
127125

128126
if reemit:
129-
assert revision is not None
127+
assert tag is not None
130128

131-
# These will show up in Cloudwatch, so canonicalize to long commit SHAs
132-
emf["git_commit_id"] = canonicalize_revision(revision)
129+
emf["git_commit_id"] = str(tag)
133130
emit_raw_emf(emf)
134131

135132
dimensions, result = process_log_entry(emf)
@@ -159,7 +156,6 @@ def load_data_series(report_path: Path, revision: str = None, *, reemit: bool =
159156
def collect_data(binary_dir: Path, tests: list[str]):
160157
"""Executes the specified test using the provided firecracker binaries"""
161158
# Example binary_dir: ../build/main/build/cargo_target/x86_64-unknown-linux-musl/release
162-
revision = binary_dir.parents[3].name
163159

164160
print(f"Collecting samples with {binary_dir}")
165161
subprocess.run(
@@ -172,7 +168,7 @@ def collect_data(binary_dir: Path, tests: list[str]):
172168
check=True,
173169
)
174170
return load_data_series(
175-
Path("test_results/test-report.json"), revision, reemit=True
171+
Path("test_results/test-report.json"), binary_dir, reemit=True
176172
)
177173

178174

@@ -311,23 +307,17 @@ def analyze_data(
311307

312308

313309
def ab_performance_test(
314-
a_revision, b_revision, tests, p_thresh, strength_abs_thresh, noise_threshold
310+
a_revision: Path,
311+
b_revision: Path,
312+
tests,
313+
p_thresh,
314+
strength_abs_thresh,
315+
noise_threshold,
315316
):
316-
"""Does an A/B-test of the specified test across the given revisions"""
317-
_, commit_list, _ = utils.check_output(
318-
f"git --no-pager log --oneline {a_revision}..{b_revision}"
319-
)
320-
print(
321-
f"Performance A/B-test across {a_revision}..{b_revision}. This includes the following commits:"
322-
)
323-
print(commit_list.strip())
317+
"""Does an A/B-test of the specified test with the given firecracker/jailer binaries"""
324318

325-
def test_runner(workspace, _is_ab: bool):
326-
bin_dir = get_binary("firecracker", workspace_dir=workspace).parent
327-
return collect_data(bin_dir, tests)
328-
329-
return git_ab_test(
330-
test_runner,
319+
return binary_ab_test(
320+
lambda bin_dir, _: collect_data(bin_dir, tests),
331321
lambda ah, be: analyze_data(
332322
ah,
333323
be,
@@ -336,16 +326,11 @@ def test_runner(workspace, _is_ab: bool):
336326
noise_threshold,
337327
n_resamples=int(100 / p_thresh),
338328
),
339-
a_revision=a_revision,
340-
b_revision=b_revision,
329+
a_directory=a_revision,
330+
b_directory=b_revision,
341331
)
342332

343333

344-
def canonicalize_revision(revision):
345-
"""Canonicalizes the given revision to a 40 digit hex SHA"""
346-
return utils.check_output(f"git rev-parse {revision}").stdout.strip()
347-
348-
349334
if __name__ == "__main__":
350335
parser = argparse.ArgumentParser(
351336
description="Executes Firecracker's A/B testsuite across the specified commits"
@@ -357,11 +342,13 @@ def canonicalize_revision(revision):
357342
)
358343
run_parser.add_argument(
359344
"a_revision",
360-
help="The baseline revision compared to which we want to avoid regressing",
345+
help="Directory containing firecracker and jailer binaries to be considered the performance baseline",
346+
type=Path,
361347
)
362348
run_parser.add_argument(
363349
"b_revision",
364-
help="The revision whose performance we want to compare against the results from a_revision",
350+
help="Directory containing firecracker and jailer binaries whose performance we want to compare against the results from a_revision",
351+
type=Path,
365352
)
366353
run_parser.add_argument("--test", help="The test to run", nargs="+", required=True)
367354
analyze_parser = subparsers.add_parser(

0 commit comments

Comments
 (0)