Commit d2939ef

feat: store test results for A/B runs
Currently, when an A/B test is run, only the results of the B run end up in the `test_results` dir: the dir is shared by both runs, so the second run overwrites the data of the first. Now each run stores its results in a separate dir.

Signed-off-by: Egor Lazarchuk <[email protected]>
1 parent a619613 commit d2939ef

2 files changed (+15, -11 lines)


tests/framework/ab_test.py

Lines changed: 3 additions & 3 deletions
@@ -103,7 +103,7 @@ def git_ab_test(
 
 
 def binary_ab_test(
-    test_runner: Callable[[Path, bool], T],
+    test_runner: Callable[[str, Path, bool], T],
     comparator: Callable[[T, T], U] = default_comparator,
     *,
     a_directory: Path = DEFAULT_A_DIRECTORY,
@@ -113,8 +113,8 @@ def binary_ab_test(
     Similar to `git_ab_test`, but instead of locally checking out different revisions, it operates on
     directories containing firecracker/jailer binaries
     """
-    result_a = test_runner(a_directory, True)
-    result_b = test_runner(b_directory, False)
+    result_a = test_runner("A", a_directory, True)
+    result_b = test_runner("B", b_directory, False)
 
     return result_a, result_b, comparator(result_a, result_b)
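
For reference, any runner passed to `binary_ab_test` now receives the A/B tag as its first argument. A minimal sketch of the new calling convention (the runner below is a hypothetical example, not part of the commit):

from pathlib import Path

# Hypothetical runner matching the new (tag, binary_dir, is_a) signature.
def my_runner(tag: str, binary_dir: Path, is_a: bool) -> str:
    # `tag` is "A" or "B", so per-run output can be kept under test_results/{tag}
    return f"{tag}: ran binaries from {binary_dir} (A side: {is_a})"

# binary_ab_test calls test_runner("A", a_directory, True) first,
# then test_runner("B", b_directory, False), and compares the two results.
print(my_runner("A", Path("/tmp/a"), True))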

tools/ab_test.py

Lines changed: 12 additions & 8 deletions
@@ -19,6 +19,7 @@
 between the two runs, performing statistical regression test across all the list-
 valued properties collected.
 """
+
 import argparse
 import json
 import os
@@ -180,13 +181,17 @@ def uninteresting_dimensions(processed_emf):
     return uninteresting
 
 
-def collect_data(binary_dir: Path, pytest_opts: str):
-    """Executes the specified test using the provided firecracker binaries"""
+def collect_data(tag: str, binary_dir: Path, pytest_opts: str):
+    """
+    Executes the specified test using the provided firecracker binaries and
+    stores results into the `test_results/tag` directory
+    """
     binary_dir = binary_dir.resolve()
 
     print(f"Collecting samples with {binary_dir}")
+    test_report_path = f"test_results/{tag}/test-report.json"
     subprocess.run(
-        f"./tools/test.sh --binary-dir={binary_dir} {pytest_opts} -m ''",
+        f"./tools/test.sh --binary-dir={binary_dir} {pytest_opts} -m '' --json-report-file=../{test_report_path}",
         env=os.environ
         | {
             "AWS_EMF_ENVIRONMENT": "local",
@@ -195,9 +200,8 @@ def collect_data(binary_dir: Path, pytest_opts: str):
         check=True,
         shell=True,
     )
-    return load_data_series(
-        Path("test_results/test-report.json"), binary_dir, reemit=True
-    )
+
+    return load_data_series(Path(test_report_path), binary_dir, reemit=True)
 
 
 def analyze_data(
@@ -327,7 +331,7 @@ def analyze_data(
             f"for metric \033[1m{metric}\033[0m with \033[0;31m\033[1mp={result.pvalue}\033[0m. "
             f"This means that observing a change of this magnitude or worse, assuming that performance "
             f"characteristics did not change across the tested commits, has a probability of {result.pvalue:.2%}. "
-            f"Tested Dimensions:\n{json.dumps({k: v for k,v in dimension_set if k not in do_not_print_list}, indent=2, sort_keys=True)}"
+            f"Tested Dimensions:\n{json.dumps({k: v for k, v in dimension_set if k not in do_not_print_list}, indent=2, sort_keys=True)}"
         )
         messages.append(msg)
 
@@ -346,7 +350,7 @@ def ab_performance_test(
     """Does an A/B-test of the specified test with the given firecracker/jailer binaries"""
 
     return binary_ab_test(
-        lambda bin_dir, _: collect_data(bin_dir, pytest_opts),
+        lambda tag, bin_dir, _: collect_data(tag, bin_dir, pytest_opts),
         lambda ah, be: analyze_data(
             ah,
             be,
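
Taken together, the tag is threaded from `binary_ab_test` into `collect_data`, so each side of an A/B run writes its own pytest report. A self-contained sketch of the resulting flow (simplified stand-ins, not the real framework code):

from pathlib import Path
from typing import Callable, TypeVar

T = TypeVar("T")

def run_both(test_runner: Callable[[str, Path, bool], T]) -> tuple:
    # Mirrors binary_ab_test: the tag is passed through to the runner.
    result_a = test_runner("A", Path("build/A"), True)
    result_b = test_runner("B", Path("build/B"), False)
    return result_a, result_b

def fake_collect_data(tag: str, binary_dir: Path) -> str:
    # Mirrors collect_data: each tag gets its own report path, so the
    # B run no longer overwrites the A run's report.
    return f"test_results/{tag}/test-report.json"

print(run_both(lambda tag, bin_dir, _: fake_collect_data(tag, bin_dir)))
# -> ('test_results/A/test-report.json', 'test_results/B/test-report.json')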
