test: pin criterion benchmark to CPU

roypat · pb8o · commit 038c0e0627c5 · 2023-06-08T15:42:19.000+02:00
Pinning the benchmark executable to a single CPU should decrease
variance. Note that to avoid also restricting cargo's compilation to that
one CPU, we separate the compilation and execution steps.

Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
diff --git a/tests/integration_tests/performance/test_benchmarks.py b/tests/integration_tests/performance/test_benchmarks.py
@@ -16,6 +16,34 @@
 LOGGER = logging.getLogger(__name__)
 
 
+def cargo_bench():
+    """Builds all benchmarks via "cargo bench --no-run", runs each resulting executable pinned to CPU 1, and returns their concatenated stdout"""
+    # Passing --message-format json to cargo tells it to print its log as one JSON object per line. Instead of the
+    # usual "placed executable <...> at <...>" message we then get a JSON object with an 'executable' key, from
+    # which we extract the path to the compiled benchmark binary.
+    _, stdout, _ = cargo(
+        "bench",
+        f"--all --quiet --target {platform.machine()}-unknown-linux-musl --message-format json --no-run",
+    )
+
+    executables = []
+    for line in stdout.split("\n"):
+        if line:  # skip empty lines, which json.loads would reject
+            msg = json.loads(line)
+            executable = msg.get("executable")
+            if executable:  # keep only messages carrying a non-empty 'executable' value
+                executables.append(executable)
+
+    output = ""
+
+    for executable in executables:
+        output += utils.run_cmd(  # "taskset -c 1" restricts the benchmark process to CPU 1
+            f"CARGO_TARGET_DIR=../build/cargo_target taskset -c 1 {executable} --bench"
+        ).stdout
+
+    return output
+
+
 @pytest.mark.no_block_pr
 @pytest.mark.timeout(600)
 def test_no_regression_relative_to_target_branch():
@@ -27,14 +55,12 @@ def test_no_regression_relative_to_target_branch():
     # the test was originally executed
     _, pr_head_commit_sha, _ = utils.run_cmd("git rev-parse HEAD")
     utils.run_cmd(f"git switch {TARGET_BRANCH}")
-    cargo("bench", f"--all --quiet --target {platform.machine()}-unknown-linux-musl")
+    cargo_bench()
 
     # Switch back to pull request, and run benchmarks again. Criterion will automatically notice that
     # data from a previous run exists, and do a comparison
     utils.run_cmd(f"git checkout {pr_head_commit_sha}")
-    _, criterion_output, _ = cargo(
-        "bench", f"--all --quiet --target {platform.machine()}-unknown-linux-musl"
-    )
+    criterion_output = cargo_bench()
 
     # Criterion separates reports for benchmarks by two newlines. We filter and print the ones
     # that contain the string 'Performance has regression.', which criterion uses to indicate a regression