Skip to content

Commit 79f80a4

Browse files
authored
Add a feature for using the same number of loops as a previous run (#327)
Motivation: On the Faster CPython team, we often collect pystats (counters of various interpreter events) by running the benchmark suite. It is very useful to compare the stats between two commits to see how a pull request affects the interpreter. Unfortunately, with pyperformance's default behavior where the number of loops is automatically calibrated, each benchmark may not be run the same number of times from run-to-run, making the data hard to compare. This change adds a new argument to the "run" command which will use the same number of loops as a previous run. The number of loops for each benchmark is looked up from the metadata in the .json output of that previous run, and passed to the underlying call to pyperf using the --loops argument. Additionally, this modifies one of the benchmarks (sqlglot) to be compatible with that scheme. sqlglot is the only run_benchmark.py script that runs multiple benchmarks within it in a single call to the script. This makes it impossible to set the number of loops independently for each of these benchmarks. It's been updated to use the pattern from other "suites" of benchmarks (e.g. async_tree) where each benchmark has its own .toml file and is run independently. This should still be backward compatible with older data collected from this benchmark, but doing "pyperformance run -b sqlglot" will now only run a single benchmark.
1 parent dcf71dc commit 79f80a4

File tree

11 files changed

+78
-5
lines changed

11 files changed

+78
-5
lines changed

doc/changelog.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
Changelog
22
=========
33

4+
* Add a --same-loops option to the run command to use the exact same number of
5+
loops as a previous run (without recalibrating).
6+
47
Version 1.10.0 (2023-10-22)
58
--------------
69
* Add benchmark for asyncio_websockets

doc/usage.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ options::
140140
-p PYTHON, --python PYTHON
141141
Python executable (default: use running
142142
Python)
143+
--same-loops SAME_LOOPS
144+
Use the same number of loops as a previous run
145+
(i.e., don't recalibrate). Should be a path to a
146+
.json file from a previous run.
143147

144148
show
145149
----

pyperformance/cli.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@ def parse_args():
7575
cmd.add_argument("--min-time", metavar="MIN_TIME",
7676
help="Minimum duration in seconds of a single "
7777
"value, used to calibrate the number of loops")
78+
cmd.add_argument("--same-loops",
79+
help="Use the same number of loops as a previous run "
80+
"(i.e., don't recalibrate). Should be a path to a "
81+
".json file from a previous run.")
7882
filter_opts(cmd)
7983

8084
# show

pyperformance/compile.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,8 @@ def run_benchmark(self, python=None):
543543
cmd.extend(('--affinity', self.conf.affinity))
544544
if self.conf.debug:
545545
cmd.append('--debug-single-value')
546+
if self.conf.same_loops:
547+
cmd.append('--same_loops=%s' % self.conf.same_loops)
546548
exitcode = self.run_nocheck(*cmd)
547549

548550
if os.path.exists(self.filename):
@@ -812,6 +814,7 @@ def getint(section, key, default=None):
812814
conf.benchmarks = getstr('run_benchmark', 'benchmarks', default='')
813815
conf.affinity = getstr('run_benchmark', 'affinity', default='')
814816
conf.upload = getboolean('run_benchmark', 'upload', False)
817+
conf.same_loops = getfile('run_benchmark', 'same_loops', default='')
815818

816819
# paths
817820
conf.build_dir = os.path.join(conf.directory, 'build')

pyperformance/data-files/benchmarks/MANIFEST

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ spectral_norm <local>
7777
sqlalchemy_declarative <local>
7878
sqlalchemy_imperative <local>
7979
sqlglot <local>
80+
sqlglot_parse <local:sqlglot>
81+
sqlglot_transpile <local:sqlglot>
82+
sqlglot_optimize <local:sqlglot>
8083
sqlite_synth <local>
8184
sympy <local>
8285
telco <local>
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[tool.pyperformance]
2+
name = "sqlglot_optimize"
3+
extra_opts = ["optimize"]
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[tool.pyperformance]
2+
name = "sqlglot_parse"
3+
extra_opts = ["parse"]
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[tool.pyperformance]
2+
name = "sqlglot_transpile"
3+
extra_opts = ["transpile"]

pyperformance/data-files/benchmarks/bm_sqlglot/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ dynamic = ["version"]
1010

1111
[tool.pyperformance]
1212
name = "sqlglot"
13+
extra_opts = ["normalize"]

pyperformance/data-files/benchmarks/bm_sqlglot/run_benchmark.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,31 @@ def bench_normalize(loops):
164164
return elapsed
165165

166166

167+
BENCHMARKS = {
168+
"parse": bench_parse,
169+
"transpile": bench_transpile,
170+
"optimize": bench_optimize,
171+
"normalize": bench_normalize
172+
}
173+
174+
175+
def add_cmdline_args(cmd, args):
176+
cmd.append(args.benchmark)
177+
178+
179+
def add_parser_args(parser):
180+
parser.add_argument(
181+
"benchmark",
182+
choices=BENCHMARKS,
183+
help="Which benchmark to run."
184+
)
185+
186+
167187
if __name__ == "__main__":
168-
runner = pyperf.Runner()
188+
runner = pyperf.Runner(add_cmdline_args=add_cmdline_args)
169189
runner.metadata['description'] = "SQLGlot benchmark"
170-
runner.bench_time_func("sqlglot_parse", bench_parse)
171-
runner.bench_time_func("sqlglot_transpile", bench_transpile)
172-
runner.bench_time_func("sqlglot_optimize", bench_optimize)
173-
runner.bench_time_func("sqlglot_normalize", bench_normalize)
190+
add_parser_args(runner.argparser)
191+
args = runner.parse_args()
192+
benchmark = args.benchmark
193+
194+
runner.bench_time_func(f"sqlglot_{benchmark}", BENCHMARKS[benchmark])

0 commit comments

Comments
 (0)