1 change: 1 addition & 0 deletions AUTHORS.rst
@@ -28,3 +28,4 @@ Authors
* Stanislav Levin - https://github.com/stanislavlevin
* Grygorii Iermolenko - https://github.com/gyermolenko
* Jonathan Simon Prates - https://github.com/jonathansp
* Alexander Schlarb – https://ninetailed.ninja/
6 changes: 6 additions & 0 deletions CHANGELOG.rst
@@ -2,6 +2,12 @@
Changelog
=========

Future
------

* Add ``baseline`` boolean option to the benchmark options to allow adding benchmarks
  for comparison that do not affect relative scores.

3.2.3 (2020-01-10)
------------------

15 changes: 15 additions & 0 deletions docs/usage.rst
@@ -240,6 +240,7 @@ You can set per-test options with the ``benchmark`` marker:

@pytest.mark.benchmark(
group="group-name",
baseline=True,
min_time=0.1,
max_time=0.5,
min_rounds=5,
@@ -258,6 +259,20 @@ You can set per-test options with the ``benchmark`` marker:
# Note: this code is not measured.
assert result is None

In addition to the options whose names coincide with the relevant
command-line options, this allows modifying the following values:

``group``
A user-defined group name that this benchmark belongs to. Use this
to group related benchmarks for comparing values in the results
printed by pytest.

``baseline``
Whether this benchmark's results should be considered as possible
baseline values when comparing them to other results in the same
group. Use this if you want to include some results just for
comparison, without them affecting the relative scores displayed
for other results.

Extra info
==========
3 changes: 2 additions & 1 deletion src/pytest_benchmark/fixture.py
@@ -33,8 +33,9 @@ class BenchmarkFixture(object):
_precisions = {}

def __init__(self, node, disable_gc, timer, min_rounds, min_time, max_time, warmup, warmup_iterations,
calibration_precision, add_stats, logger, warner, disabled, cprofile, group=None):
calibration_precision, add_stats, logger, warner, disabled, cprofile, group=None, baseline=True):
self.name = node.name
self.baseline = baseline
self.fullname = node._nodeid
self.disabled = disabled
if hasattr(node, 'callspec'):
2 changes: 1 addition & 1 deletion src/pytest_benchmark/plugin.py
@@ -424,7 +424,7 @@ def pytest_runtest_setup(item):
for name in marker.kwargs:
if name not in (
"max_time", "min_rounds", "min_time", "timer", "group", "disable_gc", "warmup",
"warmup_iterations", "calibration_precision", "cprofile"):
"warmup_iterations", "calibration_precision", "cprofile", "baseline"):
Owner:
Not sure how, but we should have some way to validate that there is only one baseline per group. It doesn't make sense to have two baselines, right? Let's not let users wonder why stuff doesn't work as expected (the annoying silent failure).

Author:
The current implementation marks all results as possible baselines by default and only excludes the ones marked as baseline=False. If there is more than one baseline=True benchmark available, it will choose the one with the lowest value/highest score. This integrates cleanly with the existing behaviour and means that I don't have to pick the baseline value once and for all (as performance may differ between systems, etc.). The included docs actually mention this. As you mentioned, there cannot be two baselines when the output is rendered, but there can be more than one potential baseline score.

Unless you have strong feelings on this, I'd like to keep it this way for extra flexibility. The wording could be improved, however: maybe something along the lines of potential_baseline, but shorter?
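A rough sketch of that selection rule (editor's illustration of the behaviour
described above and implemented in the table.py change below; simplified, not
the PR's exact code):

    # Every result is a potential baseline unless it was marked baseline=False;
    # the best eligible value (lowest time, or highest value for "ops") becomes
    # the (1.0) anchor for the group.
    def pick_baseline(benchmarks, prop):
        eligible = [bench[prop] for bench in benchmarks if bench.get("baseline", True)]
        if not eligible:
            return None  # no eligible candidate in this group
        return max(eligible) if prop == "ops" else min(eligible)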

raise ValueError("benchmark mark can't have %r keyword argument." % name)


2 changes: 2 additions & 0 deletions src/pytest_benchmark/stats.py
@@ -172,6 +172,7 @@ def ops(self):
class Metadata(object):
def __init__(self, fixture, iterations, options):
self.name = fixture.name
self.baseline = fixture.baseline
self.fullname = fixture.fullname
self.group = fixture.group
self.param = fixture.param
@@ -210,6 +211,7 @@ def as_dict(self, include_data=True, flat=False, stats=True, cprofile=None):
result = {
"group": self.group,
"name": self.name,
"baseline": self.baseline,
"fullname": self.fullname,
"params": self.params,
"param": self.param,
19 changes: 16 additions & 3 deletions src/pytest_benchmark/table.py
@@ -29,6 +29,7 @@ def display(self, tr, groups, progress_reporter=report_progress):
bench["name"] = self.name_format(bench)

worst = {}
baseline = {}
best = {}
solo = len(benchmarks) == 1
for line, prop in progress_reporter(("min", "max", "mean", "median", "iqr", "stddev", "ops"),
@@ -38,11 +39,23 @@
benchmarks, tr, "{line} ({pos}/{total})", line=line))
best[prop] = max(bench[prop] for _, bench in progress_reporter(
benchmarks, tr, "{line} ({pos}/{total})", line=line))
try:
baseline[prop] = max(bench[prop] for _, bench in progress_reporter(
benchmarks, tr, "{line} ({pos}/{total})", line=line)
if bench.get("baseline", True))
except ValueError:
Owner:
Can we avoid the try/except somehow? What actually can raise that error?

Author:
If there is no benchmark in the group marked as baseline, this will end up calling max(()), which raises ValueError: max() arg is an empty sequence.

Should I convert this to an if? It would require evaluating the array of values up front, then checking their len(…). Or would you prefer me to add a comment about this?
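For reference, a minimal sketch of that if-based alternative (editor's
illustration using the surrounding names from this diff, not code from the PR):

    # Materialize the eligible values first, then branch on emptiness instead
    # of catching the ValueError that max() raises for an empty sequence.
    eligible = [
        bench[prop]
        for _, bench in progress_reporter(benchmarks, tr, "{line} ({pos}/{total})", line=line)
        if bench.get("baseline", True)
    ]
    baseline[prop] = max(eligible) if eligible else None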

baseline[prop] = None
else:
worst[prop] = max(bench[prop] for _, bench in progress_reporter(
benchmarks, tr, "{line} ({pos}/{total})", line=line))
best[prop] = min(bench[prop] for _, bench in progress_reporter(
benchmarks, tr, "{line} ({pos}/{total})", line=line))
try:
baseline[prop] = min(bench[prop] for _, bench in progress_reporter(
benchmarks, tr, "{line} ({pos}/{total})", line=line)
if bench.get("baseline", True))
except ValueError:
baseline[prop] = None
for line, prop in progress_reporter(("outliers", "rounds", "iterations"), tr, "{line}: {value}", line=line):
worst[prop] = max(benchmark[prop] for _, benchmark in progress_reporter(
benchmarks, tr, "{line} ({pos}/{total})", line=line))
@@ -106,7 +119,7 @@ def display(self, tr, groups, progress_reporter=report_progress):
ALIGNED_NUMBER_FMT.format(
bench[prop] * adjustment,
widths[prop],
compute_baseline_scale(best[prop], bench[prop], rpadding),
compute_baseline_scale(baseline[prop], bench[prop], rpadding),
rpadding
),
green=not solo and bench[prop] == best.get(prop),
@@ -118,7 +131,7 @@
ALIGNED_NUMBER_FMT.format(
bench[prop] * ops_adjustment,
widths[prop],
compute_baseline_scale(best[prop], bench[prop], rpadding),
compute_baseline_scale(baseline[prop], bench[prop], rpadding),
rpadding
),
green=not solo and bench[prop] == best.get(prop),
@@ -147,7 +160,7 @@ def display(self, tr, groups, progress_reporter=report_progress):


def compute_baseline_scale(baseline, value, width):
if not width:
if not width or baseline is None:
return ""
if value == baseline:
return " (1.0)".ljust(width)
23 changes: 23 additions & 0 deletions tests/test_benchmark.py
@@ -1096,3 +1096,26 @@ def test_columns(testdir):
"Name (time in ?s) * Max * Iterations * Min *",
"------*",
])


def test_report_table_order(testdir):
test = testdir.makepyfile('''
import time
import pytest

@pytest.mark.benchmark(baseline=False)
def test_fast(benchmark):
@benchmark
def result():
return time.sleep(0.000001)
assert result is None

def test_slow(benchmark):
benchmark(lambda: time.sleep(0.1))
assert 1 == 1
''')
result = testdir.runpytest_subprocess(test)
result.stdout.fnmatch_lines([
"test_fast * (0.*) *",
"test_slow * (1.0) *"
])