1 change: 1 addition & 0 deletions AUTHORS.rst
@@ -28,3 +28,4 @@ Authors
* Stanislav Levin - https://github.com/stanislavlevin
* Grygorii Iermolenko - https://github.com/gyermolenko
* Jonathan Simon Prates - https://github.com/jonathansp
* Alexander Schlarb – https://ninetailed.ninja/
6 changes: 6 additions & 0 deletions CHANGELOG.rst
@@ -2,6 +2,12 @@
Changelog
=========

Future
------

* Add ``baseline`` boolean option to the benchmark options to allow adding benchmarks
  for comparison that do not affect relative scores.

3.2.3 (2020-01-10)
------------------

15 changes: 15 additions & 0 deletions docs/usage.rst
@@ -240,6 +240,7 @@ You can set per-test options with the ``benchmark`` marker:

@pytest.mark.benchmark(
group="group-name",
baseline=True,
min_time=0.1,
max_time=0.5,
min_rounds=5,
@@ -258,6 +259,20 @@ You can set per-test options with the ``benchmark`` marker:
# Note: this code is not measured.
assert result is None

In addition to the options whose names coincide with the relevant
command-line options, this allows modifying the following values:

``group``
A user-defined group name that this benchmark belongs to. Use this
to group related benchmarks for comparing values in the results
printed by pytest.

``baseline``
Whether this benchmark's results should be considered as possible
baseline values when comparing them to other results in the same
group. Use this if you want to include some results just for
comparison, without them affecting the relative scores displayed
for other results.

Extra info
==========
3 changes: 2 additions & 1 deletion src/pytest_benchmark/fixture.py
@@ -33,8 +33,9 @@ class BenchmarkFixture(object):
_precisions = {}

def __init__(self, node, disable_gc, timer, min_rounds, min_time, max_time, warmup, warmup_iterations,
calibration_precision, add_stats, logger, warner, disabled, cprofile, group=None):
calibration_precision, add_stats, logger, warner, disabled, cprofile, group=None, baseline=True):
self.name = node.name
self.baseline = baseline
self.fullname = node._nodeid
self.disabled = disabled
if hasattr(node, 'callspec'):
2 changes: 1 addition & 1 deletion src/pytest_benchmark/plugin.py
@@ -424,7 +424,7 @@ def pytest_runtest_setup(item):
for name in marker.kwargs:
if name not in (
"max_time", "min_rounds", "min_time", "timer", "group", "disable_gc", "warmup",
"warmup_iterations", "calibration_precision", "cprofile"):
"warmup_iterations", "calibration_precision", "cprofile", "baseline"):
Owner:
Not sure how, but we should have some way to validate that there is only one baseline per group. It doesn't make sense to have two baselines, right? Let's not let users wonder why stuff doesn't work as expected (the annoying silent failure).

Author:
The current implementation marks all results as possible baselines by default and only excludes the ones marked as baseline=False. If there is more than one baseline=True benchmark available, it will choose the one with the lowest value/highest score. This integrates cleanly with the existing behaviour and means that I don't have to pick the baseline value once and for all (as performance may differ between systems, etc.). The included docs actually mention this. As you mentioned, there cannot be two baselines when the output is rendered, but there can be more than one potential baseline score.

Unless you have strong feelings on this, I'd like to keep it this way for extra flexibility. The wording could be improved, however: maybe something along the lines of potential_baseline, but shorter?
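A rough sketch of that selection rule (editor's illustration of the behaviour
described above and implemented in the table.py change below; simplified, not
the PR's exact code):

    # Every result is a potential baseline unless it was marked baseline=False;
    # the best eligible value (lowest time, or highest value for "ops") becomes
    # the (1.0) anchor for the group.
    def pick_baseline(benchmarks, prop):
        eligible = [bench[prop] for bench in benchmarks if bench.get("baseline", True)]
        if not eligible:
            return None  # no eligible candidate in this group
        return max(eligible) if prop == "ops" else min(eligible)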

raise ValueError("benchmark mark can't have %r keyword argument." % name)


2 changes: 2 additions & 0 deletions src/pytest_benchmark/stats.py
@@ -172,6 +172,7 @@ def ops(self):
class Metadata(object):
def __init__(self, fixture, iterations, options):
self.name = fixture.name
self.baseline = fixture.baseline
self.fullname = fixture.fullname
self.group = fixture.group
self.param = fixture.param
@@ -210,6 +211,7 @@ def as_dict(self, include_data=True, flat=False, stats=True, cprofile=None):
result = {
"group": self.group,
"name": self.name,
"baseline": self.baseline,
"fullname": self.fullname,
"params": self.params,
"param": self.param,
19 changes: 16 additions & 3 deletions src/pytest_benchmark/table.py
@@ -29,6 +29,7 @@ def display(self, tr, groups, progress_reporter=report_progress):
bench["name"] = self.name_format(bench)

worst = {}
baseline = {}
best = {}
solo = len(benchmarks) == 1
for line, prop in progress_reporter(("min", "max", "mean", "median", "iqr", "stddev", "ops"),
@@ -38,11 +39,23 @@
benchmarks, tr, "{line} ({pos}/{total})", line=line))
best[prop] = max(bench[prop] for _, bench in progress_reporter(
benchmarks, tr, "{line} ({pos}/{total})", line=line))
try:
baseline[prop] = max(bench[prop] for _, bench in progress_reporter(
benchmarks, tr, "{line} ({pos}/{total})", line=line)
if bench.get("baseline", True))
except ValueError:
Owner:
Can we avoid the try/except somehow? What actually can raise that error?

Author:
If there is no benchmark in the group marked as baseline, this will end up calling max(()), which raises ValueError: max() arg is an empty sequence.

Should I convert this to an if? It would require evaluating the array of values up front, then checking their len(…). Or would you prefer me to add a comment about this?
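For reference, a minimal sketch of that if-based alternative (editor's
illustration using the surrounding names from this diff, not code from the PR):

    # Materialize the eligible values first, then branch on emptiness instead
    # of catching the ValueError that max() raises for an empty sequence.
    eligible = [
        bench[prop]
        for _, bench in progress_reporter(benchmarks, tr, "{line} ({pos}/{total})", line=line)
        if bench.get("baseline", True)
    ]
    baseline[prop] = max(eligible) if eligible else None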

baseline[prop] = None
else:
worst[prop] = max(bench[prop] for _, bench in progress_reporter(
benchmarks, tr, "{line} ({pos}/{total})", line=line))
best[prop] = min(bench[prop] for _, bench in progress_reporter(
benchmarks, tr, "{line} ({pos}/{total})", line=line))
try:
baseline[prop] = min(bench[prop] for _, bench in progress_reporter(
benchmarks, tr, "{line} ({pos}/{total})", line=line)
if bench.get("baseline", True))
except ValueError:
baseline[prop] = None
for line, prop in progress_reporter(("outliers", "rounds", "iterations"), tr, "{line}: {value}", line=line):
worst[prop] = max(benchmark[prop] for _, benchmark in progress_reporter(
benchmarks, tr, "{line} ({pos}/{total})", line=line))
@@ -106,7 +119,7 @@ def display(self, tr, groups, progress_reporter=report_progress):
ALIGNED_NUMBER_FMT.format(
bench[prop] * adjustment,
widths[prop],
compute_baseline_scale(best[prop], bench[prop], rpadding),
compute_baseline_scale(baseline[prop], bench[prop], rpadding),
rpadding
),
green=not solo and bench[prop] == best.get(prop),
@@ -118,7 +131,7 @@
ALIGNED_NUMBER_FMT.format(
bench[prop] * ops_adjustment,
widths[prop],
compute_baseline_scale(best[prop], bench[prop], rpadding),
compute_baseline_scale(baseline[prop], bench[prop], rpadding),
rpadding
),
green=not solo and bench[prop] == best.get(prop),
@@ -147,7 +160,7 @@ def display(self, tr, groups, progress_reporter=report_progress):


def compute_baseline_scale(baseline, value, width):
if not width:
if not width or baseline is None:
return ""
if value == baseline:
return " (1.0)".ljust(width)
23 changes: 23 additions & 0 deletions tests/test_benchmark.py
@@ -1096,3 +1096,26 @@ def test_columns(testdir):
"Name (time in ?s) * Max * Iterations * Min *",
"------*",
])


def test_report_table_order(testdir):
test = testdir.makepyfile('''
import time
import pytest

@pytest.mark.benchmark(baseline=False)
def test_fast(benchmark):
@benchmark
def result():
return time.sleep(0.000001)
assert result is None

def test_slow(benchmark):
benchmark(lambda: time.sleep(0.1))
assert 1 == 1
''')
result = testdir.runpytest_subprocess(test)
result.stdout.fnmatch_lines([
"test_fast * (0.*) *",
"test_slow * (1.0) *"
])