diff --git a/docs/usage.rst b/docs/usage.rst
index 028f6ea..2e868d5 100644
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -159,7 +159,8 @@ Commandline options
                         @pytest.parametrize. Default: 'group'
   --benchmark-columns=LABELS
                         Comma-separated list of columns to show in the result
-                        table. Default: 'min, max, mean, stddev, median, iqr,
+                        table. Use 'pXX.XX' (e.g. 'p99.9') to show percentiles.
+                        Default: 'min, max, mean, stddev, median, iqr,
                         outliers, rounds, iterations'
   --benchmark-name=FORMAT
                         How to format names in results. Can be one of 'short',
diff --git a/src/pytest_benchmark/plugin.py b/src/pytest_benchmark/plugin.py
index 1275cbd..0f05477 100644
--- a/src/pytest_benchmark/plugin.py
+++ b/src/pytest_benchmark/plugin.py
@@ -69,8 +69,8 @@ def add_display_options(addoption, prefix="benchmark-"):
         "--{0}columns".format(prefix),
         metavar="LABELS", type=parse_columns,
         default=["min", "max", "mean", "stddev", "median", "iqr", "outliers", "ops", "rounds", "iterations"],
-        help="Comma-separated list of columns to show in the result table. Default: "
-             "'min, max, mean, stddev, median, iqr, outliers, rounds, iterations'"
+        help="Comma-separated list of columns to show in the result table. Use 'pXX.XX' (e.g. 'p99.9') to show "
+             "percentiles. Default: 'min, max, mean, stddev, median, iqr, outliers, rounds, iterations'"
     )
     addoption(
         "--{0}name".format(prefix),
@@ -374,9 +374,10 @@ def pytest_benchmark_generate_json(config, benchmarks, include_data, machine_inf
         "datetime": datetime.utcnow().isoformat(),
         "version": __version__,
     }
+    columns = config.getoption("benchmark_columns")
     for bench in benchmarks:
         if not bench.has_error:
-            benchmarks_json.append(bench.as_dict(include_data=include_data))
+            benchmarks_json.append(bench.as_dict(include_data=include_data, columns=columns))
     return output_json
 
 
diff --git a/src/pytest_benchmark/session.py b/src/pytest_benchmark/session.py
index e399144..e159265 100644
--- a/src/pytest_benchmark/session.py
+++ b/src/pytest_benchmark/session.py
@@ -120,7 +120,12 @@ def prepare_benchmarks(self):
                                 if fail:
                                     self.performance_regressions.append((self.name_format(flat_bench), fail))
                             yield flat_bench
-                flat_bench = bench.as_dict(include_data=False, flat=True, cprofile=self.cprofile_sort_by)
+                flat_bench = bench.as_dict(
+                    include_data=False,
+                    flat=True,
+                    cprofile=self.cprofile_sort_by,
+                    columns=self.columns
+                )
                 flat_bench["path"] = None
                 flat_bench["source"] = compared and "NOW"
                 yield flat_bench
diff --git a/src/pytest_benchmark/stats.py b/src/pytest_benchmark/stats.py
index ddb5202..66de12a 100644
--- a/src/pytest_benchmark/stats.py
+++ b/src/pytest_benchmark/stats.py
@@ -6,6 +6,7 @@
 from bisect import bisect_left
 from bisect import bisect_right
 
+from .utils import PERCENTILE_COL_RX
 from .utils import cached_property
 from .utils import funcname
 from .utils import get_cprofile_functions
@@ -26,10 +27,11 @@ def __bool__(self):
         return bool(self.data)
 
     def __nonzero__(self):
         return bool(self.data)
 
-    def as_dict(self):
+    def as_dict(self, extra_fields=None):
+        fields = Stats.fields + tuple(extra_fields) if extra_fields else Stats.fields
         return dict(
             (field, getattr(self, field))
-            for field in self.fields
+            for field in fields
         )
 
     def update(self, duration):
@@ -168,6 +170,52 @@ def ops(self):
             return self.rounds / self.total
         return 0
 
+    def __getattr__(self, name):
+        m = PERCENTILE_COL_RX.match(name)
+        if not m:
+            raise AttributeError(name)
+
+        p = float(m.group(1)) / 100.0
+        return self.percentile(p)
+
+    def percentile(self, percent):
+        ''' Compute the interpolated percentile.
+
+        This is the method recommended by NIST:
+        http://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm
+
+        percent must be in the range [0.0, 1.0].
+        '''
+        if not (0.0 <= percent <= 1.0):
+            raise ValueError('percent must be in the range [0.0, 1.0]')
+
+        if not hasattr(self, '_percentile_cache'):
+            self._percentile_cache = {}
+
+        # Check the cache first
+        # This isn't perfect with floats for the usual reasons, but is good enough
+        cached = self._percentile_cache.get(percent)
+        if cached is not None:
+            return cached
+
+        # percentiles require sorted data
+        data = self.sorted_data
+        N = len(data)
+        if percent <= 1/(N+1):
+            # Too small, return min
+            return self._percentile_cache.setdefault(percent, data[0])
+        elif percent >= N/(N+1):
+            # too big, return max
+            return self._percentile_cache.setdefault(percent, data[-1])
+        else:
+            r = percent * (N + 1)
+            k = r // 1
+            d = r % 1
+
+            n = int(k - 1)  # zero-indexed lists
+            result = data[n] + d * (data[n+1] - data[n])
+            return self._percentile_cache.setdefault(percent, result)
+
 
 class Metadata(object):
     def __init__(self, fixture, iterations, options):
@@ -180,9 +228,9 @@ def __init__(self, fixture, iterations, options):
         self.cprofile_stats = fixture.cprofile_stats
 
         self.iterations = iterations
-        self.stats = Stats()
         self.options = options
         self.fixture = fixture
+        self.stats = Stats()
 
     def __bool__(self):
         return bool(self.stats)
@@ -206,7 +254,7 @@ def __getitem__(self, key):
     def has_error(self):
         return self.fixture.has_error
 
-    def as_dict(self, include_data=True, flat=False, stats=True, cprofile=None):
+    def as_dict(self, include_data=True, flat=False, stats=True, cprofile=None, columns=None):
         result = {
             "group": self.group,
             "name": self.name,
@@ -236,7 +284,12 @@ def as_dict(self, include_data=True, flat=False, stats=True, cprofile=None):
             if cprofile is None or len(cprofile_functions) == len(cprofile_list):
                 break
         if stats:
-            stats = self.stats.as_dict()
+            if columns is not None:
+                extra_fields = tuple(c for c in columns if c not in Stats.fields and PERCENTILE_COL_RX.match(c))
+            else:
+                extra_fields = None
+
+            stats = self.stats.as_dict(extra_fields=extra_fields)
             if include_data:
                 stats["data"] = self.stats.data
             stats["iterations"] = self.iterations
diff --git a/src/pytest_benchmark/table.py b/src/pytest_benchmark/table.py
index ed086e5..943eb53 100644
--- a/src/pytest_benchmark/table.py
+++ b/src/pytest_benchmark/table.py
@@ -5,6 +5,7 @@
 import sys
 from math import isinf
 
+from .utils import PERCENTILE_COL_RX
 from .utils import operations_unit
 from .utils import report_progress
 from .utils import time_unit
@@ -14,6 +15,8 @@
 
 
 class TableResults(object):
+    standard_columns = ("min", "max", "mean", "stddev", "median", "iqr")
+
     def __init__(self, columns, sort, histogram, name_format, logger):
         self.columns = columns
         self.sort = sort
@@ -22,6 +25,9 @@ def __init__(self, columns, sort, histogram, name_format, logger):
         self.logger = logger
 
     def display(self, tr, groups, progress_reporter=report_progress):
+        percentile_columns = tuple(c for c in self.columns if PERCENTILE_COL_RX.match(c))
+        numeric_columns = self.standard_columns + percentile_columns
+
         tr.write_line("")
         tr.rewrite("Computing stats ...", black=True, bold=True)
         for line, (group, benchmarks) in progress_reporter(groups, tr, "Computing stats ... group {pos}/{total}"):
@@ -32,8 +38,7 @@ def display(self, tr, groups, progress_reporter=report_progress):
             worst = {}
             best = {}
             solo = len(benchmarks) == 1
-            for line, prop in progress_reporter(("min", "max", "mean", "median", "iqr", "stddev", "ops"),
-                                                tr, "{line}: {value}", line=line):
+            for line, prop in progress_reporter(numeric_columns + ("ops",), tr, "{line}: {value}", line=line):
                 if prop == "ops":
                     worst[prop] = min(bench[prop] for _, bench in progress_reporter(
                         benchmarks, tr, "{line} ({pos}/{total})", line=line))
@@ -66,6 +71,8 @@ def display(self, tr, groups, progress_reporter=report_progress):
                 "outliers": "Outliers",
                 "ops": "OPS ({0}ops/s)".format(ops_unit) if ops_unit else "OPS",
             }
+            labels.update(dict((c, c.upper()) for c in percentile_columns))
+
             widths = {
                 "name": 3 + max(len(labels["name"]), max(len(benchmark["name"]) for benchmark in benchmarks)),
                 "rounds": 2 + max(len(labels["rounds"]), len(str(worst["rounds"]))),
@@ -73,7 +80,7 @@ def display(self, tr, groups, progress_reporter=report_progress):
                 "outliers": 2 + max(len(labels["outliers"]), len(str(worst["outliers"]))),
                 "ops": 2 + max(len(labels["ops"]), len(NUMBER_FMT.format(best["ops"] * ops_adjustment))),
             }
-            for prop in "min", "max", "mean", "stddev", "median", "iqr":
+            for prop in numeric_columns:
                 widths[prop] = 2 + max(len(labels[prop]), max(
                     len(NUMBER_FMT.format(bench[prop] * adjustment))
                     for bench in benchmarks
@@ -83,7 +90,7 @@ def display(self, tr, groups, progress_reporter=report_progress):
             labels_line = labels["name"].ljust(widths["name"]) + "".join(
                 labels[prop].rjust(widths[prop]) + (
                     " " * rpadding
-                    if prop not in ["outliers", "rounds", "iterations"]
+                    if prop not in ("outliers", "rounds", "iterations")
                     else ""
                 )
                 for prop in self.columns
@@ -103,7 +110,7 @@ def display(self, tr, groups, progress_reporter=report_progress):
                 has_error = bench.get("has_error")
                 tr.write(bench["name"].ljust(widths["name"]), red=has_error, invert=has_error)
                 for prop in self.columns:
-                    if prop in ("min", "max", "mean", "stddev", "median", "iqr"):
+                    if prop in numeric_columns:
                         tr.write(
                             ALIGNED_NUMBER_FMT.format(
                                 bench[prop] * adjustment,
diff --git a/src/pytest_benchmark/utils.py b/src/pytest_benchmark/utils.py
index d9a18f6..3bf9ed8 100644
--- a/src/pytest_benchmark/utils.py
+++ b/src/pytest_benchmark/utils.py
@@ -56,6 +56,7 @@ def check_output(*popenargs, **kwargs):
     "n": "Nanoseconds (ns)"
 }
 ALLOWED_COLUMNS = ["min", "max", "mean", "stddev", "median", "iqr", "ops", "outliers", "rounds", "iterations"]
+PERCENTILE_COL_RX = re.compile(r'p(\d+(?:\.\d+)?)')
 
 
 class SecondsDecimal(Decimal):
@@ -360,11 +361,11 @@ def parse_sort(string):
 
 def parse_columns(string):
     columns = [str.strip(s) for s in string.lower().split(',')]
-    invalid = set(columns) - set(ALLOWED_COLUMNS)
+    invalid = set(columns) - set(ALLOWED_COLUMNS) - set(c for c in columns if PERCENTILE_COL_RX.match(c))
     if invalid:
         # there are extra items in columns!
         msg = "Invalid column name(s): %s. " % ', '.join(invalid)
-        msg += "The only valid column names are: %s" % ', '.join(ALLOWED_COLUMNS)
+        msg += "The only valid column names are: %s, pXX.XX" % ', '.join(ALLOWED_COLUMNS)
         raise argparse.ArgumentTypeError(msg)
     return columns
 
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
index 57d5a3f..62d1544 100644
--- a/tests/test_benchmark.py
+++ b/tests/test_benchmark.py
@@ -87,8 +87,9 @@ def test_help(testdir):
         "                        @pytest.parametrize. Default: 'group'",
         "  --benchmark-columns=LABELS",
         "                        Comma-separated list of columns to show in the result",
-        "                        table. Default: 'min, max, mean, stddev, median, iqr,",
-        "                        outliers, rounds, iterations'",
+        "                        table. Use 'pXX.XX' (e.g. 'p99.9') to show",
+        "                        percentiles. Default: 'min, max, mean, stddev, median,",
+        "                        iqr, outliers, rounds, iterations'",
         "  --benchmark-histogram=[FILENAME-PREFIX]",
         "                        Plot graphs of min/max/avg/stddev over time in",
         "                        FILENAME-PREFIX-test_name.svg. If FILENAME-PREFIX",
@@ -679,6 +680,18 @@ def test_extra(benchmark):
     assert bench_info['extra_info'] == {'foo': 'bar'}
 
 
+def test_save_percentiles(testdir):
+    test = testdir.makepyfile(SIMPLE_TEST)
+    result = testdir.runpytest('--doctest-modules', '--benchmark-save=foobar',
+                               '--benchmark-max-time=0.0000001', '--benchmark-columns=min,p99,max', test)
+    result.stderr.fnmatch_lines([
+        "Saved benchmark data in: *",
+    ])
+    info = json.loads(testdir.tmpdir.join('.benchmarks').listdir()[0].join('0001_foobar.json').read())
+    bench_info = info['benchmarks'][0]
+    assert 'p99' in bench_info['stats']
+
+
 def test_histogram(testdir):
     test = testdir.makepyfile(SIMPLE_TEST)
     result = testdir.runpytest('--doctest-modules', '--benchmark-histogram=foobar',
@@ -1072,3 +1085,14 @@ def test_columns(testdir):
         "Name (time in ?s) * Max * Iterations * Min *",
         "------*",
     ])
+
+def test_columns_percentiles(testdir):
+    test = testdir.makepyfile(SIMPLE_TEST)
+    result = testdir.runpytest('--doctest-modules', '--benchmark-columns=max,p99,iterations,min', test)
+    result.stdout.fnmatch_lines([
+        "*collected 3 items",
+        "test_columns_percentiles.py ...",
+        "* benchmark: 2 tests *",
+        "Name (time in ?s) * Max * P99 * Iterations * Min *",
+        "------*",
+    ])
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 35055c0..5252a7d 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -101,8 +101,9 @@ def test_help_compare(testdir, args):
         "                        'param:NAME', where NAME is the name passed to",
         "                        @pytest.parametrize. Default: 'group'",
         "  --columns LABELS      Comma-separated list of columns to show in the result",
-        "                        table. Default: 'min, max, mean, stddev, median, iqr,",
-        "                        outliers, rounds, iterations'",
+        "                        table. Use 'pXX.XX' (e.g. 'p99.9') to show",
+        "                        percentiles. Default: 'min, max, mean, stddev, median,",
+        "                        iqr, outliers, rounds, iterations'",
         "  --name FORMAT         How to format names in results. Can be one of 'short',",
         "                        'normal', 'long'. Default: 'normal'",
         "  --histogram [FILENAME-PREFIX]",
diff --git a/tests/test_elasticsearch_storage.py b/tests/test_elasticsearch_storage.py
index 016f39f..9cbdcb8 100644
--- a/tests/test_elasticsearch_storage.py
+++ b/tests/test_elasticsearch_storage.py
@@ -81,6 +81,8 @@ def __init__(self):
             'max_time': 345,
         }
         self.compare_fail = []
+        self.columns = ['min', 'max', 'mean', 'stddev', 'median', 'iqr',
+                        'outliers', 'rounds', 'iterations']
         self.config = Namespace(hook=Namespace(
             pytest_benchmark_group_stats=pytest_benchmark_group_stats,
             pytest_benchmark_generate_machine_info=lambda **kwargs: {'foo': 'bar'},
@@ -90,20 +92,18 @@ def __init__(self):
             pytest_benchmark_update_json=lambda **kwargs: None,
             pytest_benchmark_generate_commit_info=lambda **kwargs: {'foo': 'bar'},
             pytest_benchmark_update_commit_info=lambda **kwargs: None,
-        ))
+        ), getoption=lambda name: {'benchmark_columns': self.columns}[name])
         self.elasticsearch_host = "localhost:9200"
         self.elasticsearch_index = "benchmark"
         self.elasticsearch_doctype = "benchmark"
         self.storage = MockStorage()
         self.group_by = 'group'
-        self.columns = ['min', 'max', 'mean', 'stddev', 'median', 'iqr',
-                        'outliers', 'rounds', 'iterations']
         self.benchmarks = []
         with BENCHFILE.open('rU') as fh:
             data = json.load(fh)
         self.benchmarks.extend(
             Namespace(
-                as_dict=lambda include_data=False, stats=True, flat=False, _bench=bench:
+                as_dict=lambda include_data=False, stats=True, flat=False, _bench=bench, columns=None:
                 dict(_bench, **_bench["stats"]) if flat else dict(_bench),
                 name=bench['name'],
                 fullname=bench['fullname'],
diff --git a/tests/test_stats.py b/tests/test_stats.py
index 5de4560..d55c5d6 100644
--- a/tests/test_stats.py
+++ b/tests/test_stats.py
@@ -110,3 +110,67 @@ def test_ops():
     stats.update(0)
     assert stats.mean == 0
     assert stats.ops == 0
+
+
+def test_percentile():
+    stats = Stats()
+
+    # Taken from http://onlinestatbook.com/2/introduction/percentiles.html
+    for i in [4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 10, 10, 10]:
+        stats.update(i)
+
+    assert stats.percentile(0.0) == stats.min
+    assert stats.percentile(1.0) == stats.max
+    assert stats.percentile(0.5) == stats.median
+    assert stats.percentile(0.25) == 5.0
+    assert stats.percentile(0.85) == 9.849999999999998  # approx(9.85)
+
+    assert hasattr(stats, '_percentile_cache')
+    assert 0.85 in stats._percentile_cache
+    assert stats._percentile_cache[0.85] == 9.849999999999998  # approx(9.85)
+
+
+def test_percentile2():
+    stats = Stats()
+
+    # Taken from http://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm
+    for i in [95.1772, 95.1567, 95.1937, 95.1959, 95.1442, 95.0610, 95.1591, 95.1195, 95.1065, 95.0925, 95.1990, 95.1682]:
+        stats.update(i)
+
+    assert stats.p0 == stats.min
+    assert stats.p100 == stats.max
+    assert stats.p50 == stats.median
+    assert stats.p90 == 95.19807  # approx(95.1981)
+
+
+def test_extra_fields():
+    # Test that percentiles are included in .as_dict() results
+    expected = {
+        'p85': 9.849999999999998,
+        'p25': 5.0,
+        "min": 4,
+        "max": 10,
+        "mean": 7,
+        "stddev": 2.0261449005179113,
+        "rounds": 20,
+        "median": 7.0,
+        "iqr": 4.0,
+        "q1": 5.0,
+        "q3": 9.0,
+        "iqr_outliers": 0,
+        "stddev_outliers": 5,
+        "outliers": '5;0',
+        "ld15iqr": 4,
+        "hd15iqr": 10,
+        "ops": 0.14285714285714285,
+        "total": 140,
+    }
+
+    stats = Stats()
+
+    for i in [4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 10, 10, 10]:
+        stats.update(i)
+
+    result = stats.as_dict(extra_fields=('p25', 'p85'))
+
+    assert result == expected
diff --git a/tests/test_storage.py b/tests/test_storage.py
index f066ee1..db21861 100644
--- a/tests/test_storage.py
+++ b/tests/test_storage.py
@@ -77,6 +77,8 @@ def __init__(self, name_format):
         }
         self.cprofile_sort_by = 'cumtime'
         self.compare_fail = []
+        self.columns = ['min', 'max', 'mean', 'stddev', 'median', 'iqr',
+                        'outliers', 'rounds', 'iterations', 'ops']
         self.config = Namespace(hook=Namespace(
             pytest_benchmark_group_stats=pytest_benchmark_group_stats,
             pytest_benchmark_generate_machine_info=lambda **kwargs: {'foo': 'bar'},
@@ -86,15 +88,13 @@ def __init__(self, name_format):
             pytest_benchmark_update_json=lambda **kwargs: None,
             pytest_benchmark_generate_commit_info=lambda **kwargs: {'foo': 'bar'},
             pytest_benchmark_update_commit_info=lambda **kwargs: None,
-        ))
+        ), getoption=lambda name: {'benchmark_columns': self.columns}[name])
         self.storage = FileStorage(str(STORAGE), default_machine_id=get_machine_id(), logger=self.logger)
         self.group_by = 'group'
-        self.columns = ['min', 'max', 'mean', 'stddev', 'median', 'iqr',
-                        'outliers', 'rounds', 'iterations', 'ops']
         for bench_file, data in reversed(list(self.storage.load("[0-9][0-9][0-9][0-9]_*"))):
             self.benchmarks.extend(
                 Namespace(
-                    as_dict=lambda include_data=False, stats=True, flat=False, _bench=bench, cprofile='cumtime':
+                    as_dict=lambda include_data=False, stats=True, flat=False, _bench=bench, cprofile='cumtime', columns=None:
                     dict(_bench, **_bench["stats"]) if flat else dict(_bench),
                     name=bench['name'],
                     fullname=bench['fullname'],
diff --git a/tests/test_utils.py b/tests/test_utils.py
index f4ad5e1..327e9c6 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -138,6 +138,8 @@ def test_parse_warmup():
 def test_parse_columns():
     assert parse_columns('min,max') == ['min', 'max']
     assert parse_columns('MIN, max ') == ['min', 'max']
+    assert parse_columns('min,max,p99') == ['min', 'max', 'p99']
+    assert parse_columns('p0,p50,p99,p99.9,p100') == ['p0', 'p50', 'p99', 'p99.9', 'p100']
     with pytest.raises(argparse.ArgumentTypeError):
         parse_columns('min,max,x')
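
Note on the percentile math above (not part of the patch itself): Stats.percentile() follows the NIST interpolation rule the docstring links to. The fractional rank r = percent * (N + 1) is split into a whole part k and a fractional part d, and the result is interpolated between the two neighbouring order statistics of the sorted data, clamping to the minimum or maximum when the rank falls outside 1..N. A minimal standalone sketch of that calculation, using the NIST dataset from test_percentile2, is shown below; the nist_percentile helper name is illustrative only, and the sketch assumes Python 3 division.

    # Minimal sketch of the interpolated (NIST) percentile used by Stats.percentile().
    # `nist_percentile` is a hypothetical helper name, not part of the patch.
    def nist_percentile(data, percent):
        data = sorted(data)
        n = len(data)
        if percent <= 1 / (n + 1):
            return data[0]            # rank falls below the first sample: clamp to the minimum
        if percent >= n / (n + 1):
            return data[-1]           # rank falls above the last sample: clamp to the maximum
        rank = percent * (n + 1)      # fractional, 1-based rank
        k = int(rank)                 # whole part
        d = rank - k                  # fractional part
        return data[k - 1] + d * (data[k] - data[k - 1])

    # Dataset from http://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm,
    # as exercised by test_percentile2: the 90th percentile interpolates to 95.19807.
    samples = [95.1772, 95.1567, 95.1937, 95.1959, 95.1442, 95.0610,
               95.1591, 95.1195, 95.1065, 95.0925, 95.1990, 95.1682]
    print(nist_percentile(samples, 0.90))  # -> 95.19807

With the patch applied, the same values surface through the new column syntax, e.g. pytest --benchmark-columns=min,p50,p99.9,max, and through attribute access such as stats.p90.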