"""Helper functions and classes for benchmarking.

Functions to be benchmarked in a module should be decorated with `benchmark`, which
takes one positional argument corresponding to a function that performs any necessary
set up for the benchmarked function (returning a dictionary, potentially empty, of
precomputed values to pass to the benchmark function as keyword arguments) and zero
or more keyword arguments specifying parameter names and lists of values to benchmark
over (the Cartesian product of all specified parameter values is used). The benchmark
function is passed the union of the precomputed values returned by the setup function
and the parameter values as keyword arguments.

As a simple example, the following defines a benchmark for computing the mean of a
list of numbers.

```Python
import random
from benchmarking import benchmark

def setup_mean(n):
    return {"x": [random.random() for _ in range(n)]}

@benchmark(setup_mean, n=[1, 2, 3, 4])
def mean(x, n):
    return sum(x) / n
```

The `skip` function can be used to skip the benchmark for certain parameter values.
For example:

```Python
import random
from benchmarking import benchmark, skip

def setup_mean(n):
    return {"x": [random.random() for _ in range(n)]}

@benchmark(setup_mean, n=[0, 1, 2, 3, 4])
def mean(x, n):
    if n == 0:
        skip("number of items must be positive")
    return sum(x) / n
```

Skipping makes most sense when excluding certain combinations of parameters from a
benchmark.

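For example, a benchmark over two parameters (the setup and benchmark functions here
are purely illustrative) might skip the combinations in which the two vector lengths
do not match:

```Python
import random
from benchmarking import benchmark, skip

def setup_dot(n, m):
    return {
        "x": [random.random() for _ in range(n)],
        "y": [random.random() for _ in range(m)],
    }

@benchmark(setup_dot, n=[1, 10], m=[1, 10])
def dot(x, y, n, m):
    if n != m:
        skip("vectors must have equal length")
    return sum(a * b for a, b in zip(x, y))
```
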
The `parse_args_collect_and_run_benchmarks` function should be called within an
`if __name__ == '__main__'` block at the end of the module defining the benchmarks
to allow it to be executed as a script for running the benchmarks:

```Python
from benchmarking import benchmark, parse_args_collect_and_run_benchmarks

...

if __name__ == "__main__":
    parse_args_collect_and_run_benchmarks()
```
"""

import argparse
from ast import literal_eval
from functools import partial
from itertools import product
from pathlib import Path
import json
import timeit
import inspect

try:
    import memory_profiler

    MEMORY_PROFILER_AVAILABLE = True
except ImportError:
    MEMORY_PROFILER_AVAILABLE = False


class SkipBenchmarkException(Exception):
    """Exception to be raised to skip benchmark for some parameter set."""


def skip(message):
    """Skip benchmark for a particular parameter set with explanatory message.

    Args:
        message (str): Message explaining why benchmark parameter set was skipped.
    """
    raise SkipBenchmarkException(message)


def benchmark(setup_=None, **parameters):
    """Decorator for defining a function to be benchmarked.

    Args:
        setup_: Function performing any necessary set up for the benchmark; its
            resource usage will not be tracked in benchmarking. The function should
            return a dictionary of values to pass to the benchmark as keyword
            arguments.

    Kwargs:
        Parameter names and associated lists of values over which to run benchmark.
        The benchmark is run for the Cartesian product of all parameter values.

    Returns:
        Decorator which marks function as benchmark and sets setup function and
        parameters attributes.
    """

    def decorator(function):
        function.is_benchmark = True
        # Default setup function accepts (and ignores) any parameters and returns no
        # precomputed values
        function.setup = setup_ if setup_ is not None else (lambda **_: {})
        function.parameters = parameters
        return function

    return decorator


def _parameters_string(parameters):
    """Format parameter values as string for printing benchmark results."""
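    # For example (illustrative): {"n": 10, "m": 2} is formatted as "(n: 10, m: 2)"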
    return "(" + ", ".join(f"{name}: {val}" for name, val in parameters.items()) + ")"


def _dict_product(dicts):
    """Generator corresponding to Cartesian product of dictionaries."""
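    # For example (illustrative): _dict_product({"a": [1, 2], "b": [3]}) yields
    # {"a": 1, "b": 3} followed by {"a": 2, "b": 3}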
    return (dict(zip(dicts.keys(), values)) for values in product(*dicts.values()))


def _parse_value(value):
    """Parse a command line value as a Python literal, falling back to a string."""
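    # For example (illustrative): "1.5" -> 1.5, "[1, 2]" -> [1, 2], "abc" -> "abc"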
    try:
        return literal_eval(value)
    except (ValueError, SyntaxError):
        return str(value)


def _parse_parameter_overrides(parameter_overrides):
    """Parse any parameter override values passed as command line arguments."""
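    # For example (illustrative): [["n", "10", "100"]] -> {"n": [10, 100]}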
    return (
        {
            parameter: [_parse_value(v) for v in values]
            for parameter, *values in parameter_overrides
        }
        if parameter_overrides is not None
        else {}
    )


def _parse_cli_arguments():
    """Parse command line arguments passed for controlling benchmark runs."""
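    # Typical invocation of a module that calls parse_args_collect_and_run_benchmarks
    # (the file name benchmarks.py is purely illustrative):
    #   python benchmarks.py -number-runs 5 -repeats 10 -parameter-overrides n 100 1000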
    parser = argparse.ArgumentParser(description="Run benchmarks")
    parser.add_argument(
        "-number-runs",
        type=int,
        default=10,
        help="Number of times to run the benchmark in succession in each timing run.",
    )
    parser.add_argument(
        "-repeats",
        type=int,
        default=3,
        help="Number of times to repeat the benchmark runs.",
    )
    parser.add_argument(
        "-parameter-overrides",
        type=str,
        action="append",
        nargs="*",
        help=(
            "Override for the values to use for a benchmark parameter: a parameter "
            "name followed by a space separated list of values to use. May be "
            "specified multiple times to override multiple parameters."
        ),
    )
    parser.add_argument(
        "-output-file", type=Path, help="File path to write JSON formatted results to."
    )
    return parser.parse_args()


def _is_benchmark(object):
    """Predicate for testing whether an object is a benchmark function or not."""
    return (
        inspect.isfunction(object)
        and hasattr(object, "is_benchmark")
        and object.is_benchmark
    )


def collect_benchmarks(module):
    """Collect all benchmark functions from a module.

    Args:
        module: Python module containing benchmark functions.

    Returns:
        List of functions in module with `is_benchmark` attribute set to `True`.
    """
    return [function for name, function in inspect.getmembers(module, _is_benchmark)]


def run_benchmarks(
    benchmarks,
    number_runs,
    number_repeats,
    print_results=True,
    parameter_overrides=None,
):
    """Run a set of benchmarks.

    Args:
        benchmarks: Benchmark functions to run, with `setup` and `parameters`
            attributes specifying the setup function and parameter sets.
        number_runs: Number of times to run the benchmark in succession in each
            timing run. Larger values will reduce noise but be slower to run.
        number_repeats: Number of repeats of timing runs of benchmark. Larger values
            will give more recorded values to characterise spread but be slower to run.
        print_results: Whether to print benchmark results to stdout.
        parameter_overrides: Dictionary specifying any overrides for parameter values
            set in `benchmark` decorator.

    Returns:
        Dictionary containing timing (and potentially memory usage) results for each
        parameter set of each benchmark function.
    """
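    # The returned dictionary is keyed by benchmark function name and then by the
    # formatted parameter set, for example (illustrative)
    #   {"mean": {"(n: 1)": {"n": 1, "times / s": [...], "peak_memory / MiB": ...}}}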
    results = {}
    for benchmark in benchmarks:
        results[benchmark.__name__] = {}
        if print_results:
            print(benchmark.__name__)
        parameters = benchmark.parameters.copy()
        if parameter_overrides is not None:
            parameters.update(parameter_overrides)
        for parameter_set in _dict_product(parameters):
            # Key results for this parameter set by its formatted string so that
            # results for previous parameter sets are not overwritten
            parameters_key = _parameters_string(parameter_set)
            try:
                precomputes = benchmark.setup(**parameter_set)
                benchmark_function = partial(benchmark, **precomputes, **parameter_set)
                run_times = [
                    time / number_runs
                    for time in timeit.repeat(
                        benchmark_function, number=number_runs, repeat=number_repeats
                    )
                ]
                results[benchmark.__name__][parameters_key] = {
                    **parameter_set,
                    "times / s": run_times,
                }
                if MEMORY_PROFILER_AVAILABLE:
                    baseline_memory = memory_profiler.memory_usage(max_usage=True)
                    peak_memory = (
                        memory_profiler.memory_usage(
                            benchmark_function,
                            interval=max(run_times) * number_repeats,
                            max_usage=True,
                            max_iterations=number_repeats,
                            include_children=True,
                        )
                        - baseline_memory
                    )
                    results[benchmark.__name__][parameters_key][
                        "peak_memory / MiB"
                    ] = peak_memory
                if print_results:
                    print(
                        (
                            f"{parameters_key:>40}: \n  "
                            if len(parameter_set) != 0
                            else "  "
                        )
                        + f"min(time): {min(run_times):>#7.2g}s, "
                        + f"max(time): {max(run_times):>#7.2g}s, "
                        + (
                            f"peak mem.: {peak_memory:>#7.2g}MiB"
                            if MEMORY_PROFILER_AVAILABLE
                            else ""
                        )
                    )
            except SkipBenchmarkException as e:
                if print_results:
                    print(f"{parameters_key:>40}: skipped - {e}")
    return results


def parse_args_collect_and_run_benchmarks(module=None):
    """Parse command line arguments then collect and run all benchmarks in a module.

    Args:
        module: Module containing benchmarks to run. Defaults to the module from which
            this function was called if not specified (left as `None`).

    Returns:
        Dictionary containing timing (and potentially memory usage) results for each
        parameter set of each benchmark function.
    """
    args = _parse_cli_arguments()
    parameter_overrides = _parse_parameter_overrides(args.parameter_overrides)
    if module is None:
        frame = inspect.stack()[1]
        module = inspect.getmodule(frame[0])
    results = run_benchmarks(
        benchmarks=collect_benchmarks(module),
        number_runs=args.number_runs,
        number_repeats=args.repeats,
        parameter_overrides=parameter_overrides,
    )
    if args.output_file is not None:
        with open(args.output_file, "w") as f:
            json.dump(results, f)
    return results