Commit d854f0f

[benchmark] test_performance with BenchmarkDriver
Refactored the `test_performance` function to use the existing BenchmarkDriver and TestComparator. This replaces the hand-rolled parser and comparison logic with library functions that already have full unit test coverage.
1 parent: cd4886a
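In outline, the commit drops the script's own CSV regex and min-value comparison in favor of the compare_perf_tests pipeline. The sketch below is illustrative only and not part of the commit: the class names and attributes (results_from_string, decreased, increased, name) are taken from the diff that follows, while the benchmark log lines and the 0.05 threshold are invented.

    from compare_perf_tests import LogParser, TestComparator, create_report

    # Invented sample output in the benchmark log format; real runs produce
    # '#,TEST,SAMPLES,MIN(us),...' result lines shaped like these.
    old_log = '1,Ackermann,3,715,728,721,5,721\n2,AngryPhonebook,3,2550,2570,2560,8,2560'
    new_log = '1,Ackermann,3,710,722,716,5,716\n2,AngryPhonebook,3,3100,3130,3115,9,3115'

    old_results = LogParser.results_from_string(old_log)
    new_results = LogParser.results_from_string(new_log)

    # TestComparator buckets tests by relative change against a threshold,
    # replacing the removed score_re parsing and within_threshold check.
    comparator = TestComparator(old_results, new_results, 0.05)
    changed = comparator.decreased + comparator.increased
    print([test.name for test in changed])

    # create_report renders the comparison the same way report_results does.
    print(create_report(old_results, new_results, 0.05, 'git'))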

benchmark/scripts/run_smoke_bench

Lines changed: 54 additions & 82 deletions
@@ -26,11 +26,10 @@ from __future__ import print_function
 import argparse
 import glob
 import os
-import re
 import subprocess
 import sys

-from compare_perf_tests import LogParser, create_report
+from compare_perf_tests import LogParser, TestComparator, create_report

 from imp import load_source
 # import Benchmark_Driver # doesn't work because it misses '.py' extension
@@ -149,89 +148,61 @@ def test_opt_levels(args):
     return 0


-def test_performance(opt_level, old_dir, new_dir, threshold, num_samples,
-                     output_file):
-    num_results_dont_differ = 0
-    iter = 1
-    to_test = None
-    prev_num_tests = None
+def measure(driver, tests, i):
+    """Log and measure samples of the tests with the given driver.

-    old_lines = ""
-    new_lines = ""
+    Collect an increasing number of samples, depending on the iteration.
+    """
+    num_samples = min(i + 3, 10)
+    msg = ' Iteration {0} for {1}: num samples = {2}, '.format(
+        i, driver.args.tests, num_samples)
+    msg += ('running all tests' if driver.all_tests == tests else
+            're-testing {0} tests'.format(len(tests)))
+    log(msg)
+    driver.tests = tests
+    return driver.run(num_samples=num_samples, sample_time=0.0025)

-    # #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),PEAK_MEMORY(B)
-    score_re = re.compile(r"(\d+),([\w.\-]+),\d+,(\d+)")
-
-    while to_test is None or len(to_test) > 0:
-        tested_benchmarks = set()
-
-        # (benchmark_name, benchmark_directory) -> (min_value, result_line)
-        values = {}
-
-        # Run the benchmarks and store the results in 'values'.
-        for bench_dir in (old_dir, new_dir):
-            log(' Iteration ' + str(iter) + ' for ' + bench_dir +
-                ': num samples = ' + str(num_samples) +
-                (', running all tests' if to_test is None
-                 else ', re-testing ' + str(len(to_test)) + ' tests'))
-
-            result = get_results(bench_dir, opt_level, num_samples, to_test)
-            for line in result.splitlines():
-                m = score_re.match(line)
-                if m:
-                    testname = m.group(2)
-                    val = int(m.group(3))
-                    values[(testname, bench_dir)] = (val, line)
-                    tested_benchmarks.add(testname)
-
-        # Some local utility functions
-
-        def bench_in(bench, bench_dir):
-            return (bench, bench_dir) in values
-
-        def within_threshold(bench):
-            old_val = values[(bench, old_dir)][0]
-            new_val = values[(bench, new_dir)][0]
-            if not new_val:
-                return True
-            f = float(old_val) / float(new_val)
-            return f >= 1.0 - threshold and f <= 1.0 + threshold
-
-        def result_line(bench, bench_dir):
-            result_line = values[(bench, bench_dir)][1]
-            return result_line + '\n'
-
-        # Check which benchmarks are added/removed and which need to be re-run
-        to_test = []
-        for bench in sorted(tested_benchmarks):
-            if bench_in(bench, old_dir) and not bench_in(bench, new_dir):
-                old_lines += result_line(bench, old_dir)
-            elif bench_in(bench, new_dir) and not bench_in(bench, old_dir):
-                new_lines += result_line(bench, new_dir)
-            elif within_threshold(bench) or num_results_dont_differ >= 4:
-                old_lines += result_line(bench, old_dir)
-                new_lines += result_line(bench, new_dir)
-            else:
-                to_test.append(bench)
-                if VERBOSE:
-                    log(' test again ' + bench)
-
-        # Track how many times we could not reduce the number of benchmarks
-        if prev_num_tests == len(to_test):
-            num_results_dont_differ += 1
-        else:
-            num_results_dont_differ = 0
-        prev_num_tests = len(to_test)

-        # Increase the number of samples for benchmarks which re-run
-        if num_samples < 10:
-            num_samples += 1
+def merge(results, other_results):
+    """Merge the other PerformanceTestResults into the first dictionary."""
+    for test, result in other_results.items():
+        results[test].merge(result)
+    return results

-        iter += 1
+
+def test_performance(opt_level, old_dir, new_dir, threshold, num_samples,
+                     output_file):
+    """Detect performance changes in benchmarks.
+
+    Start fast with few samples per benchmark and gradually spend more time
+    gathering more precise measurements of the change candidates.
+    """
+
+    i, unchanged_length_count = 0, 0
+    old, new = [BenchmarkDriver(DriverArgs(dir, optimization=opt_level))
+                for dir in [old_dir, new_dir]]
+    results = [measure(driver, driver.tests, i) for driver in [old, new]]
+    tests = TestComparator(results[0], results[1], threshold)
+    changed = tests.decreased + tests.increased
+
+    while len(changed) > 0 and unchanged_length_count < 5:
+        i += 1
+        if VERBOSE:
+            log(' test again: ' + str([test.name for test in changed]))
+        results = [merge(the_results,
+                         measure(driver, [test.name for test in changed], i))
+                   for the_results, driver in zip(results, [old, new])]
+        tests = TestComparator(results[0], results[1], threshold)
+        changed = tests.decreased + tests.increased
+
+        if len(old.tests) == len(changed):
+            unchanged_length_count += 1
+        else:
+            unchanged_length_count = 0

     log('')
-    return report_results("Performance: -" + opt_level,
-                          old_lines, new_lines, threshold * 1.4, output_file)
+    return report_results("Performance: -" + opt_level, None, None,
+                          threshold * 1.4, output_file, *results)


 def get_results(bench_dir, opt_level, num_samples, to_test):
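The retest loop above starts cheap and buys precision only where it matters: measure() takes three samples per benchmark on the first pass and one more per retest iteration, capped at ten, and test_performance keeps re-measuring only the suspected changes until the changed set empties or fails to shrink for five consecutive iterations. Below is a standalone sketch of that control flow under stated assumptions: fake_measure and fake_compare are made-up stand-ins for BenchmarkDriver.run and TestComparator, and the benchmark names, timings, and threshold are invented.

    import random

    def fake_measure(tests, i):
        """Stand-in for measure(): same sampling schedule, random 'timings'."""
        num_samples = min(i + 3, 10)  # 3 samples first, +1 per retest, max 10
        print('iteration {0}: {1} tests, {2} samples each'.format(
            i, len(tests), num_samples))
        return {t: random.gauss(100, 5.0 / num_samples) for t in tests}

    def fake_compare(old, new, threshold):
        """Stand-in for TestComparator: flag ratios outside the threshold."""
        return [t for t in old if abs(old[t] / new[t] - 1.0) > threshold]

    all_tests = ['Ackermann', 'AngryPhonebook', 'ArrayAppend']  # invented names
    i, unchanged_length_count = 0, 0
    old = fake_measure(all_tests, i)
    new = fake_measure(all_tests, i)
    changed = fake_compare(old, new, 0.05)

    while changed and unchanged_length_count < 5:
        i += 1
        # Re-measure only the change candidates and fold the new samples into
        # the prior results, mirroring merge() plus measure() in the diff.
        old.update(fake_measure(changed, i))
        new.update(fake_measure(changed, i))
        previously_changed = len(changed)
        changed = fake_compare(old, new, 0.05)
        # Count iterations where the changed set failed to shrink; the script
        # compares len(old.tests), which at that point holds the names it just
        # re-tested, against len(changed) for the same purpose.
        unchanged_length_count = (unchanged_length_count + 1
                                  if len(changed) == previously_changed else 0)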
@@ -294,9 +265,10 @@ def get_codesize(filename):
     return int(data_line.split('\t')[0])


-def report_results(title, old_lines, new_lines, threshold, output_file):
-    old_results = LogParser.results_from_string(old_lines)
-    new_results = LogParser.results_from_string(new_lines)
+def report_results(title, old_lines, new_lines, threshold, output_file,
+                   old_results=None, new_results=None):
+    old_results = old_results or LogParser.results_from_string(old_lines)
+    new_results = new_results or LogParser.results_from_string(new_lines)

     print("------- " + title + " -------")
     print(create_report(old_results, new_results, threshold, 'git'))
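The widened report_results signature keeps both call styles working: callers holding raw benchmark log text still go through the LogParser fallback, while test_performance hands over already-parsed result dictionaries so the or expressions skip re-parsing. A minimal sketch of the two call paths, assuming invented log text, assuming None is an acceptable "no output file" value, and modeling nothing beyond the printing shown above:

    # Invented single-benchmark logs in the script's CSV result format.
    old_log = '1,Ackermann,3,715,728,721,5,721'
    new_log = '1,Ackermann,3,710,722,716,5,716'

    # Old-style call: only text is given, so both 'or' fallbacks parse it.
    report_results("Performance: -O", old_log, new_log, 0.07, None)

    # New-style call, as test_performance now does: pre-parsed results are
    # passed, the *_lines arguments are ignored, and nothing is re-parsed.
    old_results = LogParser.results_from_string(old_log)
    new_results = LogParser.results_from_string(new_log)
    report_results("Performance: -O", None, None, 0.07, None,
                   old_results, new_results)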
