diff --git a/test_perf.sh b/test_perf.sh index 5880769dae177..c71f37b233b7a 100755 --- a/test_perf.sh +++ b/test_perf.sh @@ -3,10 +3,10 @@ CURDIR=$(pwd) BASEDIR=$(readlink -f $(dirname $0 )) -echo "Use vbench to compare HEAD against a known-good baseline." +echo "Use vbench to compare the performance of one commit against another." echo "Make sure the python 'vbench' library is installed..\n" cd "$BASEDIR/vb_suite/" -python test_perf.py +python test_perf.py $@ cd "$CURDIR" diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py index a6534e2d88aaa..0d11f403b7651 100755 --- a/vb_suite/test_perf.py +++ b/vb_suite/test_perf.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # -*- coding: utf-8 -*- """ @@ -27,21 +27,38 @@ everything and calculate a ration for the timing information. 7) print the results to the log file and to stdout. -Known Issues: vbench fails to locate a baseline if HEAD is not a descendent """ -import sys -import shutil -from pandas import * -from vbench.api import BenchmarkRunner -from vbench.db import BenchmarkDB -from vbench.git import GitRepo +import shutil +import os +import argparse import tempfile -from suite import * - -BASELINE_COMMIT = 'bdbca8e' # v0.9,1 + regression fix -LOG_FILE = os.path.abspath(os.path.join(REPO_PATH, 'vb_suite.log')) +from pandas import DataFrame + +DEFAULT_MIN_DURATION = 0.01 +BASELINE_COMMIT = 'bdbca8e3dc' # 9,1 + regression fix # TODO: detect upstream/master + +parser = argparse.ArgumentParser(description='Use vbench to generate a report comparing performance between two commits.') +parser.add_argument('-a', '--auto', + help='Execute a run using the defaults for the base and target commits.', + action='store_true', + default=False) +parser.add_argument('-b','--base-commit', + help='The commit serving as performance baseline (default: %s).' 
% BASELINE_COMMIT, + type=str) +parser.add_argument('-t','--target-commit', + help='The commit to compare against the baseline (default: HEAD).', + type=str) +parser.add_argument('-m', '--min-duration', + help='Minimum duration (in ms) of baseline test for inclusion in report (default: %.3f).' % DEFAULT_MIN_DURATION, + type=float, + default=0.01) +parser.add_argument('-o', '--output', + metavar="", + dest='log_file', + help='path of file in which to save the report (default: vb_suite.log).') +args = parser.parse_args() def get_results_df(db,rev): """Takes a git commit hash and returns a Dataframe of benchmark results @@ -59,26 +76,35 @@ def prprint(s): print("*** %s"%s) def main(): + from vbench.api import BenchmarkRunner + from vbench.db import BenchmarkDB + from suite import REPO_PATH, BUILD, DB_PATH, PREPARE, dependencies, benchmarks + + if not args.base_commit: + args.base_commit = BASELINE_COMMIT + + # GitRepo wants exactly 7 character hash? + args.base_commit = args.base_commit[:7] + if args.target_commit: + args.target_commit = args.target_commit[:7] + + if not args.log_file: + args.log_file = os.path.abspath(os.path.join(REPO_PATH, 'vb_suite.log')) + TMP_DIR = tempfile.mkdtemp() prprint("TMP_DIR = %s" % TMP_DIR) - prprint("LOG_FILE = %s\n" % LOG_FILE) + prprint("LOG_FILE = %s\n" % args.log_file) try: - logfile = open(LOG_FILE, 'w') + logfile = open(args.log_file, 'w') prprint( "Processing Repo at '%s'..." 
% REPO_PATH) - repo = GitRepo(REPO_PATH) # get hashes of baseline and current head - h_head = repo.shas[-1] - h_baseline = BASELINE_COMMIT prprint( "Opening DB at '%s'...\n" % DB_PATH) db = BenchmarkDB(DB_PATH) - prprint( 'Comparing Head [%s] : %s ' % (h_head, repo.messages.get(h_head,""))) - prprint( 'Against baseline [%s] : %s \n' % (h_baseline, - repo.messages.get(h_baseline,""))) prprint("Initializing Runner...") runner = BenchmarkRunner(benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH, @@ -86,6 +112,21 @@ def main(): # run_option='eod', start_date=START_DATE, module_dependencies=dependencies) + repo = runner.repo #(steal the parsed git repo used by runner) + + # ARGH. reparse the repo, not discarding any commits, + # and overwrite the previous parse results + #prprint ("Slaughtering kittens..." ) + (repo.shas, repo.messages, + repo.timestamps, repo.authors) = _parse_commit_log(REPO_PATH) + + h_head = args.target_commit or repo.shas[-1] + h_baseline = args.base_commit + + prprint('Target [%s] : %s\n' % (h_head, repo.messages.get(h_head,""))) + prprint('Baseline [%s] : %s\n' % (h_baseline,repo.messages.get(h_baseline,""))) + + prprint ("removing any previous measurements for the commits." ) db.delete_rev_results(h_baseline) db.delete_rev_results(h_head) @@ -93,10 +134,10 @@ def main(): # TODO: we could skip this, but we need to make sure all # results are in the DB, which is a little tricky with # start dates and so on. 
- prprint( "Running benchmarks for baseline commit '%s'" % h_baseline) + prprint( "Running benchmarks for baseline [%s]" % h_baseline) runner._run_and_write_results(h_baseline) - prprint ("Running benchmarks for current HEAD '%s'" % h_head) + prprint ("Running benchmarks for target [%s]" % h_head) runner._run_and_write_results(h_head) prprint( 'Processing results...') @@ -108,26 +149,71 @@ def main(): t_baseline=baseline_res['timing'], ratio=ratio, name=baseline_res.name),columns=["t_head","t_baseline","ratio","name"]) - totals = totals.ix[totals.t_head > 0.010] # ignore sub 10micros + totals = totals.ix[totals.t_head > args.min_duration] # ignore below threshold totals = totals.dropna().sort("ratio").set_index('name') # sort in ascending order s = "\n\nResults:\n" + totals.to_string(float_format=lambda x: "%0.4f" %x) + "\n\n" - s += "Columns: test_name | head_time [ms] | baseline_time [ms] | ratio\n\n" - s += "- a Ratio of 1.30 means HEAD is 30% slower then the Baseline.\n\n" + s += "Columns: test_name | target_duration [ms] | baseline_duration [ms] | ratio\n\n" + s += "- a Ratio of 1.30 means the target commit is 30% slower then the baseline.\n\n" - s += 'Head [%s] : %s\n' % (h_head, repo.messages.get(h_head,"")) + s += 'Target [%s] : %s\n' % (h_head, repo.messages.get(h_head,"")) s += 'Baseline [%s] : %s\n\n' % (h_baseline,repo.messages.get(h_baseline,"")) logfile.write(s) logfile.close() prprint(s ) - prprint("Results were also written to the logfile at '%s'\n" % LOG_FILE) + prprint("Results were also written to the logfile at '%s'\n" % args.log_file) finally: # print("Disposing of TMP_DIR: %s" % TMP_DIR) shutil.rmtree(TMP_DIR) logfile.close() + +# hack , vbench.git ignores some commits, but we +# need to be able to reference any commit. 
+# modified from vbench.git
+def _parse_commit_log(repo_path):
+    """Parse the complete ``git log`` of the repository at *repo_path*.
+
+    Every commit line printed by ``git log --graph`` is kept, so any
+    revision can later be looked up by its abbreviated hash.
+
+    Parameters
+    ----------
+    repo_path : str
+        Path of the git work tree; its ``.git`` directory is used as the
+        git dir.
+
+    Returns
+    -------
+    (shas, messages, timestamps, authors) : tuple of pandas Series
+        ``shas`` is indexed by the converted timestamps, the other three
+        are indexed by ``shas``.  All four are returned in the reverse of
+        the order in which ``git log`` prints the commits, i.e. the first
+        commit printed by git ends up last.
+
+    Raises
+    ------
+    subprocess.CalledProcessError
+        If the ``git`` command exits with a non-zero status.
+    """
+    import subprocess
+    # vbench.git's ``parser`` is a date parser; alias it so that it does not
+    # shadow the module-level argparse ``parser``.
+    from vbench.git import _convert_timezones, parser as date_parser
+    from pandas import Series
+
+    # Run git directly (no shell, no scratch file): repository paths that
+    # contain spaces keep working and nothing is written to, or deleted
+    # from, the current directory.
+    git_cmd = ['git',
+               '--git-dir=%s' % os.path.join(repo_path, '.git'),
+               '--work-tree=%s' % repo_path,
+               'log', '--graph',
+               '--pretty=format:::%h::%cd::%s::%an']
+    # universal_newlines=True yields text (not bytes) on Python 3 as well
+    githist = subprocess.check_output(git_cmd, universal_newlines=True)
+
+    shas = []
+    timestamps = []
+    messages = []
+    authors = []
+    for line in githist.split('\n'):
+        fields = line.split('::', 3)
+        if '*' not in fields[0]:  # skip non-commit (graph-only) lines
+            continue
+
+        _, sha, stamp, rest = fields
+        # The subject may itself contain '::'; the author is always the
+        # last field, so split it off from the right.
+        message, author = rest.rsplit('::', 1)
+
+        shas.append(sha)
+        # parse timestamp into datetime object
+        timestamps.append(date_parser.parse(stamp))
+        messages.append(message)
+        authors.append(author)
+
+    # to UTC for now
+    timestamps = _convert_timezones(timestamps)
+
+    shas = Series(shas, timestamps)
+    messages = Series(messages, shas)
+    timestamps = Series(timestamps, shas)
+    authors = Series(authors, shas)
+    return shas[::-1], messages[::-1], timestamps[::-1], authors[::-1]
+
+
 if __name__ == '__main__':
-    main()
+    if not args.auto and not args.base_commit and not args.target_commit:
+        parser.print_help()
+    else:
+        main()