4 changes: 2 additions & 2 deletions test_perf.sh
@@ -3,10 +3,10 @@
CURDIR=$(pwd)
BASEDIR=$(readlink -f $(dirname $0 ))

echo "Use vbench to compare HEAD against a known-good baseline."
echo "Use vbench to compare the performance of one commit against another."
echo "Make sure the python 'vbench' library is installed..\n"

cd "$BASEDIR/vb_suite/"
python test_perf.py
python test_perf.py "$@"

cd "$CURDIR"
142 changes: 114 additions & 28 deletions vb_suite/test_perf.py
@@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
@@ -27,21 +27,38 @@
everything and calculate a ratio for the timing information.
7) print the results to the log file and to stdout.

Known Issues: vbench fails to locate a baseline if HEAD is not a descendant of the baseline commit.
"""
import sys
import shutil

from pandas import *
from vbench.api import BenchmarkRunner
from vbench.db import BenchmarkDB
from vbench.git import GitRepo
import shutil
import os
import argparse
import tempfile

from suite import *

BASELINE_COMMIT = 'bdbca8e' # v0.9.1 + regression fix
LOG_FILE = os.path.abspath(os.path.join(REPO_PATH, 'vb_suite.log'))
from pandas import DataFrame

DEFAULT_MIN_DURATION = 0.01
BASELINE_COMMIT = 'bdbca8e3dc' # v0.9.1 + regression fix # TODO: detect upstream/master

parser = argparse.ArgumentParser(description='Use vbench to generate a report comparing performance between two commits.')
parser.add_argument('-a', '--auto',
help='Execute a run using the defaults for the base and target commits.',
action='store_true',
default=False)
parser.add_argument('-b','--base-commit',
help='The commit serving as performance baseline (default: %s).' % BASELINE_COMMIT,
type=str)
parser.add_argument('-t','--target-commit',
help='The commit to compare against the baseline (default: HEAD).',
type=str)
parser.add_argument('-m', '--min-duration',
help='Minimum duration (in ms) of baseline test for inclusion in report (default: %.3f).' % DEFAULT_MIN_DURATION,
type=float,
default=DEFAULT_MIN_DURATION)
parser.add_argument('-o', '--output',
metavar="<file>",
dest='log_file',
help='path of file in which to save the report (default: vb_suite.log).')
args = parser.parse_args()
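
# For illustration (hypothetical invocation; the target hash below is a placeholder):
#
#   python test_perf.py -b bdbca8e3dc -t <target-sha> -m 0.05 -o vb_report.log
#
# yields args.base_commit='bdbca8e3dc', args.target_commit='<target-sha>',
# args.min_duration=0.05 and args.log_file='vb_report.log'; any option left
# out falls back to the defaults filled in at the top of main() below.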

def get_results_df(db,rev):
"""Takes a git commit hash and returns a Dataframe of benchmark results
@@ -59,44 +76,68 @@ def prprint(s):
print("*** %s"%s)

def main():
from vbench.api import BenchmarkRunner
from vbench.db import BenchmarkDB
from suite import REPO_PATH, BUILD, DB_PATH, PREPARE, dependencies, benchmarks

if not args.base_commit:
args.base_commit = BASELINE_COMMIT

# GitRepo wants exactly 7 character hash?
args.base_commit = args.base_commit[:7]
if args.target_commit:
args.target_commit = args.target_commit[:7]

if not args.log_file:
args.log_file = os.path.abspath(os.path.join(REPO_PATH, 'vb_suite.log'))

TMP_DIR = tempfile.mkdtemp()
prprint("TMP_DIR = %s" % TMP_DIR)
prprint("LOG_FILE = %s\n" % LOG_FILE)
prprint("LOG_FILE = %s\n" % args.log_file)

try:
logfile = open(LOG_FILE, 'w')
logfile = open(args.log_file, 'w')

prprint( "Processing Repo at '%s'..." % REPO_PATH)
repo = GitRepo(REPO_PATH)

# get hashes of baseline and current head
h_head = repo.shas[-1]
h_baseline = BASELINE_COMMIT

prprint( "Opening DB at '%s'...\n" % DB_PATH)
db = BenchmarkDB(DB_PATH)

prprint( 'Comparing Head [%s] : %s ' % (h_head, repo.messages.get(h_head,"")))
prprint( 'Against baseline [%s] : %s \n' % (h_baseline,
repo.messages.get(h_baseline,"")))

prprint("Initializing Runner...")
runner = BenchmarkRunner(benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH,
TMP_DIR, PREPARE, always_clean=True,
# run_option='eod', start_date=START_DATE,
module_dependencies=dependencies)

repo = runner.repo #(steal the parsed git repo used by runner)

# ARGH. reparse the repo, not discarding any commits,
# and overwrite the previous parse results
#prprint ("Slaughtering kittens..." )
(repo.shas, repo.messages,
repo.timestamps, repo.authors) = _parse_commit_log(REPO_PATH)

h_head = args.target_commit or repo.shas[-1]
h_baseline = args.base_commit

prprint('Target [%s] : %s\n' % (h_head, repo.messages.get(h_head,"")))
prprint('Baseline [%s] : %s\n' % (h_baseline,repo.messages.get(h_baseline,"")))


prprint ("removing any previous measurements for the commits." )
db.delete_rev_results(h_baseline)
db.delete_rev_results(h_head)

# TODO: we could skip this, but we need to make sure all
# results are in the DB, which is a little tricky with
# start dates and so on.
prprint( "Running benchmarks for baseline commit '%s'" % h_baseline)
prprint( "Running benchmarks for baseline [%s]" % h_baseline)
runner._run_and_write_results(h_baseline)

prprint ("Running benchmarks for current HEAD '%s'" % h_head)
prprint ("Running benchmarks for target [%s]" % h_head)
runner._run_and_write_results(h_head)

prprint( 'Processing results...')
@@ -108,26 +149,71 @@ def main():
t_baseline=baseline_res['timing'],
ratio=ratio,
name=baseline_res.name),columns=["t_head","t_baseline","ratio","name"])
totals = totals.ix[totals.t_head > 0.010] # ignore sub 10micros
totals = totals.ix[totals.t_head > args.min_duration] # ignore below threshold
totals = totals.dropna().sort("ratio").set_index('name') # sort in ascending order

s = "\n\nResults:\n" + totals.to_string(float_format=lambda x: "%0.4f" %x) + "\n\n"
s += "Columns: test_name | head_time [ms] | baseline_time [ms] | ratio\n\n"
s += "- a Ratio of 1.30 means HEAD is 30% slower then the Baseline.\n\n"
s += "Columns: test_name | target_duration [ms] | baseline_duration [ms] | ratio\n\n"
s += "- a Ratio of 1.30 means the target commit is 30% slower then the baseline.\n\n"

s += 'Head [%s] : %s\n' % (h_head, repo.messages.get(h_head,""))
s += 'Target [%s] : %s\n' % (h_head, repo.messages.get(h_head,""))
s += 'Baseline [%s] : %s\n\n' % (h_baseline,repo.messages.get(h_baseline,""))
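
# Worked example (assuming ratio = t_head / t_baseline, as the wording above
# implies): a target time of 13.0 ms against a baseline of 10.0 ms gives
# ratio = 13.0 / 10.0 = 1.30, i.e. the target commit is 30% slower; a ratio
# of 0.80 would mean the target commit is 20% faster than the baseline.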

logfile.write(s)
logfile.close()

prprint(s)
prprint("Results were also written to the logfile at '%s'\n" % LOG_FILE)
prprint("Results were also written to the logfile at '%s'\n" % args.log_file)

finally:
# print("Disposing of TMP_DIR: %s" % TMP_DIR)
shutil.rmtree(TMP_DIR)
logfile.close()


# hack: vbench.git ignores some commits, but we
# need to be able to reference any commit.
# modified from vbench.git
def _parse_commit_log(repo_path):
from vbench.git import parser, _convert_timezones
from pandas import Series
git_cmd = 'git --git-dir=%s/.git --work-tree=%s ' % (repo_path, repo_path)
githist = git_cmd + ('log --graph --pretty=format:'
'\"::%h::%cd::%s::%an\" > githist.txt')
os.system(githist)
githist = open('githist.txt').read()
os.remove('githist.txt')

shas = []
timestamps = []
messages = []
authors = []
for line in githist.split('\n'):
if '*' not in line.split("::")[0]: # skip non-commit lines
continue

_, sha, stamp, message, author = line.split('::', 4)

# parse timestamp into datetime object
stamp = parser.parse(stamp)

shas.append(sha)
timestamps.append(stamp)
messages.append(message)
authors.append(author)

# to UTC for now
timestamps = _convert_timezones(timestamps)

shas = Series(shas, timestamps)
messages = Series(messages, shas)
timestamps = Series(timestamps, shas)
authors = Series(authors, shas)
return shas[::-1], messages[::-1], timestamps[::-1], authors[::-1]


if __name__ == '__main__':
main()
if not args.auto and not args.base_commit and not args.target_commit:
parser.print_help()
else:
main()