diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py
index 72b441d79be84..b0d029de7371a 100755
--- a/vb_suite/test_perf.py
+++ b/vb_suite/test_perf.py
@@ -37,10 +37,18 @@
 import random
 import numpy as np
 
+import pandas as pd
 from pandas import DataFrame, Series
 
+try:
+    import git # gitpython
+except Exception:
+    print("Error: Please install the `gitpython` package\n")
+    sys.exit(1)
+
 from suite import REPO_PATH
 
+VB_DIR = os.path.dirname(os.path.abspath(__file__))
 DEFAULT_MIN_DURATION = 0.01
 HEAD_COL="head[ms]"
 BASE_COL="base[ms]"
@@ -57,6 +65,14 @@
 parser.add_argument('-t', '--target-commit',
                     help='The commit to compare against the baseline (default: HEAD).',
                     type=str)
+parser.add_argument('--base-pickle',
+                    help='name of pickle file with timings data generated by a former `-H -d FILE` run. '\
+                    'filename must be of the form <commit>-*.* or specify --base-commit separately',
+                    type=str)
+parser.add_argument('--target-pickle',
+                    help='name of pickle file with timings data generated by a former `-H -d FILE` run. '\
+                    'filename must be of the form <commit>-*.* or specify --target-commit separately',
+                    type=str)
 parser.add_argument('-m', '--min-duration',
                     help='Minimum duration (in ms) of baseline test for inclusion in report (default: %.3f).' % DEFAULT_MIN_DURATION,
                     type=float,
@@ -104,7 +120,7 @@
 parser.add_argument('-a', '--affinity',
                     metavar="a",
                     dest='affinity',
-                    default=1,
+                    default=None,
                     type=int,
-                    help='set processor affinity of processm by default bind to cpu/core #1 only'
-                    'requires the "affinity" python module , will raise Warning otherwise' )
+                    help='set processor affinity mask of the process (default: affinity is left unchanged). '
+                    'requires the "affinity" python module, aborts if it is not available' )
@@ -206,21 +222,33 @@ def profile_comparative(benchmarks):
 
         head_res = get_results_df(db, h_head)
         baseline_res = get_results_df(db, h_baseline)
-        totals = prep_totals(baseline_res, head_res)
-
-        h_msg = repo.messages.get(h_head, "")
-        b_msg = repo.messages.get(h_baseline, "")
 
-        print_report(totals,h_head=h_head,h_msg=h_msg,
-                     h_baseline=h_baseline,b_msg=b_msg)
+        report_comparative(head_res,baseline_res)
 
-        if args.outdf:
-            prprint("The results DataFrame was written to '%s'\n" % args.outdf)
-            totals.save(args.outdf)
     finally:
         # print("Disposing of TMP_DIR: %s" % TMP_DIR)
         shutil.rmtree(TMP_DIR)
 
+def prep_pickle_for_total(df, agg_name='median'):
+    """
+    Reduce a timings dataframe to the form expected by prep_totals.
+
+    Accepts a dataframe resulting from invocation with -H -d o.pickle.
+    If multiple data columns are present (-N was used), the
+    `agg_name` method of the dataframe (called with axis 1) is used to
+    reduce them to a single value per vbench; df.median is used by default.
+
+    Returns a dataframe with a 'timing' column and a 'name' column
+    holding a copy of the index.
+    """
+    agg = getattr(df, agg_name)
+    res = DataFrame(agg(1))
+    cols = list(res.columns)
+    cols[0] = 'timing'  # the aggregated values are the first column
+    res.columns = cols
+    res['name'] = list(res.index)
+    return res
+
 def prep_totals(head_res, baseline_res):
     """
     Each argument should be a dataframe with 'timing' and 'name' columns
@@ -241,6 +269,37 @@ def prep_totals(head_res, baseline_res):
         ).sort("ratio").set_index('name') # sort in ascending order
     return totals
 
+def report_comparative(head_res,baseline_res):
+    """
+    Print the comparison report for two sets of timings.
+
+    Both arguments are dataframes of the form expected by prep_totals.
+    The commits are taken from args.target_commit / args.base_commit and
+    their messages are looked up in the git repo opened at VB_DIR.
+    If args.outdf is set, the totals dataframe is also saved to that path.
+    """
+    try:
+        r = git.Repo(VB_DIR)
+    except Exception as e:
+        # without a repo the commit messages cannot be looked up; abort
+        # cleanly instead of dropping into a debugger with `r` unbound
+        print("Error: could not open git repo at '%s': %s" % (VB_DIR, e))
+        sys.exit(1)
+
+    totals = prep_totals(head_res,baseline_res)
+
+    h_head = args.target_commit
+    h_baseline = args.base_commit
+    h_msg = r.commit(h_head).message.strip()
+    b_msg = r.commit(h_baseline).message.strip()
+
+    print_report(totals,h_head=h_head,h_msg=h_msg,
+                 h_baseline=h_baseline,b_msg=b_msg)
+
+    if args.outdf:
+        prprint("The results DataFrame was written to '%s'\n" % args.outdf)
+        totals.save(args.outdf)
+
 def profile_head_single(benchmark):
     import gc
     results = []
@@ -398,18 +457,23 @@ def main():
     random.seed(args.seed)
     np.random.seed(args.seed)
 
-    try:
-        import affinity
-        affinity.set_process_affinity_mask(0,args.affinity)
-        assert affinity.get_process_affinity_mask(0) == args.affinity
-        print("CPU affinity set to %d" % args.affinity)
-    except ImportError:
-        import warnings
-        print("\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"+
-              "The 'affinity' module is not available, results may be unreliable\n" +
-              "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n"
-            )
-        time.sleep(2)
+    if args.base_pickle and args.target_pickle:
+        baseline_res = prep_pickle_for_total(pd.load(args.base_pickle))
+        target_res = prep_pickle_for_total(pd.load(args.target_pickle))
+
+        report_comparative(target_res, baseline_res)
+        sys.exit(0)
+
+    if args.affinity is not None:
+        try:
+            import affinity
+
+            affinity.set_process_affinity_mask(0,args.affinity)
+            assert affinity.get_process_affinity_mask(0) == args.affinity
+            print("CPU affinity set to %d" % args.affinity)
+        except ImportError:
+            print("-a/--affinity specified, but the 'affinity' module is not available, aborting.\n")
+            sys.exit(1)
 
     print("\n")
     prprint("LOG_FILE = %s" % args.log_file)
@@ -489,10 +553,40 @@ def inner(repo_path):
 
 if __name__ == '__main__':
     args = parser.parse_args()
-    if not args.head and (not args.base_commit and not args.target_commit):
+    if (not args.head
+        and not (args.base_commit and args.target_commit)
+        and not (args.base_pickle and args.target_pickle)):
         parser.print_help()
-    else:
-        import warnings
-        warnings.filterwarnings('ignore',category=FutureWarning)
-        warnings.filterwarnings('ignore',category=DeprecationWarning)
-        main()
+        sys.exit(1)
+    elif ((args.base_pickle or args.target_pickle) and not
+                    (args.base_pickle and args.target_pickle)):
+        print("Must specify Both --base-pickle and --target-pickle.")
+        sys.exit(1)
+
+    if args.base_pickle and args.target_pickle:
+        # pickles are expected to be named <commit>-*.*; derive any commit
+        # not given explicitly from the file name (not its directory part)
+        if not args.base_commit:
+            print("base_commit not specified, Assuming base_pickle is named <base_commit>-foo.*")
+            args.base_commit = os.path.basename(args.base_pickle).split('-')[0]
+        if not args.target_commit:
+            print("target_commit not specified, Assuming target_pickle is named <target_commit>-foo.*")
+            args.target_commit = os.path.basename(args.target_pickle).split('-')[0]
+
+    import warnings
+    warnings.filterwarnings('ignore',category=FutureWarning)
+    warnings.filterwarnings('ignore',category=DeprecationWarning)
+
+    if args.base_commit and args.target_commit:
+        print("Verifying specified commits exist in repo...")
+        r=git.Repo(VB_DIR)
+        for c in [ args.base_commit, args.target_commit ]:
+            try:
+                msg = r.commit(c).message.strip()
+            except git.BadObject:
+                print("The commit '%s' was not found, aborting" % c)
+                sys.exit(1)
+            else:
+                print("%s: %s" % (c,msg))
+
+    main()