+import json
+import os
+import sys
+
+import pandas as pd
+
+from redisbench_admin.utils.utils import retrieve_local_or_remote_input_json
+
+
+def get_key_results_and_values(baseline_json, step, use_result):
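+    """Split a key-results entry into the selected run name and a dict of its metric values."""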
+    selected_run = None
+    metrics = {}
+    for name, value in baseline_json["key-results"][step][use_result][0].items():
+        if name == "run-name":
+            selected_run = value
+        else:
+            metrics[name] = value
+    return selected_run, metrics
+
+
 def compare_command_logic(args):
-    pass
+    baseline_file = args.baseline_file
+    comparison_file = args.comparison_file
+    local_path = os.path.abspath(args.local_dir)
+    use_result = args.use_result
+    included_steps = args.steps.split(",")
+    max_pct_change = args.fail_above_pct_change
+    max_negative_pct_change = max_pct_change * -1.0
+    enabled_fail = args.enable_fail_above
+
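+    # Fetch the baseline and comparison result JSON files (from the local dir or a remote input)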
+    baseline_json = retrieve_local_or_remote_input_json(baseline_file, local_path, "--baseline-file")
+    if baseline_json is None:
+        print('Error while retrieving {}! Exiting..'.format(baseline_file))
+        sys.exit(1)
+
+    comparison_json = retrieve_local_or_remote_input_json(comparison_file, local_path, "--comparison-file")
+    if comparison_json is None:
+        print('Error while retrieving {}! Exiting..'.format(comparison_file))
+        sys.exit(1)
+
+    ##### Comparison starts here #####
+    baseline_key_results_steps = baseline_json["key-results"].keys()
+    comparison_key_results_steps = comparison_json["key-results"].keys()
+    baseline_df_config = generate_comparison_dataframe_configs(
+        baseline_json["benchmark-config"], baseline_key_results_steps)
+    comparison_df_config = generate_comparison_dataframe_configs(
+        comparison_json["benchmark-config"], comparison_key_results_steps)
+
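+    # For each included step, build a baseline/comparison DataFrame per metric and compute the
+    # percentage change; the sign is normalized so that a negative value always means a regression.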
+    percentage_change_map = {}
+    for step in baseline_key_results_steps:
+        if step in included_steps:
+            df_dict = {}
+            percentage_change_map[step] = {}
+            print("##############################")
+            print("Comparing {} step".format(step))
+            key_result_run_name, baseline_metrics = get_key_results_and_values(baseline_json, step, use_result)
+            key_result_run_name, comparison_metrics = get_key_results_and_values(comparison_json, step, use_result)
+            for baseline_metric_name, baseline_metric_value in baseline_metrics.items():
+                comparison_metric_value = None
+                if baseline_metric_name in comparison_metrics:
+                    comparison_metric_value = comparison_metrics[baseline_metric_name]
+                df_dict[baseline_metric_name] = [baseline_metric_value, comparison_metric_value]
+            df = pd.DataFrame(df_dict, index=["baseline", "comparison"])
+            print("Percentage of change for comparison on {}".format(step))
+            df = df.append(df.pct_change().rename(index={'comparison': 'pct_change'}).loc['pct_change'] * 100.0)
+
+            for metric_name, items in df.iteritems():
+
+                lower_is_better = baseline_df_config[step]["sorting_metric_sorting_direction_map"][metric_name]
+
+                multiplier = 1.0
+                # if lower is better, then a negative change is a performance improvement
+                if lower_is_better:
+                    multiplier = -1.0
+
+                pct_change = items.get("pct_change") * multiplier
+                df.at['pct_change', metric_name] = pct_change
+                percentage_change_map[step][metric_name] = pct_change
+
+            print(df)
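+            # Optionally fail the run (exit 1) when any metric regresses beyond the allowed percentage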
+            if enabled_fail:
+                failing_metrics_series = df.loc['pct_change'] <= max_negative_pct_change
+                failing_metrics = df.loc['pct_change'][failing_metrics_series]
+                amount_of_failing_metrics = len(failing_metrics)
+                if amount_of_failing_metrics > 0:
+                    df_keys = df.keys()
+                    print("There was a total of {} metrics that presented a regression above {} %".format(
+                        amount_of_failing_metrics, max_pct_change))
+                    for pos, failed in enumerate(failing_metrics_series):
+                        if failed:
+                            print("\tMetric '{}' failed, with a percentage of change of {:.2f} %".format(
+                                df_keys[pos], df.loc['pct_change'][pos]))
+                    sys.exit(1)
+        else:
+            print("Skipping step: {} due to command line argument --steps not containing it ({})".format(
+                step, ",".join(included_steps)))
+
+
+def generate_comparison_dataframe_configs(benchmark_config, steps):
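+    """Build the per-step comparison config: metric names, JSON paths and sort direction flags (True when lower is better)."""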
+    step_df_dict = {}
+    for step in steps:
+        step_df_dict[step] = {}
+        step_df_dict[step]["df_dict"] = {"run-name": []}
+        step_df_dict[step]["sorting_metric_names"] = []
+        step_df_dict[step]["sorting_metric_sorting_direction"] = []
+        step_df_dict[step]["sorting_metric_sorting_direction_map"] = {}
+        step_df_dict[step]["metric_json_path"] = []
+    for metric in benchmark_config["key-metrics"]:
+        step = metric["step"]
+        metric_name = metric["metric-name"]
+        metric_json_path = metric["metric-json-path"]
+        step_df_dict[step]["sorting_metric_names"].append(metric_name)
+        step_df_dict[step]["metric_json_path"].append(metric_json_path)
+        step_df_dict[step]["df_dict"][metric_name] = []
+        step_df_dict[step]["sorting_metric_sorting_direction"].append(
+            False if metric["comparison"] == "higher-better" else True)
+        step_df_dict[step]["sorting_metric_sorting_direction_map"][metric_name] = (
+            False if metric["comparison"] == "higher-better" else True)
+    return step_df_dict
 
 
-def create_compare_arguments(parser):
-    return parser
+def from_resultsDF_to_key_results_dict(resultsDataFrame, step, step_df_dict):
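+    """Summarize a step's results DataFrame into a table plus best/median/worst rows and variance/stddev."""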
+    key_results_dict = {}
+    key_results_dict["table"] = json.loads(resultsDataFrame.to_json(orient='records'))
+    best_result = resultsDataFrame.head(n=1)
+    worst_result = resultsDataFrame.tail(n=1)
+    first_sorting_col = step_df_dict[step]["sorting_metric_names"][0]
+    first_sorting_median = resultsDataFrame[first_sorting_col].median()
+    result_index = resultsDataFrame[first_sorting_col].sub(first_sorting_median).abs().idxmin()
+    median_result = resultsDataFrame.loc[[result_index]]
+    key_results_dict["best-result"] = json.loads(best_result.to_json(orient='records'))
+    key_results_dict["median-result"] = json.loads(median_result.to_json(orient='records'))
+    key_results_dict["worst-result"] = json.loads(worst_result.to_json(orient='records'))
+    key_results_dict["reliability-analysis"] = {
+        'var': json.loads(resultsDataFrame.var().to_json()),
+        'stddev': json.loads(resultsDataFrame.std().to_json())}
+    return key_results_dict