Skip to content

Commit 0bd7488

Browse files
committed
document + add main function to compare.py
1 parent c65540d commit 0bd7488

File tree

1 file changed

+124
-15
lines changed

1 file changed

+124
-15
lines changed

devops/scripts/benchmarks/compare.py

Lines changed: 124 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
1+
from utils.aggregate import SimpleMedian
2+
from utils.validate import Validate
3+
from utils.result import Result, BenchmarkRun
4+
from options import options
5+
16
import os
27
import sys
38
import json
9+
import argparse
410
from pathlib import Path
511
from dataclasses import dataclass, asdict
612

7-
from utils.aggregate import SimpleMedian
8-
from utils.validate import Validate
9-
from utils.result import Result, BenchmarkRun
10-
from options import options
11-
1213
@dataclass
1314
class BenchmarkHistoricAverage:
15+
"""Contains historic average information for 1 benchmark"""
1416
# Name of benchmark as defined in Benchmark class definition
1517
name: str
1618

@@ -32,14 +34,32 @@ class BenchmarkHistoricAverage:
3234
command_args: set[str]
3335
# TODO Ensure ONEAPI_DEVICE_SELECTOR? GPU name itself?
3436

35-
class Compare:
3637

38+
class Compare:
39+
"""Class containing logic for comparisons between results"""
3740
@staticmethod
3841
def get_hist_avg(
3942
result_name: str, result_dir: str, cutoff: str, aggregator=SimpleMedian,
4043
exclude: list[str] = []
4144
) -> dict[str, BenchmarkHistoricAverage]:
45+
"""
46+
Create a historic average for results named result_name in result_dir
47+
using the specified aggregator
48+
49+
Args:
50+
result_name (str): Name of benchmarking result to obtain average for
51+
result_dir (str): Path to folder containing benchmark results
52+
cutoff (str): Timestamp in YYYYMMDD_HHMMSS of oldest results used in
53+
average calculation
54+
aggregator (Aggregator): The aggregator to use for calculating the
55+
historic average
56+
exclude (list[str]): List of filenames (only the stem) to exclude
57+
from average calculation
4258
59+
Returns:
60+
A dictionary mapping benchmark names to BenchmarkHistoricAverage
61+
objects
62+
"""
4363
def get_timestamp(f: str) -> str:
4464
"""Extract timestamp from result filename"""
4565
return str(f)[-len("YYYYMMDD_HHMMSS.json") : -len(".json")]
@@ -121,6 +141,17 @@ def reset_aggregate() -> dict:
121141
def to_hist_avg(
122142
hist_avg: dict[str, BenchmarkHistoricAverage], compare_file: str
123143
) -> tuple:
144+
"""
145+
Compare results in compare_file to a pre-existing map of historic
146+
averages
147+
148+
Args:
149+
hist_avg (dict): A historic average map generated from get_hist_avg
150+
compare_file (str): Full filepath of result to compare against
151+
152+
Returns:
153+
A tuple returning (list of improved tests, list of regressed tests).
154+
"""
124155
with open(compare_file, 'r') as compare_f:
125156
compare_result = BenchmarkRun.from_json(json.load(compare_f))
126157

@@ -153,36 +184,114 @@ def perf_diff_entry() -> dict:
153184
regression.append(perf_diff_entry())
154185

155186
return improvement, regression
156-
157187

158188

159189

160190
def to_hist(
    avg_type: str, result_name: str, compare_file: str, result_dir: str, cutoff: str,
) -> tuple:
    """
    Pregenerate a historic average from results named result_name in
    result_dir, and compares the results in compare_file to it

    Args:
        avg_type (str): Type of "average" (measure of central tendency) to
            use in historic "average" calculation
        result_name (str): Save name of the result
        compare_file (str): Full filepath of the result file to compare
            the historic average against
        result_dir (str): Directory to look for results in
        cutoff (str): Timestamp (in YYYYMMDD_HHMMSS) indicating the oldest
            result included in the historic average calculation

    Returns:
        A tuple returning (list of improved tests, list of regressed tests).
        Each element in each list is a BenchmarkRun object with a hist_avg,
        avg_type, and delta field added, indicating the historic average,
        type of central tendency used for historic average, and the delta
        from the average for this benchmark run.
    """

    # Only median is implemented as an aggregator so far; bail out rather
    # than silently compute something else.
    if avg_type != "median":
        print("Only median is currently supported: refusing to continue.")
        exit(1)

    # TODO call validator on cutoff timestamp
    # Exclude the file being compared from the average, otherwise it would
    # bias the baseline towards itself.
    hist_avg = Compare.get_hist_avg(
        result_name,
        result_dir,
        cutoff,
        exclude=[Path(compare_file).stem]
    )
    return Compare.to_hist_avg(hist_avg, compare_file)
227+
228+
229+
if __name__ == "__main__":
    # CLI entry point: currently supports one subcommand, "to_hist", which
    # compares a single result file against the historic average of prior
    # results with the same save name.
    parser = argparse.ArgumentParser(description="Compare benchmark results")
    subparsers = parser.add_subparsers(dest="operation", required=True)
    parser_avg = subparsers.add_parser("to_hist", help="Compare a benchmark result to historic average")
    parser_avg.add_argument(
        "--avg_type",
        type=str,
        help="Measure of central tendency to use when computing historic average",
        default="median"
    )
    parser_avg.add_argument(
        "--name",
        type=str,
        required=True,
        help="Save name of the benchmark results to compare to"
    )
    parser_avg.add_argument(
        "--compare_file",
        type=str,
        required=True,
        # Fixed typo: "te" -> "the"
        help="Result file to compare against the historic average"
    )
    parser_avg.add_argument(
        "--results_dir",
        type=str,
        required=True,
        help="Directory storing results"
    )
    parser_avg.add_argument(
        "--cutoff",
        type=str,
        help="Timestamp (in YYYYMMDD_HHMMSS) of oldest result to include in historic average calculation",
        default="20000101_010101"
    )

    args = parser.parse_args()

    if args.operation == "to_hist":
        # Guard here as well as in Compare.to_hist so the CLI fails fast
        # with a clear message before touching the filesystem.
        if args.avg_type != "median":
            print("Only median is currently supported: exiting.")
            exit(1)
        if not Validate.timestamp(args.cutoff):
            raise ValueError("Timestamp must be provided as YYYYMMDD_HHMMSS.")

        improvements, regressions = Compare.to_hist(
            "median",
            args.name,
            args.compare_file,
            args.results_dir,
            args.cutoff
        )

        def print_regression(entry: dict):
            """Print an entry outputted from Compare.to_hist"""
            print(f"Test: {entry['name']}")
            print(f"-- Historic {entry['avg_type']}: {entry['hist_avg']}")
            # Fixed: these previously read the enclosing loop variable
            # `test` instead of the `entry` parameter; that only worked by
            # accident (closure capture) when called from the loops below.
            print(f"-- Run result: {entry['value']}")
            print(f"-- Delta: {entry['delta']}")
            print("")

        if improvements:
            print("#\n# Improvements:\n#\n")
            for test in improvements: print_regression(test)
        if regressions:
            print("#\n# Regressions:\n#\n")
            for test in regressions: print_regression(test)
    else:
        # Unreachable while argparse enforces required=True on subcommands,
        # but kept as a defensive fallback.
        print("Unsupported operation: exiting.")
        exit(1)

0 commit comments

Comments
 (0)