1+ from utils .aggregate import SimpleMedian
2+ from utils .validate import Validate
3+ from utils .result import Result , BenchmarkRun
4+ from options import options
5+
16import os
27import sys
38import json
9+ import argparse
410from pathlib import Path
511from dataclasses import dataclass , asdict
612
7- from utils .aggregate import SimpleMedian
8- from utils .validate import Validate
9- from utils .result import Result , BenchmarkRun
10- from options import options
11-
1213@dataclass
1314class BenchmarkHistoricAverage :
15+ """Contains historic average information for 1 benchmark"""
1416 # Name of benchmark as defined in Benchmark class definition
1517 name : str
1618
@@ -32,14 +34,32 @@ class BenchmarkHistoricAverage:
3234 command_args : set [str ]
3335 # TODO Ensure ONEAPI_DEVICE_SELECTOR? GPU name itself?
3436
35- class Compare :
3637
38+ class Compare :
39+ """Class containing logic for comparisons between results"""
3740 @staticmethod
3841 def get_hist_avg (
3942 result_name : str , result_dir : str , cutoff : str , aggregator = SimpleMedian ,
4043 exclude : list [str ] = []
4144 ) -> dict [str , BenchmarkHistoricAverage ]:
45+ """
46+ Create a historic average for results named result_name in result_dir
47+ using the specified aggregator
48+
49+ Args:
50+ result_name (str): Name of benchmarking result to obtain average for
51+ result_dir (str): Path to folder containing benchmark results
52+ cutoff (str): Timestamp in YYYYMMDD_HHMMSS of oldest results used in
53+ average calculation
54+ aggregator (Aggregator): The aggregator to use for calculating the
55+ historic average
56+ exclude (list[str]): List of filenames (only the stem) to exclude
57+ from average calculation
4258
59+ Returns:
60+ A dictionary mapping benchmark names to BenchmarkHistoricAverage
61+ objects
62+ """
4363 def get_timestamp (f : str ) -> str :
4464 """Extract timestamp from result filename"""
4565 return str (f )[- len ("YYYYMMDD_HHMMSS.json" ) : - len (".json" )]
@@ -121,6 +141,17 @@ def reset_aggregate() -> dict:
121141 def to_hist_avg (
122142 hist_avg : dict [str , BenchmarkHistoricAverage ], compare_file : str
123143 ) -> tuple :
144+ """
145+ Compare results in compare_file to a pre-existing map of historic
146+ averages
147+
148+ Args:
149+ hist_avg (dict): A historic average map generated from get_hist_avg
150+ compare_file (str): Full filepath of result to compare against
151+
152+ Returns:
153+ A tuple returning (list of improved tests, list of regressed tests).
154+ """
124155 with open (compare_file , 'r' ) as compare_f :
125156 compare_result = BenchmarkRun .from_json (json .load (compare_f ))
126157
@@ -153,36 +184,114 @@ def perf_diff_entry() -> dict:
153184 regression .append (perf_diff_entry ())
154185
155186 return improvement , regression
156-
157187
158188
159189
def to_hist(
    avg_type: str,
    result_name: str,
    compare_file: str,
    result_dir: str,
    cutoff: str,
) -> tuple:
    """
    Pregenerate a historic average from results named result_name in
    result_dir, and compare the results in compare_file to it

    Args:
        avg_type (str): Type of "average" (measure of central tendency) to
        use in historic "average" calculation; only "median" is accepted
        result_name (str): Save name of the result
        compare_file (str): Path of the result file to compare against the
        historic average; its stem is excluded from the average itself
        result_dir (str): Directory to look for results in
        cutoff (str): Timestamp (in YYYYMMDD_HHMMSS) indicating the oldest
        result included in the historic average calculation

    Returns:
        A tuple of (list of improved tests, list of regressed tests), passed
        through unchanged from Compare.to_hist_avg. The command-line entry
        point in this file reads the 'name', 'avg_type', 'hist_avg', 'value'
        and 'delta' keys of each element.

    Raises:
        SystemExit: With exit status 1 if avg_type is not "median".
    """
    if avg_type != "median":
        print("Only median is currently supported: refusing to continue.")
        # sys.exit instead of the site-provided exit(), which is meant for
        # interactive use and is absent when Python runs without `site`.
        sys.exit(1)

    # TODO call validator on cutoff timestamp
    hist_avg = Compare.get_hist_avg(
        result_name,
        result_dir,
        cutoff,
        # The file under comparison must not contribute to its own baseline.
        exclude=[Path(compare_file).stem],
    )
    return Compare.to_hist_avg(hist_avg, compare_file)
227+
228+
def print_regression(entry: dict) -> None:
    """
    Print an entry outputted from Compare.to_hist

    Args:
        entry (dict): One element of the improvement or regression list;
        must provide the 'name', 'avg_type', 'hist_avg', 'value' and 'delta'
        keys.
    """
    print(f"Test: {entry['name']}")
    print(f"-- Historic {entry['avg_type']}: {entry['hist_avg']}")
    # Read from `entry`, not from the caller's loop variable: the previous
    # code only worked because a global named `test` happened to exist.
    print(f"-- Run result: {entry['value']}")
    print(f"-- Delta: {entry['delta']}")
    print("")


def main(argv=None) -> None:
    """
    Command-line entry point: parse arguments and run the requested operation

    Args:
        argv (list[str] | None): Argument list to parse; None makes argparse
        fall back to sys.argv[1:].

    Raises:
        SystemExit: With status 1 on an unsupported average type or
        operation (argparse itself also exits on malformed arguments).
        ValueError: If Validate.timestamp rejects the --cutoff value.
    """
    parser = argparse.ArgumentParser(description="Compare benchmark results")
    subparsers = parser.add_subparsers(dest="operation", required=True)
    parser_avg = subparsers.add_parser(
        "to_hist", help="Compare a benchmark result to historic average"
    )
    parser_avg.add_argument(
        "--avg_type",
        type=str,
        help="Measure of central tendency to use when computing historic average",
        default="median",
    )
    parser_avg.add_argument(
        "--name",
        type=str,
        required=True,
        help="Save name of the benchmark results to compare to",
    )
    parser_avg.add_argument(
        "--compare_file",
        type=str,
        required=True,
        help="Result file to compare against the historic average",
    )
    parser_avg.add_argument(
        "--results_dir",
        type=str,
        required=True,
        help="Directory storing results",
    )
    parser_avg.add_argument(
        "--cutoff",
        type=str,
        help="Timestamp (in YYYYMMDD_HHMMSS) of oldest result to include in historic average calculation",
        default="20000101_010101",
    )

    args = parser.parse_args(argv)

    if args.operation != "to_hist":
        print("Unsupported operation: exiting.")
        sys.exit(1)

    if args.avg_type != "median":
        print("Only median is currently supported: exiting.")
        sys.exit(1)
    if not Validate.timestamp(args.cutoff):
        raise ValueError("Timestamp must be provided as YYYYMMDD_HHMMSS.")

    improvements, regressions = Compare.to_hist(
        "median",
        args.name,
        args.compare_file,
        args.results_dir,
        args.cutoff,
    )

    # Each section header is printed only when it has entries to show.
    if improvements:
        print("#\n# Improvements:\n#\n")
        for improved in improvements:
            print_regression(improved)
    if regressions:
        print("#\n# Regressions:\n#\n")
        for regressed in regressions:
            print_regression(regressed)


if __name__ == "__main__":
    main()
0 commit comments