from pathlib import Path
from dataclasses import dataclass, asdict

+
@dataclass
class BenchmarkHistoricAverage:
    """Contains historic average information for 1 benchmark"""
+
    # Name of benchmark as defined in Benchmark class definition
    name: str

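For orientation, the dataclass has four fields in total; the remaining two, average_type and value, are filled in where get_hist_avg builds its return dict later in this diff. A standalone sketch (all values below are hypothetical):

```python
from dataclasses import dataclass, asdict


@dataclass
class BenchmarkHistoricAverage:
    name: str
    average_type: str
    value: float
    command_args: set[str]


# Hypothetical entry; benchmark name and args are illustrative only.
avg = BenchmarkHistoricAverage(
    name="submit_kernel",
    average_type="median",
    value=12.4,
    command_args={"--ioq", "--profiling"},
)
print(asdict(avg)["value"])  # 12.4
```
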
@@ -32,27 +34,32 @@ class BenchmarkHistoricAverage:
    #
    # This exists to ensure benchmarks called using different arguments are not
    # compared together.
-    command_args: set[str]
+    command_args: set[str]
    # TODO Ensure ONEAPI_DEVICE_SELECTOR? GPU name itself?


class Compare:
    """Class containing logic for comparisons between results"""
+
    @staticmethod
    def get_hist_avg(
-        result_name: str, result_dir: str, hostname: str, cutoff: str,
-        aggregator: Aggregator = SimpleMedian, exclude: list[str] = []
+        result_name: str,
+        result_dir: str,
+        hostname: str,
+        cutoff: str,
+        aggregator: Aggregator = SimpleMedian,
+        exclude: list[str] = [],
    ) -> dict[str, BenchmarkHistoricAverage]:
        """
        Create a historic average for results named result_name in result_dir
        using the specified aggregator

        Args:
-            result_name (str): Name of benchmarking result to obtain average for
+            result_name (str): Name of benchmarking result to obtain average for
            result_dir (str): Path to folder containing benchmark results
            cutoff (str): Timestamp in YYYYMMDD_HHMMSS of oldest results used in
                average calculation
-            hostname (str): Hostname of machine on which results ran on
+            hostname (str): Hostname of machine on which the results ran
            aggregator (Aggregator): The aggregator to use for calculating the
                historic average
            exclude (list[str]): List of filenames (only the stem) to exclude
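A usage sketch of the reformatted signature (result name, directory, and hostname are hypothetical; SimpleMedian remains the default aggregator):

```python
# Hypothetical call; "baseline", "./results", and the hostname are illustrative.
hist_avg = Compare.get_hist_avg(
    result_name="baseline",
    result_dir="./results",
    hostname="bench-host-01",
    cutoff="20240101_000000",
)
for name, avg in hist_avg.items():
    print(f"{name}: {avg.average_type} = {avg.value}")
```
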
@@ -90,10 +97,10 @@ def get_result_paths() -> list[str]:
                    # Result file is not excluded
                    and f.stem not in exclude,
                    # Assumes format is <name>_YYYYMMDD_HHMMSS.json
-                    cache_dir.glob(f"{result_name}_*_*.json")
+                    cache_dir.glob(f"{result_name}_*_*.json"),
                )
            )
-
+
        def validate_benchmark_result(result: BenchmarkRun) -> bool:
            """
            Returns True if result file:
@@ -105,21 +112,25 @@ def validate_benchmark_result(result: BenchmarkRun) -> bool:
            if result.hostname != hostname:
                return False
            if result.name != result_name:
-                print(f"Warning: Result file {result_path} does not match specified result name {result.name}.")
+                print(
+                    f"Warning: Result file {result_path} does not match specified result name {result.name}."
+                )
                return False
-            if result.date < datetime.strptime(cutoff, "%Y%m%d_%H%M%S").replace(tzinfo=timezone.utc):
+            if result.date < datetime.strptime(cutoff, "%Y%m%d_%H%M%S").replace(
+                tzinfo=timezone.utc
+            ):
                return False
            return True

        # key: name of the benchmark test result
        # value: { command_args: set[str], aggregate: Aggregator }
-        #
+        #
        # This is then used to build a dict[BenchmarkHistoricAverage] used
        # to find historic averages.
        average_aggregate: dict[str, dict] = dict()
-
+
        for result_path in get_result_paths():
-            with result_path.open('r') as result_f:
+            with result_path.open("r") as result_f:
                result = BenchmarkRun.from_json(json.load(result_f))

            # Perform another check on result file here, as get_result_paths()
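The cutoff check above parses the timestamp as UTC before comparing. A quick standalone sketch of that comparison (illustrative values):

```python
from datetime import datetime, timezone

cutoff = "20240101_000000"
cutoff_dt = datetime.strptime(cutoff, "%Y%m%d_%H%M%S").replace(tzinfo=timezone.utc)

# A hypothetical result date; anything older than the cutoff is filtered out.
result_date = datetime(2023, 12, 31, 23, 59, 59, tzinfo=timezone.utc)
print(result_date < cutoff_dt)  # True -> this result would be excluded
```
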
@@ -131,39 +142,48 @@ def validate_benchmark_result(result: BenchmarkRun) -> bool:
                continue

            for test_run in result.results:
+
                def reset_aggregate() -> dict:
-                    return {
+                    return {
                        "command_args": set(test_run.command[1:]),
-                        "aggregate": aggregator(starting_elements=[test_run.value])
+                        "aggregate": aggregator(starting_elements=[test_run.value]),
                    }

                # Add every benchmark run to average_aggregate:
                if test_run.name not in average_aggregate:
                    average_aggregate[test_run.name] = reset_aggregate()
                else:
                    # Check that we are comparing runs with the same cmd args:
-                    if set(test_run.command[1:]) == average_aggregate[test_run.name]["command_args"]:
-                        average_aggregate[test_run.name]["aggregate"].add(test_run.value)
+                    if (
+                        set(test_run.command[1:])
+                        == average_aggregate[test_run.name]["command_args"]
+                    ):
+                        average_aggregate[test_run.name]["aggregate"].add(
+                            test_run.value
+                        )
                    else:
                        # If the command args used between runs are different,
                        # discard old run data and prefer new command args
                        #
                        # This relies on the fact that paths from get_result_paths()
                        # are sorted from older to newer
-                        print(f"Warning: Command args for {test_run.name} from {result_path} is different from prior runs.")
-                        print("DISCARDING older data and OVERRIDING with data using new arg.")
+                        print(
+                            f"Warning: Command args for {test_run.name} from {result_path} differ from prior runs."
+                        )
+                        print(
+                            "DISCARDING older data and OVERRIDING with data using new args."
+                        )
                        average_aggregate[test_run.name] = reset_aggregate()
-
+
        return {
            name: BenchmarkHistoricAverage(
                name=name,
                average_type=stats["aggregate"].get_type(),
                value=stats["aggregate"].get_avg(),
-                command_args=stats["command_args"]
+                command_args=stats["command_args"],
            )
            for name, stats in average_aggregate.items()
        }
-

    def to_hist_avg(
        hist_avg: dict[str, BenchmarkHistoricAverage], target: BenchmarkRun
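The Aggregator contract, as exercised in this diff, is: construct with starting_elements, then add(), get_type(), and get_avg(). A minimal conforming sketch (not the repo's actual SimpleMedian implementation):

```python
import statistics


class MedianAggregator:
    """Illustrative stand-in for the Aggregator interface used above."""

    def __init__(self, starting_elements: list[float]):
        self.elements = list(starting_elements)

    def add(self, value: float) -> None:
        self.elements.append(value)

    def get_type(self) -> str:
        return "median"

    def get_avg(self) -> float:
        return statistics.median(self.elements)
```
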
@@ -181,12 +201,14 @@ def to_hist_avg(
        Returns:
            A tuple returning (list of improved tests, list of regressed tests).
        """
+
        def halfway_round(value: int, n: int):
            """
            Python's default round() does banker's rounding, which doesn't
            make much sense here. This rounds 0.5 to 1, and -0.5 to -1
            """
-            if value == 0: return 0
+            if value == 0:
+                return 0
            return int(value * 10**n + 0.5 * (value / abs(value))) / 10**n

        improvement = []
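To see the difference from banker's rounding (values chosen for illustration):

```python
# Python's built-in round() rounds ties to the nearest even digit:
print(round(0.5))  # 0
print(round(1.5))  # 2

# halfway_round() rounds ties away from zero instead:
# halfway_round(0.5, 0)  -> 1.0
# halfway_round(-0.5, 0) -> -1.0
# halfway_round(0.45, 1) -> 0.5
```
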
@@ -198,11 +220,11 @@ def halfway_round(value: int, n: int):
            if hist_avg[test.name].command_args != set(test.command[1:]):
                print(f"Warning: skipped {test.name} due to command args mismatch.")
                continue
-
+
            delta = 1 - (
                test.value / hist_avg[test.name].value
-                if test.lower_is_better else
-                hist_avg[test.name].value / test.value
+                if test.lower_is_better
+                else hist_avg[test.name].value / test.value
            )

            def perf_diff_entry() -> dict:
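The delta is normalized so that a positive value is always an improvement, whichever direction the metric runs. Worked with hypothetical numbers:

```python
# lower_is_better (e.g. latency): historic avg 100, new run 90
delta = 1 - (90 / 100)   # 0.10 -> 10% improvement

# higher-is-better (e.g. throughput): historic avg 100, new run 110
delta = 1 - (100 / 110)  # ~0.09 -> ~9% improvement

# A regression yields a negative delta either way:
delta = 1 - (120 / 100)  # -0.20 for a lower-is-better metric
```
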
@@ -221,9 +243,11 @@ def perf_diff_entry() -> dict:

        return improvement, regression

-
    def to_hist(
-        avg_type: str, result_name: str, compare_file: str, result_dir: str,
+        avg_type: str,
+        result_name: str,
+        compare_file: str,
+        result_dir: str,
        cutoff: str,
    ) -> tuple:
        """
@@ -236,7 +260,7 @@ def to_hist(
            result_dir (str): Directory to look for results in
            cutoff (str): Timestamp (in YYYYMMDD_HHMMSS) indicating the oldest
                result included in the historic average calculation
-            avg_type (str): Type of "average" (measure of central tendency) to
+            avg_type (str): Type of "average" (measure of central tendency) to
                use in historic "average" calculation

        Returns:
@@ -245,22 +269,24 @@ def to_hist(
            avg_type, and delta field added, indicating the historic average,
            type of central tendency used for historic average, and the delta
            from the average for this benchmark run.
-        """
+        """

        if avg_type != "median":
            print("Only median is currently supported: Refusing to continue.")
            exit(1)

        try:
-            with open(compare_file, 'r') as compare_f:
+            with open(compare_file, "r") as compare_f:
                compare_result = BenchmarkRun.from_json(json.load(compare_f))
        except:
            print(f"Unable to open {compare_file}.")
            exit(1)

        # Sanity checks:
        if compare_result.hostname == "Unknown":
-            print("Hostname for results in {compare_file} unknown, unable to build a historic average: Refusing to continue.")
+            print(
+                f"Hostname for results in {compare_file} unknown, unable to build a historic average: Refusing to continue."
+            )
            exit(1)
        if not Validate.timestamp(cutoff):
            print("Invalid timestamp provided, please follow YYYYMMDD_HHMMSS.")
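Validate.timestamp comes from elsewhere in the repo and its body is not shown here; a purely illustrative stand-in that accepts the YYYYMMDD_HHMMSS format might look like:

```python
from datetime import datetime


def timestamp_is_valid(ts: str) -> bool:
    """Illustrative check only; not the repo's actual Validate.timestamp."""
    try:
        datetime.strptime(ts, "%Y%m%d_%H%M%S")
        return True
    except ValueError:
        return False


print(timestamp_is_valid("20240101_000000"))  # True
print(timestamp_is_valid("2024-01-01"))       # False
```
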
@@ -272,44 +298,43 @@ def to_hist(
            result_dir,
            compare_result.hostname,
            cutoff,
-            exclude=[Path(compare_file).stem]
+            exclude=[Path(compare_file).stem],
        )
        return Compare.to_hist_avg(hist_avg, compare_result)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Compare benchmark results")
    subparsers = parser.add_subparsers(dest="operation", required=True)
-    parser_avg = subparsers.add_parser("to_hist", help="Compare a benchmark result to historic average")
+    parser_avg = subparsers.add_parser(
+        "to_hist", help="Compare a benchmark result to historic average"
+    )
    parser_avg.add_argument(
        "--avg-type",
        type=str,
        help="Measure of central tendency to use when computing historic average",
-        default="median"
+        default="median",
    )
    parser_avg.add_argument(
        "--name",
        type=str,
        required=True,
-        help="Save name of the benchmark results to compare to"
+        help="Save name of the benchmark results to compare to",
    )
    parser_avg.add_argument(
        "--compare-file",
        type=str,
        required=True,
-        help="Result file to compare against te historic average"
+        help="Result file to compare against the historic average",
    )
    parser_avg.add_argument(
-        "--results-dir",
-        type=str,
-        required=True,
-        help="Directory storing results"
+        "--results-dir", type=str, required=True, help="Directory storing results"
    )
    parser_avg.add_argument(
        "--cutoff",
        type=str,
        help="Timestamp (in YYYYMMDD_HHMMSS) of oldest result to include in historic average calculation",
-        default="20000101_010101"
+        default="20000101_010101",
    )

    args = parser.parse_args()
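Given the parser built above, a hypothetical invocation (paths and names are illustrative) can be exercised directly:

```python
# Equivalent to:
#   python compare.py to_hist --name baseline \
#       --compare-file results/baseline_20240315_120000.json --results-dir results
args = parser.parse_args(
    [
        "to_hist",
        "--name", "baseline",
        "--compare-file", "results/baseline_20240315_120000.json",
        "--results-dir", "results",
    ]
)
print(args.cutoff)  # falls back to the default "20000101_010101"
```
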
@@ -322,11 +347,7 @@ def to_hist(
        raise ValueError("Timestamp must be provided as YYYYMMDD_HHMMSS.")

    improvements, regressions = Compare.to_hist(
-        "median",
-        args.name,
-        args.compare_file,
-        args.results_dir,
-        args.cutoff
+        "median", args.name, args.compare_file, args.results_dir, args.cutoff
    )

    def print_regression(entry: dict):
@@ -339,10 +360,12 @@ def print_regression(entry: dict):

    if improvements:
        print("#\n# Improvements:\n#\n")
-        for test in improvements: print_regression(test)
+        for test in improvements:
+            print_regression(test)
    if regressions:
        print("#\n# Regressions:\n#\n")
-        for test in regressions: print_regression(test)
+        for test in regressions:
+            print_regression(test)
    exit(1)  # Exit 1 to trigger github test failure
else:
    print("Unsupported operation: exiting.")