@@ -39,8 +39,8 @@ class Compare:
3939 """Class containing logic for comparisons between results"""
4040 @staticmethod
4141 def get_hist_avg (
42- result_name : str , result_dir : str , cutoff : str , aggregator = SimpleMedian ,
43- exclude : list [str ] = []
42+ result_name : str , result_dir : str , hostname : str , cutoff : str ,
43+ aggregator : Aggregator = SimpleMedian , exclude : list [str ] = []
4444 ) -> dict [str , BenchmarkHistoricAverage ]:
4545 """
4646 Create a historic average for results named result_name in result_dir
@@ -51,6 +51,7 @@ def get_hist_avg(
5151 result_dir (str): Path to folder containing benchmark results
5252 cutoff (str): Timestamp in YYYYMMDD_HHMMSS of oldest results used in
5353 average calcultaion
54+ hostname (str): Hostname of machine on which results ran on
5455 aggregator (Aggregator): The aggregator to use for calculating the
5556 historic average
5657 exclude (list[str]): List of filenames (only the stem) to exclude
@@ -60,14 +61,21 @@ def get_hist_avg(
6061 A dictionary mapping benchmark names to BenchmarkHistoricAverage
6162 objects
6263 """
64+ if not Validate .timestamp (cutoff ):
65+ raise ValueError ("Provided cutoff time is not a proper timestamp." )
66+
6367 def get_timestamp (f : str ) -> str :
6468 """Extract timestamp from result filename"""
6569 return str (f )[- len ("YYYYMMDD_HHMMSS.json" ) : - len (".json" )]
6670
6771 def get_result_paths () -> list [str ]:
6872 """
6973 Get a list of all results matching result_name in result_dir that is
70- newer than the timestamp specified by cutoff
74+ newer than the timestamp specified by cutoff based off of filename.
75+
76+ This function assumes filenames of benchmark result files are
77+ accurate; files returned by this function will be checked a second
78+ time once their contents are actually loaded.
7179 """
7280 cache_dir = Path (f"{ result_dir } " )
7381
@@ -84,6 +92,23 @@ def get_result_paths() -> list[str]:
8492 cache_dir .glob (f"{ result_name } _*_*.json" )
8593 )
8694 )
95+
96+ def check_benchmark_result (result : BenchmarkRun ) -> bool :
97+ """
98+ Returns True if result file:
99+ - Was ran on the target machine/hostname specified
100+ - Sanity check: ensure metadata are all expected values:
101+ - Date is truly before cutoff timestamp
102+ - Name truly matches up with specified result_name
103+ """
104+ if result .hostname != hostname :
105+ return False
106+ if result .name != result_name :
107+ print (f"Warning: Result file { result_path } does not match specified result name { result .name } ." )
108+ return False
109+ if result .date < datetime .strptime (cutoff , "%Y%m%d_%H%M%S" ):
110+ return False
111+ return True
87112
88113 # key: name of the benchmark test result
89114 # value: { command_args: set[str], aggregate: Aggregator }
@@ -95,9 +120,13 @@ def get_result_paths() -> list[str]:
95120 for result_path in get_result_paths ():
96121 with result_path .open ('r' ) as result_f :
97122 result = BenchmarkRun .from_json (json .load (result_f ))
98-
99- if result .name != result_name :
100- print (f"Warning: Result file { result_path } has mismatching name { result .name } . Skipping file." )
123+
124+ # Perform another check on result file here, as get_result_paths()
125+ # only filters out result files via filename, which:
126+ # - does not contain enough information to filter out results, i.e.
127+ # no hostname information.
128+ # - information in filename may be mismatched from metadata.
129+ if not check_benchmark_result (result ):
101130 continue
102131
103132 for test_run in result .results :
@@ -139,26 +168,25 @@ def reset_aggregate() -> dict:
139168
140169
141170 def to_hist_avg (
142- hist_avg : dict [str , BenchmarkHistoricAverage ], compare_file : str
171+ hist_avg : dict [str , BenchmarkHistoricAverage ], target : BenchmarkRun
143172 ) -> tuple :
144173 """
145- Compare results in compare_file to a pre-existing map of historic
146- averages
174+ Compare results in target to a pre-existing map of historic average.
175+
176+ Caution: Ensure the generated hist_avg is for results running on the
177+ same host as target.hostname.
147178
148179 Args:
149180 hist_avg (dict): A historic average map generated from get_hist_avg
150- compare_file (str ): Full filepath of result to compare against
181+ target (BenchmarkRun ): results to compare against hist_avg
151182
152183 Returns:
153184 A tuple returning (list of improved tests, list of regressed tests).
154185 """
155- with open (compare_file , 'r' ) as compare_f :
156- compare_result = BenchmarkRun .from_json (json .load (compare_f ))
157-
158186 improvement = []
159187 regression = []
160188
161- for test in compare_result .results :
189+ for test in target .results :
162190 if test .name not in hist_avg :
163191 continue
164192 if hist_avg [test .name ].command_args != set (test .command [1 :]):
@@ -186,10 +214,9 @@ def perf_diff_entry() -> dict:
186214 return improvement , regression
187215
188216
189-
190217 def to_hist (
191- avg_type : str , result_name : str , compare_file : str , result_dir : str , cutoff : str ,
192-
218+ avg_type : str , result_name : str , compare_file : str , result_dir : str ,
219+ cutoff : str ,
193220 ) -> tuple :
194221 """
195222 Pregenerate a historic average from results named result_name in
@@ -213,17 +240,33 @@ def to_hist(
213240 """
214241
215242 if avg_type != "median" :
216- print ("Only median is currently supported: refusing to continue." )
243+ print ("Only median is currently supported: Refusing to continue." )
244+ exit (1 )
245+
246+ try :
247+ with open (compare_file , 'r' ) as compare_f :
248+ compare_result = BenchmarkRun .from_json (json .load (compare_f ))
249+ except :
250+ print (f"Unable to open { compare_file } ." )
251+ exit (1 )
252+
253+ # Sanity checks:
254+ if compare_result .hostname == "Unknown" :
255+ print ("Hostname for results in {compare_file} unknown, unable to build a historic average: Refusing to continue." )
256+ exit (1 )
257+ if not Validate .timestamp (cutoff ):
258+ print ("Invalid timestamp provided, please follow YYYYMMDD_HHMMSS." )
217259 exit (1 )
218260
219- # TODO call validator on cutoff timestamp
261+ # Build historic average and compare results against historic average:
220262 hist_avg = Compare .get_hist_avg (
221263 result_name ,
222264 result_dir ,
265+ compare_result .hostname ,
223266 cutoff ,
224267 exclude = [Path (compare_file ).stem ]
225268 )
226- return Compare .to_hist_avg (hist_avg , compare_file )
269+ return Compare .to_hist_avg (hist_avg , compare_result )
227270
228271
229272if __name__ == "__main__" :
0 commit comments