from pathlib import Path
from dataclasses import dataclass, asdict

+
@dataclass
class BenchmarkHistoricAverage:
    """Contains historic average information for 1 benchmark"""
+
    # Name of benchmark as defined in Benchmark class definition
    name: str

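For orientation, the dataclass has four fields in total; the remaining two, average_type and value, are filled in where get_hist_avg builds its return dict later in this diff. A standalone sketch (all values below are hypothetical):

```python
from dataclasses import dataclass, asdict


@dataclass
class BenchmarkHistoricAverage:
    name: str
    average_type: str
    value: float
    command_args: set[str]


# Hypothetical entry; benchmark name and args are illustrative only.
avg = BenchmarkHistoricAverage(
    name="submit_kernel",
    average_type="median",
    value=12.4,
    command_args={"--ioq", "--profiling"},
)
print(asdict(avg)["value"])  # 12.4
```
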
@@ -32,27 +34,32 @@ class BenchmarkHistoricAverage:
    #
    # This exists to ensure benchmarks called using different arguments are not
    # compared together.
-    command_args: set[str]
+    command_args: set[str]
    # TODO Ensure ONEAPI_DEVICE_SELECTOR? GPU name itself?


class Compare:
    """Class containing logic for comparisons between results"""
+
    @staticmethod
    def get_hist_avg(
-        result_name: str, result_dir: str, hostname: str, cutoff: str,
-        aggregator: Aggregator = SimpleMedian, exclude: list[str] = []
+        result_name: str,
+        result_dir: str,
+        hostname: str,
+        cutoff: str,
+        aggregator: Aggregator = SimpleMedian,
+        exclude: list[str] = [],
    ) -> dict[str, BenchmarkHistoricAverage]:
        """
        Create a historic average for results named result_name in result_dir
        using the specified aggregator

        Args:
-            result_name (str): Name of benchmarking result to obtain average for
+            result_name (str): Name of benchmarking result to obtain average for
            result_dir (str): Path to folder containing benchmark results
            cutoff (str): Timestamp in YYYYMMDD_HHMMSS of oldest results used in
                average calculation
-            hostname (str): Hostname of machine on which results ran on
+            hostname (str): Hostname of machine on which the results ran
            aggregator (Aggregator): The aggregator to use for calculating the
                historic average
            exclude (list[str]): List of filenames (only the stem) to exclude
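A usage sketch of the reformatted signature (result name, directory, and hostname are hypothetical; SimpleMedian remains the default aggregator):

```python
# Hypothetical call; "baseline", "./results", and the hostname are illustrative.
hist_avg = Compare.get_hist_avg(
    result_name="baseline",
    result_dir="./results",
    hostname="bench-host-01",
    cutoff="20240101_000000",
)
for name, avg in hist_avg.items():
    print(f"{name}: {avg.average_type} = {avg.value}")
```
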
@@ -90,10 +97,10 @@ def get_result_paths() -> list[str]:
                    # Result file is not excluded
                    and f.stem not in exclude,
                    # Assumes format is <name>_YYYYMMDD_HHMMSS.json
-                    cache_dir.glob(f"{result_name}_*_*.json")
+                    cache_dir.glob(f"{result_name}_*_*.json"),
                )
            )
-
+
        def validate_benchmark_result(result: BenchmarkRun) -> bool:
            """
            Returns True if result file:
@@ -105,21 +112,25 @@ def validate_benchmark_result(result: BenchmarkRun) -> bool:
            if result.hostname != hostname:
                return False
            if result.name != result_name:
-                print(f"Warning: Result file {result_path} does not match specified result name {result.name}.")
+                print(
+                    f"Warning: Result file {result_path} does not match specified result name {result.name}."
+                )
                return False
-            if result.date < datetime.strptime(cutoff, "%Y%m%d_%H%M%S").replace(tzinfo=timezone.utc):
+            if result.date < datetime.strptime(cutoff, "%Y%m%d_%H%M%S").replace(
+                tzinfo=timezone.utc
+            ):
                return False
            return True

        # key: name of the benchmark test result
        # value: { command_args: set[str], aggregate: Aggregator }
-        #
+        #
        # This is then used to build a dict[BenchmarkHistoricAverage] used
        # to find historic averages.
        average_aggregate: dict[str, dict] = dict()
-
+
        for result_path in get_result_paths():
-            with result_path.open('r') as result_f:
+            with result_path.open("r") as result_f:
                result = BenchmarkRun.from_json(json.load(result_f))

            # Perform another check on result file here, as get_result_paths()
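The cutoff check above parses the timestamp as UTC before comparing. A quick standalone sketch of that comparison (illustrative values):

```python
from datetime import datetime, timezone

cutoff = "20240101_000000"
cutoff_dt = datetime.strptime(cutoff, "%Y%m%d_%H%M%S").replace(tzinfo=timezone.utc)

# A hypothetical result date; anything older than the cutoff is filtered out.
result_date = datetime(2023, 12, 31, 23, 59, 59, tzinfo=timezone.utc)
print(result_date < cutoff_dt)  # True -> this result would be excluded
```
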
@@ -131,39 +142,48 @@ def validate_benchmark_result(result: BenchmarkRun) -> bool:
                continue

            for test_run in result.results:
+
                def reset_aggregate() -> dict:
-                    return {
+                    return {
                        "command_args": set(test_run.command[1:]),
-                        "aggregate": aggregator(starting_elements=[test_run.value])
+                        "aggregate": aggregator(starting_elements=[test_run.value]),
                    }

                # Add every benchmark run to average_aggregate:
                if test_run.name not in average_aggregate:
                    average_aggregate[test_run.name] = reset_aggregate()
                else:
                    # Check that we are comparing runs with the same cmd args:
-                    if set(test_run.command[1:]) == average_aggregate[test_run.name]["command_args"]:
-                        average_aggregate[test_run.name]["aggregate"].add(test_run.value)
+                    if (
+                        set(test_run.command[1:])
+                        == average_aggregate[test_run.name]["command_args"]
+                    ):
+                        average_aggregate[test_run.name]["aggregate"].add(
+                            test_run.value
+                        )
                    else:
                        # If the command args used between runs are different,
                        # discard old run data and prefer new command args
                        #
                        # This relies on the fact that paths from get_result_paths()
                        # are sorted from older to newer
-                        print(f"Warning: Command args for {test_run.name} from {result_path} is different from prior runs.")
-                        print("DISCARDING older data and OVERRIDING with data using new arg.")
+                        print(
+                            f"Warning: Command args for {test_run.name} from {result_path} differ from prior runs."
+                        )
+                        print(
+                            "DISCARDING older data and OVERRIDING with data using new args."
+                        )
                        average_aggregate[test_run.name] = reset_aggregate()
-
+
        return {
            name: BenchmarkHistoricAverage(
                name=name,
                average_type=stats["aggregate"].get_type(),
                value=stats["aggregate"].get_avg(),
-                command_args=stats["command_args"]
+                command_args=stats["command_args"],
            )
            for name, stats in average_aggregate.items()
        }
-

    def to_hist_avg(
        hist_avg: dict[str, BenchmarkHistoricAverage], target: BenchmarkRun
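The Aggregator contract, as exercised in this diff, is: construct with starting_elements, then add(), get_type(), and get_avg(). A minimal conforming sketch (not the repo's actual SimpleMedian implementation):

```python
import statistics


class MedianAggregator:
    """Illustrative stand-in for the Aggregator interface used above."""

    def __init__(self, starting_elements: list[float]):
        self.elements = list(starting_elements)

    def add(self, value: float) -> None:
        self.elements.append(value)

    def get_type(self) -> str:
        return "median"

    def get_avg(self) -> float:
        return statistics.median(self.elements)
```
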
@@ -181,12 +201,14 @@ def to_hist_avg(
        Returns:
            A tuple returning (list of improved tests, list of regressed tests).
        """
+
        def halfway_round(value: int, n: int):
            """
            Python's default round() does banker's rounding, which doesn't
            make much sense here. This rounds 0.5 to 1, and -0.5 to -1
            """
-            if value == 0: return 0
+            if value == 0:
+                return 0
            return int(value * 10**n + 0.5 * (value / abs(value))) / 10**n

        improvement = []
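To see the difference from banker's rounding (values chosen for illustration):

```python
# Python's built-in round() rounds ties to the nearest even digit:
print(round(0.5))  # 0
print(round(1.5))  # 2

# halfway_round() rounds ties away from zero instead:
# halfway_round(0.5, 0)  -> 1.0
# halfway_round(-0.5, 0) -> -1.0
# halfway_round(0.45, 1) -> 0.5
```
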
@@ -198,11 +220,11 @@ def halfway_round(value: int, n: int):
            if hist_avg[test.name].command_args != set(test.command[1:]):
                print(f"Warning: skipped {test.name} due to command args mismatch.")
                continue
-
+
            delta = 1 - (
                test.value / hist_avg[test.name].value
-                if test.lower_is_better else
-                hist_avg[test.name].value / test.value
+                if test.lower_is_better
+                else hist_avg[test.name].value / test.value
            )

            def perf_diff_entry() -> dict:
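The delta is normalized so that a positive value is always an improvement, whichever direction the metric runs. Worked with hypothetical numbers:

```python
# lower_is_better (e.g. latency): historic avg 100, new run 90
delta = 1 - (90 / 100)   # 0.10 -> 10% improvement

# higher-is-better (e.g. throughput): historic avg 100, new run 110
delta = 1 - (100 / 110)  # ~0.09 -> ~9% improvement

# A regression yields a negative delta either way:
delta = 1 - (120 / 100)  # -0.20 for a lower-is-better metric
```
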
@@ -221,9 +243,11 @@ def perf_diff_entry() -> dict:

        return improvement, regression

-
    def to_hist(
-        avg_type: str, result_name: str, compare_file: str, result_dir: str,
+        avg_type: str,
+        result_name: str,
+        compare_file: str,
+        result_dir: str,
        cutoff: str,
    ) -> tuple:
        """
@@ -236,7 +260,7 @@ def to_hist(
            result_dir (str): Directory to look for results in
            cutoff (str): Timestamp (in YYYYMMDD_HHMMSS) indicating the oldest
                result included in the historic average calculation
-            avg_type (str): Type of "average" (measure of central tendency) to
+            avg_type (str): Type of "average" (measure of central tendency) to
                use in historic "average" calculation

        Returns:
@@ -245,22 +269,24 @@ def to_hist(
            avg_type, and delta field added, indicating the historic average,
            type of central tendency used for historic average, and the delta
            from the average for this benchmark run.
-        """
+        """

        if avg_type != "median":
            print("Only median is currently supported: Refusing to continue.")
            exit(1)

        try:
-            with open(compare_file, 'r') as compare_f:
+            with open(compare_file, "r") as compare_f:
                compare_result = BenchmarkRun.from_json(json.load(compare_f))
        except:
            print(f"Unable to open {compare_file}.")
            exit(1)

        # Sanity checks:
        if compare_result.hostname == "Unknown":
-            print("Hostname for results in {compare_file} unknown, unable to build a historic average: Refusing to continue.")
+            print(
+                f"Hostname for results in {compare_file} unknown, unable to build a historic average: Refusing to continue."
+            )
            exit(1)
        if not Validate.timestamp(cutoff):
            print("Invalid timestamp provided, please follow YYYYMMDD_HHMMSS.")
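Validate.timestamp comes from elsewhere in the repo and its body is not shown here; a purely illustrative stand-in that accepts the YYYYMMDD_HHMMSS format might look like:

```python
from datetime import datetime


def timestamp_is_valid(ts: str) -> bool:
    """Illustrative check only; not the repo's actual Validate.timestamp."""
    try:
        datetime.strptime(ts, "%Y%m%d_%H%M%S")
        return True
    except ValueError:
        return False


print(timestamp_is_valid("20240101_000000"))  # True
print(timestamp_is_valid("2024-01-01"))       # False
```
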
@@ -272,44 +298,43 @@ def to_hist(
            result_dir,
            compare_result.hostname,
            cutoff,
-            exclude=[Path(compare_file).stem]
+            exclude=[Path(compare_file).stem],
        )
        return Compare.to_hist_avg(hist_avg, compare_result)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Compare benchmark results")
    subparsers = parser.add_subparsers(dest="operation", required=True)
-    parser_avg = subparsers.add_parser("to_hist", help="Compare a benchmark result to historic average")
+    parser_avg = subparsers.add_parser(
+        "to_hist", help="Compare a benchmark result to historic average"
+    )
    parser_avg.add_argument(
        "--avg-type",
        type=str,
        help="Measure of central tendency to use when computing historic average",
-        default="median"
+        default="median",
    )
    parser_avg.add_argument(
        "--name",
        type=str,
        required=True,
-        help="Save name of the benchmark results to compare to"
+        help="Save name of the benchmark results to compare to",
    )
    parser_avg.add_argument(
        "--compare-file",
        type=str,
        required=True,
-        help="Result file to compare against te historic average"
+        help="Result file to compare against the historic average",
    )
    parser_avg.add_argument(
-        "--results-dir",
-        type=str,
-        required=True,
-        help="Directory storing results"
+        "--results-dir", type=str, required=True, help="Directory storing results"
    )
    parser_avg.add_argument(
        "--cutoff",
        type=str,
        help="Timestamp (in YYYYMMDD_HHMMSS) of oldest result to include in historic average calculation",
-        default="20000101_010101"
+        default="20000101_010101",
    )

    args = parser.parse_args()
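Given the parser built above, a hypothetical invocation (paths and names are illustrative) can be exercised directly:

```python
# Equivalent to:
#   python compare.py to_hist --name baseline \
#       --compare-file results/baseline_20240315_120000.json --results-dir results
args = parser.parse_args(
    [
        "to_hist",
        "--name", "baseline",
        "--compare-file", "results/baseline_20240315_120000.json",
        "--results-dir", "results",
    ]
)
print(args.cutoff)  # falls back to the default "20000101_010101"
```
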
@@ -322,11 +347,7 @@ def to_hist(
        raise ValueError("Timestamp must be provided as YYYYMMDD_HHMMSS.")

    improvements, regressions = Compare.to_hist(
-        "median",
-        args.name,
-        args.compare_file,
-        args.results_dir,
-        args.cutoff
+        "median", args.name, args.compare_file, args.results_dir, args.cutoff
    )

    def print_regression(entry: dict):
@@ -339,10 +360,12 @@ def print_regression(entry: dict):

    if improvements:
        print("#\n# Improvements:\n#\n")
-        for test in improvements: print_regression(test)
+        for test in improvements:
+            print_regression(test)
    if regressions:
        print("#\n# Regressions:\n#\n")
-        for test in regressions: print_regression(test)
+        for test in regressions:
+            print_regression(test)
    exit(1)  # Exit 1 to trigger github test failure
else:
    print("Unsupported operation: exiting.")