
Commit 0dea393

Preliminary impl of python config loading, new configs, and changes required
1 parent 798e16b commit 0dea393

File tree

7 files changed: +546 -211 lines


devops/benchmarking/config.ini

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+;
+; This file contains configuration options to change the behaviour of the
+; benchmarking workflow in sycl-linux-run-tests.yml.
+;
+; DO NOT USE THE CONTENTS OF THIS FILE DIRECTLY -- Due to security concerns, The
+; contents of this file must be sanitized first before use. See: xxx.py
+;
+
+; Compute-benchmark compile/run options
+[compute_bench]
+; Value for -j during compilation of compute-benchmarks
+compile_jobs = 2
+; Number of iterations to run compute-benchmark tests
+iterations = 100
+
+; Options for benchmark result metrics (to record/compare against)
+[metrics]
+; Sets the metrics to record/aggregate in the historical average.
+; Format: comma-separated list of column names in compute-benchmark results
+recorded = Median,StdDev
+; Sets the tolerance for each recorded metric and their allowed deviation from
+; the historical average. Metrics not included here are not compared against
+; when passing/failing benchmark results.
+; Format: comma-separated list of <metric>:<deviation percentage in decimals>
+tolerances = Median:0.5
+
+; Options for computing historical averages
+[average]
+; Number of days (from today) to look back for results when computing historical
+; average
+cutoff_range = 7
+; Minimum number of samples required to compute a historical average
+min_threshold = 3
+
+; ONEAPI_DEVICE_SELECTOR linting/options
+[device_selector]
+; Backends to allow in device_selector
+enabled_backends = level_zero,opencl,cuda,hip
+; native_cpu is disabled
+
+; Devices to allow in device_selector
+enabled_devices = cpu,gpu
+; fpga is disabled
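For context, the tolerances key above layers a small <metric>:<deviation> syntax on top of standard INI. The sanitizing loader the header comment points at (xxx.py) is not part of this commit's visible diff, so the snippet below is only a rough sketch of how a reader built on Python's stdlib configparser might split those fields; the variable names are illustrative, not taken from the commit.

import configparser

config = configparser.ConfigParser()
config.read("devops/benchmarking/config.ini")

# "Median,StdDev" -> ["Median", "StdDev"]
recorded = config["metrics"]["recorded"].split(",")

# "Median:0.5" -> {"Median": 0.5}
tolerances = {
    name: float(deviation)
    for name, deviation in (
        pair.split(":") for pair in config["metrics"]["tolerances"].split(",")
    )
}

cutoff_days = config.getint("average", "cutoff_range")
print(recorded, tolerances, cutoff_days)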

devops/benchmarking/constants.ini

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+;
+; This file defines constants used throughout the benchmarking workflow in
+; sycl-linux-run-tests.yml. If you're trying to change the behavior of this
+; workflow, you're likely looking for /devops/benchmarking/config.ini instead.
+;
+; DO NOT USE THE CONTENTS OF THIS FILE DIRECTLY -- Due to security concerns, The
+; contents of this file must be sanitized first before use. See: xxx.py
+;
+
+; Constants for compute-benchmarks
+[compute_bench]
+git_repo = ianayl/compute-benchmarks
+git_branch = update-sycl
+path = ./compute-benchmarks
+
+; Constants for artifacts
+[artifact]
+; Path to root folder storing benchmark CI artifact
+path = ./artifact
+; Path (relative to artifact.path) to cache compute-benchmark results
+;
+; If a test result does not get moved out of this catch-all cache path, it is
+; considered to have failed
+output_cache = ./artifact/failed_tests
+; Path (relative to artifact.path) to cache passing compute-benchmark results
+passing_cache = ./artifact/passing_tests
+
+; Constants for git repo storing benchmark performance results
+[perf_res]
+git_repo = ianayl/llvm-ci-perf-results
+git_branch = test-compute-bench
+; Path to clone performance result repo
+path = ./llvm-ci-perf-res
+
+[timestamp]
+; Timestamp format used for
+format = %%Y%%m%%d_%%H%%M%%S
+
+[benchmark_log]
+; Log file for test cases that perform over the allowed variance
+slow = ./artifact/benchmarks_failed.log
+; Log file for test cases that errored / failed to build
+error = ./artifact/benchmarks_errored.log
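Note the doubled percent signs in [timestamp]: configparser's default interpolation reserves '%', so '%%' is the escaped literal form. A minimal sketch (assuming the file is read with the stdlib configparser; the commit's own loader is referenced only as xxx.py) of how the value round-trips into a normal strftime pattern:

import configparser
from datetime import datetime

config = configparser.ConfigParser()
config.read("devops/benchmarking/constants.ini")

# "%%Y%%m%%d_%%H%%M%%S" in the file is unescaped to "%Y%m%d_%H%M%S" on read.
fmt = config["timestamp"]["format"]
stamp = datetime.now().strftime(fmt)   # e.g. "20240131_142500"

# 15 characters, matching the [-19:-4] filename slice aggregate.py (below)
# uses to pull the timestamp back out of "<name>-<timestamp>.csv".
print(stamp, len(stamp))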

devops/scripts/benchmarking/aggregate.py

Lines changed: 128 additions & 65 deletions
@@ -3,30 +3,66 @@
 from pathlib import Path
 import heapq
 import statistics
-
-import common
-
-
-# Simple median calculation
-class SimpleMedian:
-
+from common import Validate, SanitizedConfig
+from abc import ABC, abstractmethod
+
+
+class Aggregator(ABC):
+    """
+    Aggregator classes used to "aggregate" a pool of elements, and produce an
+    "average" (precisely, some "measure of central tendency") from the elements.
+    """
+    @staticmethod
+    @abstractmethod
+    def get_type() -> str:
+        """
+        Return a string indicating the type of average this aggregator
+        produces.
+        """
+        pass
+
+    @abstractmethod
+    def add(self, n: float):
+        """
+        Add/aggregate an element to the pool of elements used by this aggregator
+        to produce an average calculation.
+        """
+        pass
+
+    @abstractmethod
+    def get_avg(self) -> float:
+        """
+        Produce an average from the pool of elements aggregated using add().
+        """
+        pass
+
+
+class SimpleMedian(Aggregator):
+    """
+    Simple median calculation: if the number of samples being generated are low,
+    this is the fastest median method.
+    """
     def __init__(self):
         self.elements = []
 
+    @staticmethod
+    def get_type() -> str:
+        return "median"
+
     def add(self, n: float):
         self.elements.append(n)
 
     def get_median(self) -> float:
         return statistics.median(self.elements)
 
 
-# Calculate medians incrementally using a heap: Useful for when dealing with
-# large number of samples.
-#
-# TODO how many samples are we going to realistically get? I had written this
-# with precommit in mind, but if this only runs nightly, it would actually be
-# faster to do a normal median calculation.
-class StreamingMedian:
+class StreamingMedian(Aggregator):
+    """
+    Calculate medians incrementally using heaps: Theoretically the fastest way
+    to calculate a median from a stream of elements, but realistically is only
+    faster when dealing with huge numbers of samples that would be generated by
+    i.e. enabling this workflow in precommit and using longer periods of time.
+    """
 
     def __init__(self):
         # Gist: we keep a minheap and a maxheap, and store the median as the top
@@ -36,6 +72,10 @@ def __init__(self):
         self.minheap_larger = []
         self.maxheap_smaller = []
 
+    @staticmethod
+    def get_type() -> str:
+        return "median"
+
     # Note: numbers on maxheap should be negative, as heapq
     # is minheap by default
 
@@ -63,64 +103,87 @@ def get_median(self) -> float:
         return -self.maxheap_smaller[0]
 
 
-def aggregate_median(test_name: str, test_dir: str, cutoff: str):
-
-    # Get all .csv samples for the requested test folder
-    def csv_samples() -> list[str]:
-        # TODO check that the path below is valid directory
-        cache_dir = Path(f"{test_dir}")
-        # TODO check for time range; What time range do I want?
-        return filter(
-            lambda f: f.is_file()
-            and common.valid_timestamp(str(f)[-19:-4])
-            and str(f)[-19:-4] > cutoff,
-            cache_dir.glob(f"{test_name}-*_*.csv"),
-        )
-
-    # Calculate median of every desired metric:
-    aggregate_s = dict()
-    for sample_path in csv_samples():
-        with open(sample_path, "r") as sample_file:
-            for s in csv.DictReader(sample_file):
-                test_case = s["TestCase"]
-                # Construct entry in aggregate_s for test case if it does not
-                # exist already:
-                if test_case not in aggregate_s:
-                    aggregate_s[test_case] = {
-                        metric: SimpleMedian() for metric in common.metrics_variance
-                    }
-
-                for metric in common.metrics_variance:
-                    aggregate_s[test_case][metric].add(common.sanitize(s[metric]))
+class Aggregate:
+    """
+    Static class providing methods for aggregating data
+    """
+    @staticmethod
+    def hist_avg(benchmark_name: str, res_dir: str, cutoff: str,
+                 aggregator = SimpleMedian):
+        if not os.path.isdir(res_dir):
+            print(f"Not a directory: {res_dir}.", file=sys.stderr)
+            exit(1)
+
+        def csv_samples() -> list[str]:
+            """ Get all valid .csv samples from the results folder. """
+            cache_dir = Path(f"{res_dir}")
+            # Filter all benchmark .csv files in the result directory:
+            return filter(
+                # Make sure the .csv "file" is a file:
+                lambda f: f.is_file()
+                # Make sure timestamp of .csv file is good format:
+                # [-19:-4] corresponds to the timestamp in the filename.
+                and Validate.timestamp(str(f)[-19:-4])
+                # Make sure timestamp is bigger than cutoff timestamp:
+                and str(f)[-19:-4] > cutoff,
+                cache_dir.glob(f"{benchmark_name}-*_*.csv"),
+            )
 
-    # Write calculated median (aggregate_s) as a new .csv file:
-    with open(
-        f"{test_dir}/{test_name}-median.csv", "w"
-    ) as output_csv:
-        writer = csv.DictWriter(
-            output_csv, fieldnames=["TestCase", *common.metrics_variance.keys()]
-        )
-        writer.writeheader()
-        for test_case in aggregate_s:
-            writer.writerow(
-                {"TestCase": test_case}
-                | {
-                    metric: aggregate_s[test_case][metric].get_median()
-                    for metric in common.metrics_variance
-                }
+        # Calculate median of every desired metric:
+        samples_aggregate = dict()
+        for sample_path in csv_samples():
+            with open(sample_path, "r") as sample_file:
+                for sample in csv.DictReader(sample_file):
+                    test = sample["TestCase"]
+                    # Construct entry in aggregator for test if it doesn't exist
+                    # already:
+                    if test not in samples_aggregate:
+                        samples_aggregate[test] = {
+                            metric: aggregator()
+                            for metric in SanitizedConfig.METRICS_TOLERANCES
+                        }
+
+                    # For each metric of concern, add to aggregator:
+                    for metric in SanitizedConfig.METRICS_TOLERANCES:
+                        sample_value = Validate.sanitize_stat(sample[metric])
+                        if not isinstance(sample_value, float):
+                            print(f"Malformatted statistic in {str(sample_path)}: " +
+                                  f"'{sample[metric]}' for {test}.")
+                            exit(1)
+                        # Add metric from sample for current test to aggregate:
+                        samples_aggregate[test][metric].add(sample_value)
+
+        # Calculate + write new average (from samples_aggregate) in new .csv file:
+        with open(
+            f"{res_dir}/{benchmark_name}-{aggregator.get_type()}.csv", "w"
+        ) as output_csv:
+            writer = csv.DictWriter(
+                output_csv, fieldnames=["TestCase", *SanitizedConfig.METRICS_TOLERANCES.keys()]
             )
+            writer.writeheader()
+            for test in samples_aggregate:
+                writer.writerow(
+                    {"TestCase": test_case}
+                    | {
+                        metric: samples_aggregate[test][metric].get_median()
+                        for metric in SanitizedConfig.METRICS_TOLERANCES
+                    }
+                )
 
 
 if __name__ == "__main__":
-    if len(sys.argv) < 4:
+    if len(sys.argv) != 5:
         print(
-            f"Usage: {sys.argv[0]} <test name> <absolute path to test directory> <cutoff timestamp YYYYMMDD_HHMMSS>"
+            f"Usage: {sys.argv[0]} <path to /devops> <benchmark name> <absolute path to benchmark results> <cutoff timestamp YYYYMMDD_HHMMSS>"
        )
        exit(1)
-    if not common.valid_timestamp(sys.argv[3]):
-        print(sys.argv)
-        print(f"Bad cutoff timestamp, please use YYYYMMDD_HHMMSS.")
+    if not Validate.timestamp(sys.argv[4]):
+        print(f"Bad cutoff timestamp, please use YYYYMMDD_HHMMSS.", file=sys.stderr)
+        exit(1)
+    if not Validate.filepath(sys.argv[1]):
+        print(f"Not a valid filepath: {sys.argv[1]}", file=sys.stderr)
        exit(1)
-    common.load_configs()
+    # If the filepath provided passed filepath validation, then it is clean
+    SanitizedConfig.load(sys.argv[1])
 
-    aggregate_median(sys.argv[1], sys.argv[2], sys.argv[3])
+    Aggregate.hist_avg(sys.argv[2], sys.argv[3], sys.argv[4])
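The StreamingMedian class above only states its invariant in a comment ("we keep a minheap and a maxheap, and store the median as the top"), and the hunks do not show its add() logic. The following is a self-contained illustration of that two-heap technique, checked against statistics.median; the class name and rebalancing details are illustrative, not taken from the commit.

import heapq
import random
import statistics

class TwoHeapMedian:
    """Illustrative two-heap running median (not the commit's exact code)."""

    def __init__(self):
        self.minheap_larger = []   # larger half, smallest element on top
        self.maxheap_smaller = []  # smaller half, stored negated so its max is on top

    def add(self, n: float):
        # Route the new element through the smaller half, then hand its
        # maximum to the larger half to preserve ordering between the heaps.
        heapq.heappush(self.maxheap_smaller, -n)
        heapq.heappush(self.minheap_larger, -heapq.heappop(self.maxheap_smaller))
        # Keep the smaller half at least as large as the larger half.
        if len(self.minheap_larger) > len(self.maxheap_smaller):
            heapq.heappush(self.maxheap_smaller, -heapq.heappop(self.minheap_larger))

    def get_median(self) -> float:
        if len(self.maxheap_smaller) > len(self.minheap_larger):
            return -self.maxheap_smaller[0]
        return (-self.maxheap_smaller[0] + self.minheap_larger[0]) / 2

samples = [random.uniform(0, 100) for _ in range(999)]
median = TwoHeapMedian()
for value in samples:
    median.add(value)
assert median.get_median() == statistics.median(samples)

With the heaps arranged this way, each add() costs O(log n) and the median is always available at the heap tops, which is the trade-off the StreamingMedian docstring describes versus recomputing a SimpleMedian over the full sample list.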
