diff --git a/.github/workflows/sycl-benchmark-aggregate.yml b/.github/workflows/sycl-benchmark-aggregate.yml new file mode 100644 index 0000000000000..87f7ef718160a --- /dev/null +++ b/.github/workflows/sycl-benchmark-aggregate.yml @@ -0,0 +1,52 @@ +name: Aggregate compute-benchmark averages from historical data + +# The benchmarking workflow in sycl-linux-run-tests.yml passes or fails based on +# how the benchmark results compare to a historical average: This historical +# average is calculated in this workflow, which aggregates historical data and +# produces measures of central tendency (median in this case) used for this +# purpose. + +on: + workflow_dispatch: + inputs: + lookback_days: + description: | + Number of days from today to look back in historical results for: + This sets the age limit of data used in average calculation: Any + benchmark results created before `lookback_days` from today is + excluded from being aggregated in the historical average. + type: number + required: true + workflow_call: + inputs: + lookback_days: + type: number + required: true + secrets: + LLVM_SYCL_BENCHMARK_TOKEN: + description: | + Github token used by the faceless account to push newly calculated + medians. + required: true + + +permissions: + contents: read + +jobs: + aggregate: + name: Aggregate average (median) value for all metrics + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + sparse-checkout: | + devops/scripts/benchmarking + devops/benchmarking + devops/actions/benchmarking + - name: Aggregate benchmark results and produce historical average + uses: ./devops/actions/benchmarking/aggregate + with: + lookback_days: ${{ inputs.lookback_days }} + env: + GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 7a4dd382e8bca..710ef9f8cbb9b 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -25,7 +25,7 @@ on: required: False tests_selector: description: | - Two possible options: "e2e" and "cts". + Three possible options: "e2e", "cts", and "compute-benchmarks". 
type: string default: "e2e" @@ -152,6 +152,7 @@ on: options: - e2e - cts + - compute-benchmarks env: description: | @@ -314,3 +315,12 @@ jobs: sycl_cts_artifact: ${{ inputs.sycl_cts_artifact }} target_devices: ${{ inputs.target_devices }} retention-days: ${{ inputs.retention-days }} + + - name: Run compute-benchmarks on SYCL + if: inputs.tests_selector == 'compute-benchmarks' + uses: ./devops/actions/run-tests/benchmark + with: + target_devices: ${{ inputs.target_devices }} + env: + RUNNER_TAG: ${{ inputs.runner }} + GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} diff --git a/.github/workflows/sycl-nightly.yml b/.github/workflows/sycl-nightly.yml index 572284125449f..243919404bc6a 100644 --- a/.github/workflows/sycl-nightly.yml +++ b/.github/workflows/sycl-nightly.yml @@ -243,6 +243,46 @@ jobs: sycl_toolchain_decompress_command: ${{ needs.ubuntu2204_build.outputs.artifact_decompress_command }} sycl_cts_artifact: sycl_cts_bin + aggregate_benchmark_results: + if: always() && !cancelled() + name: Aggregate benchmark results and produce historical averages + uses: ./.github/workflows/sycl-benchmark-aggregate.yml + secrets: + LLVM_SYCL_BENCHMARK_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} + with: + lookback_days: 100 + + run-sycl-benchmarks: + needs: [ubuntu2204_build, aggregate_benchmark_results] + if: ${{ always() && !cancelled() && needs.ubuntu2204_build.outputs.build_conclusion == 'success' }} + strategy: + fail-fast: false + matrix: + include: + - name: Run compute-benchmarks on L0 Gen12 + runner: '["Linux", "gen12"]' + image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN + target_devices: level_zero:gpu + reset_intel_gpu: true + - name: Run compute-benchmarks on L0 PVC + runner: '["Linux", "pvc"]' + image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN + target_devices: level_zero:gpu + reset_intel_gpu: false + uses: ./.github/workflows/sycl-linux-run-tests.yml + secrets: inherit + with: + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image_options: ${{ matrix.image_options }} + target_devices: ${{ matrix.target_devices }} + tests_selector: compute-benchmarks + reset_intel_gpu: ${{ matrix.reset_intel_gpu }} + ref: ${{ github.sha }} + sycl_toolchain_artifact: sycl_linux_default + sycl_toolchain_archive: ${{ needs.ubuntu2204_build.outputs.artifact_archive_name }} + sycl_toolchain_decompress_command: ${{ needs.ubuntu2204_build.outputs.artifact_decompress_command }} + nightly_build_upload: name: Nightly Build Upload if: ${{ github.ref_name == 'sycl' }} diff --git a/devops/actions/benchmarking/aggregate/action.yml b/devops/actions/benchmarking/aggregate/action.yml new file mode 100644 index 0000000000000..c062636684b1f --- /dev/null +++ b/devops/actions/benchmarking/aggregate/action.yml @@ -0,0 +1,95 @@ +name: 'Aggregate compute-benchmark results and produce historical averages' + +# The benchmarking workflow in sycl-linux-run-tests.yml passes or fails based on +# how the benchmark results compare to a historical average: This historical +# average is calculated in this composite workflow, which aggregates historical +# data and produces measures of central tendency (median in this case) used for +# this purpose. +# +# This action assumes that /devops has been checked out in ./devops. 
This action +# also assumes that GITHUB_TOKEN was properly set in env, because according to +# Github, that's apparently the recommended way to pass a secret into a github +# action: +# +# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets +# + +inputs: + lookback_days: + type: number + required: true + +runs: + using: "composite" + steps: + - name: Obtain oldest timestamp allowed for data in aggregation + shell: bash + run: | + # DO NOT use inputs.lookback_days directly, only use SANITIZED_TIMESTAMP. + SANITIZED_LOOKBACK_DAYS="$(echo '${{ inputs.lookback_days }}' | grep -oE '^[0-9]+$')" + if [ -z "$SANITIZED_LOOKBACK_DAYS" ]; then + echo "Please ensure inputs.lookback_days is a number." + exit 1 + fi + SANITIZED_TIMESTAMP="$(date -d "$SANITIZED_LOOKBACK_DAYS days ago" +%Y%m%d_%H%M%S)" + if [ -z "$(echo "$SANITIZED_TIMESTAMP" | grep -oE '^[0-9]{8}_[0-9]{6}$' )" ]; then + echo "Invalid timestamp generated: is inputs.lookback_days valid?" + exit 1 + fi + echo "SANITIZED_TIMESTAMP=$SANITIZED_TIMESTAMP" >> $GITHUB_ENV + - name: Load benchmarking configuration + shell: bash + run: | + $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) + echo "SANITIZED_PERF_RES_GIT_REPO=$SANITIZED_PERF_RES_GIT_REPO" >> $GITHUB_ENV + echo "SANITIZED_PERF_RES_GIT_BRANCH=$SANITIZED_PERF_RES_GIT_BRANCH" >> $GITHUB_ENV + - name: Checkout historical performance results repository + shell: bash + run: | + if [ ! -d ./llvm-ci-perf-results ]; then + git clone -b "$SANITIZED_PERF_RES_GIT_BRANCH" "https://github.com/$SANITIZED_PERF_RES_GIT_REPO" ./llvm-ci-perf-results + fi + - name: Run aggregator on historical results + shell: bash + run: | + # The current format of the historical results respository is: + # + # /// + # + # Thus, a min/max depth of 3 is used to enumerate all test cases in the + # repository. Test name is also derived from here. + find ./llvm-ci-perf-results -mindepth 3 -maxdepth 3 -type d ! -path '*.git*' | + while read -r dir; do + test_name="$(basename "$dir")" + python ./devops/scripts/benchmarking/aggregate.py ./devops "$test_name" "$dir" "$SANITIZED_TIMESTAMP" + done + - name: Upload average to the repo + shell: bash + run: | + cd ./llvm-ci-perf-results + git config user.name "SYCL Benchmarking Bot" + git config user.email "sys_sycl_benchmarks@intel.com" + git pull + # Make sure changes have been made + if git diff --quiet && git diff --cached --quiet; then + echo "No changes to median, skipping push." + else + git add . + git commit -m "[GHA] Aggregate median data from $SANITIZED_TIMESTAMP to $(date +%Y%m%d_%H%M%S)" + git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" + fi + - name: Find aggregated average results artifact here + if: always() + shell: bash + run: | + cat << EOF + # + # Artifact link for aggregated averages here: + # + EOF + - name: Archive new medians + if: always() + uses: actions/upload-artifact@v4 + with: + name: llvm-ci-perf-results new medians + path: ./llvm-ci-perf-results/**/*-median.csv diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml new file mode 100644 index 0000000000000..7f69fdf832982 --- /dev/null +++ b/devops/actions/run-tests/benchmark/action.yml @@ -0,0 +1,107 @@ +name: 'Run compute-benchmarks' + +# Run compute-benchmarks on SYCL +# +# This action assumes SYCL is in ./toolchain, and that /devops has been +# checked out in ./devops. 
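+# (For reference, sycl-linux-run-tests.yml invokes this action roughly as
+# follows; the exact values are supplied by that workflow's inputs:
+#
+#      - uses: ./devops/actions/run-tests/benchmark
+#        with:
+#          target_devices: ${{ inputs.target_devices }}
+#        env:
+#          RUNNER_TAG: ${{ inputs.runner }}
+#          GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
+# )
+#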
This action also assumes that GITHUB_TOKEN +# was properly set in env, because according to Github, that's apparently the +# recommended way to pass a secret into a github action: +# +# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets +# +# This action also expects a RUNNER_TAG environment variable to be set to the +# runner tag used to run this workflow: Currently, only gen12 and pvc on Linux +# are fully supported. Although this workflow won't stop you from running other +# devices, note that only gen12 and pvc has been tested to work. +# + +inputs: + target_devices: + type: string + required: True + +runs: + using: "composite" + steps: + - name: Check specified runner type / target backend + shell: bash + env: + TARGET_DEVICE: ${{ inputs.target_devices }} + run: | + case "$RUNNER_TAG" in + '["Linux", "gen12"]' | '["Linux", "pvc"]') ;; + *) + echo "#" + echo "# WARNING: Only gen12/pvc on Linux is fully supported." + echo "# This workflow is not guaranteed to work with other runners." + echo "#" ;; + esac + + # input.target_devices is not directly used, as this allows code injection + case "$TARGET_DEVICE" in + level_zero:*) ;; + *) + echo "#" + echo "# WARNING: Only level_zero backend is fully supported." + echo "# This workflow is not guaranteed to work with other backends." + echo "#" ;; + esac + - name: Run compute-benchmarks + shell: bash + run: | + cat << EOF + # + # NOTE TO DEVELOPERS: + # + + Check latter steps of the workflow: This job produces an artifact with: + - benchmark results from passing/failing tests + - log containing all failing (too slow) benchmarks + - log containing all erroring benchmarks + + While this step in the workflow provides debugging output describing this + information, it might be easier to inspect the logs from the artifact + instead. + + EOF + export ONEAPI_DEVICE_SELECTOR="${{ inputs.target_devices }}" + export CMPLR_ROOT=./toolchain + echo "-----" + sycl-ls + echo "-----" + ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1 + - name: Push compute-benchmarks results + if: always() + shell: bash + run: | + # TODO -- waiting on security clearance + # Load configuration values + $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) + + cd "./llvm-ci-perf-results" + git config user.name "SYCL Benchmarking Bot" + git config user.email "sys_sycl_benchmarks@intel.com" + git pull + git add . + # Make sure changes have been made + if git diff --quiet && git diff --cached --quiet; then + echo "No new results added, skipping push." 
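+          # (An empty diff here usually means benchmark.sh cached no new
+          # passing results, i.e. every enabled test either regressed or
+          # errored; only passing results are moved into this clone.)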
+ else + git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" + git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" + fi + - name: Find benchmark result artifact here + if: always() + shell: bash + run: | + cat << EOF + # + # Artifact link for benchmark results here: + # + EOF + - name: Archive compute-benchmark results + if: always() + uses: actions/upload-artifact@v4 + with: + name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) + path: ./artifact diff --git a/devops/benchmarking/config.ini b/devops/benchmarking/config.ini new file mode 100644 index 0000000000000..c0b3ca9c31c9e --- /dev/null +++ b/devops/benchmarking/config.ini @@ -0,0 +1,44 @@ +; +; This file contains configuration options to change the behaviour of the +; benchmarking workflow in sycl-linux-run-tests.yml. +; +; DO NOT USE THE CONTENTS OF THIS FILE DIRECTLY -- Due to security concerns, The +; contents of this file must be sanitized first before use. +; See: /devops/scripts/benchmarking/common.py +; + +; Compute-benchmark compile/run options +[compute_bench] +; Value for -j during compilation of compute-benchmarks +compile_jobs = 2 +; Number of iterations to run compute-benchmark tests +iterations = 100 + +; Options for benchmark result metrics (to record/compare against) +[metrics] +; Sets the metrics to record/aggregate in the historical average. +; Format: comma-separated list of column names in compute-benchmark results +recorded = Median,StdDev +; Sets the tolerance for each recorded metric and their allowed deviation from +; the historical average. Metrics not included here are not compared against +; when passing/failing benchmark results. +; Format: comma-separated list of : +tolerances = Median:0.5 + +; Options for computing historical averages +[average] +; Number of days (from today) to look back for results when computing historical +; average +cutoff_range = 7 +; Minimum number of samples required to compute a historical average +min_threshold = 3 + +; ONEAPI_DEVICE_SELECTOR linting/options +[device_selector] +; Backends to allow in device_selector +enabled_backends = level_zero,opencl,cuda,hip +; native_cpu is disabled + +; Devices to allow in device_selector +enabled_devices = cpu,gpu +; fpga is disabled diff --git a/devops/benchmarking/constants.ini b/devops/benchmarking/constants.ini new file mode 100644 index 0000000000000..9281ece8f4950 --- /dev/null +++ b/devops/benchmarking/constants.ini @@ -0,0 +1,48 @@ +; +; This file defines constants used throughout the benchmarking workflow in +; sycl-linux-run-tests.yml. If you're trying to change the behavior of this +; workflow, you're likely looking for /devops/benchmarking/config.ini instead. +; +; DO NOT USE THE CONTENTS OF THIS FILE DIRECTLY -- Due to security concerns, The +; contents of this file must be sanitized first before use. +; See: /devops/scripts/benchmarking/common.py +; + +; Constants for compute-benchmarks +[compute_bench] +git_repo = intel/compute-benchmarks +git_branch = master +git_commit = 230a3db4d8d03c0e9a663988f7c3abbd1137a1e0 +; path = ./compute-benchmarks + +; Constants for git repo storing benchmark performance results +[perf_res] +git_repo = intel/llvm-ci-perf-results +git_branch = main +; Path to clone performance result repo +; path = ./llvm-ci-perf-results + +; It was decided that paths should be hardcoded throughout this workflow for +; security reasons and ease of readability. 
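+; (The non-path constants above are consumed through
+; scripts/benchmarking/load_config.py, which exports each option as a
+; sanitized environment variable named after its section and option,
+; e.g. [perf_res] git_repo becomes SANITIZED_PERF_RES_GIT_REPO.)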
Do not use paths as constants. + +; ; Constants for artifacts +; [artifact] +; ; Path to root folder storing benchmark CI artifact +; path = ./artifact +; ; Path (relative to artifact.path) to cache compute-benchmark results +; ; +; ; If a test result does not get moved out of this catch-all cache path, it is +; ; considered to have failed +; output_cache = ./artifact/failed_tests +; ; Path (relative to artifact.path) to cache passing compute-benchmark results +; passing_cache = ./artifact/passing_tests + +; [timestamp] +; ; Timestamp format used for +; format = %%Y%%m%%d_%%H%%M%%S + +; [benchmark_log] +; ; Log file for test cases that perform over the allowed variance +; slow = ./artifact/benchmarks_failed.log +; ; Log file for test cases that errored / failed to build +; error = ./artifact/benchmarks_errored.log diff --git a/devops/benchmarking/enabled_tests.conf b/devops/benchmarking/enabled_tests.conf new file mode 100644 index 0000000000000..20659cbea636d --- /dev/null +++ b/devops/benchmarking/enabled_tests.conf @@ -0,0 +1,8 @@ +# Test cases to be enabled: +api_overhead_benchmark_sycl +memory_benchmark_sycl +miscellaneous_benchmark_sycl +ulls_benchmark_sycl + +# As of January 2025, these are every compute-benchmark tests with a SYCL +# implementation. diff --git a/devops/scripts/benchmarking/aggregate.py b/devops/scripts/benchmarking/aggregate.py new file mode 100644 index 0000000000000..f62a8ffed83c5 --- /dev/null +++ b/devops/scripts/benchmarking/aggregate.py @@ -0,0 +1,205 @@ +import csv +import sys +from pathlib import Path +import heapq +import statistics +from common import Validate, SanitizedConfig +from abc import ABC, abstractmethod +import os + + +class Aggregator(ABC): + """ + Aggregator classes used to "aggregate" a pool of elements, and produce an + "average" (precisely, some "measure of central tendency") from the elements. + """ + + @staticmethod + @abstractmethod + def get_type() -> str: + """ + Return a string indicating the type of average this aggregator + produces. + """ + pass + + @abstractmethod + def add(self, n: float): + """ + Add/aggregate an element to the pool of elements used by this aggregator + to produce an average calculation. + """ + pass + + @abstractmethod + def get_avg(self) -> float: + """ + Produce an average from the pool of elements aggregated using add(). + """ + pass + + +class SimpleMedian(Aggregator): + """ + Simple median calculation: if the number of samples being generated are low, + this is the fastest median method. + """ + + def __init__(self): + self.elements = [] + + @staticmethod + def get_type() -> str: + return "median" + + def add(self, n: float): + self.elements.append(n) + + def get_avg(self) -> float: + return statistics.median(self.elements) + + +class StreamingMedian(Aggregator): + """ + Calculate medians incrementally using heaps: Theoretically the fastest way + to calculate a median from a stream of elements, but realistically is only + faster when dealing with huge numbers of samples that would be generated by + i.e. enabling this workflow in precommit and using longer periods of time. + """ + + def __init__(self): + # Gist: we keep a minheap and a maxheap, and store the median as the top + # of the minheap. When a new element comes it gets put into the heap + # based on if the element is bigger than the current median. Then, the + # heaps are heapified and the median is repopulated by heapify. 
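+        # Worked example: after add(3), add(1) and add(4), maxheap_smaller
+        # holds 3 and 1 (stored negated) while minheap_larger holds 4, so
+        # get_avg() returns 3 -- the median of the three samples.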
+ self.minheap_larger = [] + self.maxheap_smaller = [] + + @staticmethod + def get_type() -> str: + return "median" + + # Note: numbers on maxheap should be negative, as heapq + # is minheap by default + + def add(self, n: float): + if len(self.maxheap_smaller) == 0 or -self.maxheap_smaller[0] >= n: + heapq.heappush(self.maxheap_smaller, -n) + else: + heapq.heappush(self.minheap_larger, n) + + # Ensure minheap has more elements than maxheap + if len(self.maxheap_smaller) > len(self.minheap_larger) + 1: + heapq.heappush(self.minheap_larger, -heapq.heappop(self.maxheap_smaller)) + elif len(self.maxheap_smaller) < len(self.minheap_larger): + heapq.heappush(self.maxheap_smaller, -heapq.heappop(self.minheap_larger)) + + def get_avg(self) -> float: + if len(self.maxheap_smaller) == len(self.minheap_larger): + # Equal number of elements smaller and larger than "median": + # thus, there are two median values. The median would then become + # the average of both median values. + return (-self.maxheap_smaller[0] + self.minheap_larger[0]) / 2.0 + else: + # Otherwise, median is always in minheap, as minheap is always + # bigger + return -self.maxheap_smaller[0] + + +class Aggregate: + """ + Static class providing methods for aggregating data + """ + + @staticmethod + def hist_avg( + benchmark_name: str, res_dir: str, cutoff: str, aggregator=SimpleMedian + ): + if not os.path.isdir(res_dir): + print(f"Not a directory: {res_dir}.", file=sys.stderr) + exit(1) + + def get_csv_samples() -> list[str]: + """Get all valid .csv samples from the results folder.""" + cache_dir = Path(f"{res_dir}") + # Filter all benchmark .csv files in the result directory: + return list( + filter( + # Make sure the .csv "file" is a file: + lambda f: f.is_file() + # Make sure timestamp of .csv file is good format: + # [-19:-4] corresponds to the timestamp in the filename. + and Validate.timestamp(str(f)[-19:-4]) + # Make sure timestamp is bigger than cutoff timestamp: + and str(f)[-19:-4] > cutoff, + cache_dir.glob(f"{benchmark_name}-*_*.csv"), + ) + ) + + # Calculate median of every desired metric: + samples_aggregate = dict() + filtered_samples = get_csv_samples() + if len(filtered_samples) == 0: + print( + f"WARNING: No results for {benchmark_name} found from {cutoff} to now", + file=sys.stderr, + ) + for sample_path in filtered_samples: + with open(sample_path, "r") as sample_file: + for sample in csv.DictReader(sample_file): + test = sample["TestCase"] + # Construct entry in aggregator for test if it doesn't exist + # already: + if test not in samples_aggregate: + samples_aggregate[test] = { + metric: aggregator() + for metric in SanitizedConfig.METRICS_TOLERANCES + } + + # For each metric of concern, add to aggregator: + for metric in SanitizedConfig.METRICS_TOLERANCES: + sample_value = Validate.sanitize_stat(sample[metric]) + if not isinstance(sample_value, float): + print( + f"Malformatted statistic in {str(sample_path)}: " + + f"'{sample[metric]}' for {test}." 
+ ) + exit(1) + # Add metric from sample for current test to aggregate: + samples_aggregate[test][metric].add(sample_value) + + # Calculate + write new average (from samples_aggregate) in new .csv file: + with open( + f"{res_dir}/{benchmark_name}-{aggregator.get_type()}.csv", "w" + ) as output_csv: + writer = csv.DictWriter( + output_csv, + fieldnames=["TestCase", *SanitizedConfig.METRICS_TOLERANCES.keys()], + ) + writer.writeheader() + for test in samples_aggregate: + writer.writerow( + {"TestCase": test} + | { + metric: samples_aggregate[test][metric].get_avg() + for metric in SanitizedConfig.METRICS_TOLERANCES + } + ) + + +if __name__ == "__main__": + if len(sys.argv) != 5: + print( + f"Usage: {sys.argv[0]} " + ) + exit(1) + if not Validate.timestamp(sys.argv[4]): + print(f"Bad cutoff timestamp, please use YYYYMMDD_HHMMSS.", file=sys.stderr) + exit(1) + if not Validate.filepath(sys.argv[1]): + print(f"Not a valid filepath: {sys.argv[1]}", file=sys.stderr) + exit(1) + # If the filepath provided passed filepath validation, then it is clean + SanitizedConfig.load(sys.argv[1]) + + Aggregate.hist_avg(sys.argv[2], sys.argv[3], sys.argv[4]) diff --git a/devops/scripts/benchmarking/benchmark.sh b/devops/scripts/benchmarking/benchmark.sh new file mode 100755 index 0000000000000..bbfd669774f9a --- /dev/null +++ b/devops/scripts/benchmarking/benchmark.sh @@ -0,0 +1,300 @@ +#!/bin/sh + +# +# benchmark.sh: Benchmark dpcpp using compute-benchmarks +# + +usage () { + >&2 echo "Usage: $0 -t [-B ] + -n Github runner name -- Required + -c Clean up working directory + -C Clean up working directory and exit + -s Cache results + +This script builds and runs benchmarks from compute-benchmarks." + exit 1 +} + +# Ensures test cases read from enabled_tests.conf contains no malicious content +_validate_testname () { + if [ -n "$(printf "%s" "$1" | sed "s/[a-zA-Z_]*//g")" ]; then + echo "Illegal characters in $TEST_CONFIG. Permitted characters: a-zA-Z_" + exit 1 + fi +} + +clone_perf_res() { + echo "### Cloning llvm-ci-perf-results ($SANITIZED_PERF_RES_GIT_REPO:$SANITIZED_PERF_RES_GIT_BRANCH) ###" + git clone -b "$SANITIZED_PERF_RES_GIT_BRANCH" "https://github.com/$SANITIZED_PERF_RES_GIT_REPO" ./llvm-ci-perf-results + [ "$?" -ne 0 ] && exit "$?" +} + +clone_compute_bench() { + echo "### Cloning compute-benchmarks ($SANITIZED_COMPUTE_BENCH_GIT_REPO:$SANITIZED_COMPUTE_BENCH_GIT_BRANCH) ###" + git clone -b "$SANITIZED_COMPUTE_BENCH_GIT_BRANCH" \ + --recurse-submodules "https://github.com/$SANITIZED_COMPUTE_BENCH_GIT_REPO" \ + ./compute-benchmarks + if [ ! -d "./compute-benchmarks" ]; then + echo "Failed to clone compute-benchmarks." + exit 1 + elif [ -n "$SANITIZED_COMPUTE_BENCH_GIT_COMMIT" ]; then + cd ./compute-benchmarks + git checkout "$SANITIZED_COMPUTE_BENCH_GIT_COMMIT" + if [ "$?" -ne 0 ]; then + echo "Failed to get compute-benchmarks commit '$SANITIZED_COMPUTE_BENCH_GIT_COMMIT'." + exit 1 + fi + cd - + fi +} + +build_compute_bench() { + echo "### Building compute-benchmarks ($SANITIZED_COMPUTE_BENCH_GIT_REPO:$SANITIZED_COMPUTE_BENCH_GIT_BRANCH) ###" + mkdir ./compute-benchmarks/build && cd ./compute-benchmarks/build && + # No reason to turn on ccache, if this docker image will be disassembled later on + cmake .. -DBUILD_SYCL=ON -DBUILD_L0=OFF -DBUILD=OCL=OFF -DCCACHE_ALLOWED=FALSE + # TODO enable mechanism for opting into L0 and OCL -- the concept is to + # subtract OCL/L0 times from SYCL times in hopes of deriving SYCL runtime + # overhead, but this is mostly an idea that needs to be mulled upon. 
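+    # The loop below builds only the test binaries listed in
+    # enabled_tests.conf, e.g. "make -j2 api_overhead_benchmark_sycl" with the
+    # default compile_jobs = 2 from config.ini.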
+ + if [ "$?" -eq 0 ]; then + while IFS= read -r case; do + # Skip lines starting with '#' + [ "${case##\#*}" ] || continue + + _validate_testname "$case" + make "-j$SANITIZED_COMPUTE_BENCH_COMPILE_JOBS" "$case" + done < "$TESTS_CONFIG" + fi + cd - +} + +# Check if the number of samples for a given test case is less than a threshold +# set in benchmark-ci.conf +# +# Usage: +samples_under_threshold () { + # Directory doesn't exist, samples automatically under threshold + [ ! -d "./llvm-ci-perf-results/$1" ] && return 0 + file_count="$(find "./llvm-ci-perf-results/$1" -maxdepth 1 -type f | wc -l )" + [ "$file_count" -lt "$SANITIZED_AVERAGE_MIN_THRESHOLD" ] +} + +# Check for a regression via compare.py +# +# Usage: check_regression +check_regression() { + csv_relpath="$(dirname "$1")" + csv_name="$(basename "$1")" + if samples_under_threshold "$csv_relpath"; then + echo "Not enough samples to construct a good average, performance\ + check skipped!" + return 0 # Success status + fi + python "$DEVOPS_PATH/scripts/benchmarking/compare.py" \ + "$DEVOPS_PATH" "$csv_relpath" "$csv_name" + return $? +} + +# Move the results of our benchmark into the git repo, and save benchmark +# results to artifact archive +# +# Usage: cache +cache() { + mkdir -p "$(dirname ./artifact/passing_tests/$1)" "$(dirname ./artifact/failed_tests/$1)" + cp "./artifact/failed_tests/$1" "./artifact/passing_tests/$1" + mkdir -p "$(dirname ./llvm-ci-perf-results/$1)" + mv "./artifact/failed_tests/$1" "./llvm-ci-perf-results/$1" +} + +# Check for a regression + cache if no regression found +# +# Usage: check_and_cache +check_and_cache() { + echo "Checking $1..." + if check_regression $1; then + if [ "$CACHE_RESULTS" -eq "1" ]; then + echo "Caching $1..." + cache $1 + fi + else + [ "$CACHE_RESULTS" -eq "1" ] && echo "Regression found -- Not caching!" + fi +} + +# Run and process the results of each enabled benchmark in enabled_tests.conf +process_benchmarks() { + echo "### Running and processing selected benchmarks ###" + if [ -z "$TESTS_CONFIG" ]; then + echo "Setting tests to run via cli is not currently supported." + exit 1 + else + rm ./artifact/benchmarks_errored.log ./artifact/benchmarks_failed.log 2> /dev/null + mkdir -p ./artifact + # Loop through each line of enabled_tests.conf, but ignore lines in the + # test config starting with #'s: + grep "^[^#]" "$TESTS_CONFIG" | while read -r testcase; do + _validate_testname "$testcase" + echo "# Running $testcase..." + + # The benchmark results git repo and this script's output both share + # the following directory structure: + # + # /// + # + # Instead of specifying 2 paths with a slightly different root + # folder name for every function we use, we can use a relative path + # to represent the file in both folders. + # + # Figure out the relative path of our testcase result: + test_dir_relpath="$DEVICE_SELECTOR_DIRNAME/$RUNNER/$testcase" + output_csv_relpath="$test_dir_relpath/$testcase-$TIMESTAMP.csv" + mkdir -p "./artifact/failed_tests/$test_dir_relpath" # Ensure directory exists + + # Tests are first placed in ./artifact/failed_tests, and are only + # moved to passing_tests or the performance results repo if the + # benchmark results are passing + output_csv="./artifact/failed_tests/$output_csv_relpath" + "./compute-benchmarks/build/bin/$testcase" --csv \ + --iterations="$SANITIZED_COMPUTE_BENCH_ITERATIONS" > "$output_csv" + + exit_status="$?" 
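+            # On success the raw output is trimmed to csv rows and compared
+            # against the historical median; passing results are cached in the
+            # llvm-ci-perf-results clone under the same relative path, e.g.
+            # level_zero-gpu/<runner name>/<test>/<test>-<timestamp>.csv for a
+            # level_zero:gpu run.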
+ if [ "$exit_status" -eq 0 ] && [ -s "$output_csv" ]; then + # Filter out header lines not in csv format: + tail +8 "$output_csv" > .tmp_res + mv .tmp_res "$output_csv" + check_and_cache $output_csv_relpath + else + echo "[ERROR] $testcase returned exit status $exit_status" + echo "-- $testcase: error $exit_status" >> ./artifact/benchmarks_errored.log + fi + done + fi +} + +# Handle failures + produce a report on what failed +process_results() { + fail=0 + if [ -s ./artifact/benchmarks_failed.log ]; then + printf "\n### Tests performing over acceptable range of average: ###\n" + cat ./artifact/benchmarks_failed.log + echo "" + fail=2 + fi + if [ -s ./artifact/benchmarks_errored.log ]; then + printf "\n### Tests that failed to run: ###\n" + cat ./artifact/benchmarks_errored.log + echo "" + fail=1 + fi + exit $fail +} + +cleanup() { + echo "### Cleaning up compute-benchmark builds from prior runs ###" + rm -rf ./compute-benchmarks + rm -rf ./llvm-ci-perf-results + [ ! -z "$_exit_after_cleanup" ] && exit +} + +load_configs() { + # This script needs to know where the intel/llvm "/devops" directory is, + # containing all the configuration files and the compare script. + # + # If this is not provided, this function tries to guess where the files + # are based on how the script is called, and verifies that all necessary + # configs and scripts are reachable. + + # This benchmarking script is usually at: + # + # /devops/scripts/benchmarking/benchmark.sh + # + # Derive /devops based on location of this script: + [ -z "$DEVOPS_PATH" ] && DEVOPS_PATH="$(dirname "$0")/../.." + if [ -z "$(printf '%s' "$DEVOPS_PATH" | grep -oE '^[a-zA-Z0-9._\/-]+$')" ]; then + echo "Bad DEVOPS_PATH, please specify DEVOPS_PATH variable." + exit 1 + fi + + TESTS_CONFIG="$(realpath "$DEVOPS_PATH/benchmarking/enabled_tests.conf")" + COMPARE_PATH="$(realpath "$DEVOPS_PATH/scripts/benchmarking/compare.py")" + LOAD_CONFIG_PY="$(realpath "$DEVOPS_PATH/scripts/benchmarking/load_config.py")" + + for file in \ + "$TESTS_CONFIG" "$COMPARE_PATH" "$LOAD_CONFIG_PY" + do + if [ ! -f "$file" ]; then + echo "Please provide path to /devops in DEVOPS_PATH." + exit -1 + fi + done + + $(python "$LOAD_CONFIG_PY" "$DEVOPS_PATH" config) + $(python "$LOAD_CONFIG_PY" "$DEVOPS_PATH" constants) +} + +##### + +load_configs + +COMPUTE_BENCH_COMPILE_FLAGS="" +CACHE_RESULTS="0" +# Timestamp format is YYYYMMDD_HHMMSS +TIMESTAMP="$(date +%Y%m%d_%H%M%S)" + +# CLI flags + overrides to configuration options: +while getopts "n:cCs" opt; do + case "$opt" in + n) + if [ -n "$(printf "%s" "$OPTARG" | sed "s/[a-zA-Z0-9_-]*//g")" ]; then + echo "Illegal characters in runner name." + exit 1 + fi + RUNNER="$OPTARG" + ;; + # Cleanup status is saved in a var to ensure all arguments are processed before + # performing cleanup + c) _cleanup=1 ;; + C) _cleanup=1 && _exit_after_cleanup=1 ;; + s) CACHE_RESULTS=1;; + \?) usage ;; + esac +done + +# Check all necessary variables exist: +if [ -z "$CMPLR_ROOT" ]; then + echo "Please set CMPLR_ROOT first; it is needed by compute-benchmarks to build." + exit 1 +elif [ -z "$ONEAPI_DEVICE_SELECTOR" ]; then + echo "Please set ONEAPI_DEVICE_SELECTOR first to specify which device to use." + exit 1 +elif [ -z "$RUNNER" ]; then + echo "Please specify runner name using -n first; it is needed for storing/comparing benchmark results." 
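+    # (For reference, the benchmark action in /devops/actions/run-tests/benchmark
+    # drives this script roughly as:
+    #    CMPLR_ROOT=./toolchain ONEAPI_DEVICE_SELECTOR=level_zero:gpu \
+    #        ./devops/scripts/benchmarking/benchmark.sh -n <runner name> -s )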
+ exit 1 +fi + +# Make sure ONEAPI_DEVICE_SELECTOR doesn't try to enable multiple devices at the +# same time, or use specific device id's +_dev_sel_backend_re="$(echo "$SANITIZED_DEVICE_SELECTOR_ENABLED_BACKENDS" | sed 's/,/|/g')" +_dev_sel_device_re="$(echo "$SANITIZED_DEVICE_SELECTOR_ENABLED_DEVICES" | sed 's/,/|/g')" +_dev_sel_re="s/($_dev_sel_backend_re):($_dev_sel_device_re)//" +if [ -n "$(echo "$ONEAPI_DEVICE_SELECTOR" | sed -E "$_dev_sel_re")" ]; then + echo "Unsupported ONEAPI_DEVICE_SELECTOR value: please ensure only one \ +device is selected, and devices are not selected by indices." + echo "Enabled backends: $SANITIZED_DEVICE_SELECTOR_ENABLED_BACKENDS" + echo "Enabled device types: $SANITIZED_DEVICE_SELECTOR_ENABLED_DEVICES" + exit 1 +fi +# ONEAPI_DEVICE_SELECTOR values are not valid directory names in unix: this +# value lets us use ONEAPI_DEVICE_SELECTOR as actual directory names +DEVICE_SELECTOR_DIRNAME="$(echo "$ONEAPI_DEVICE_SELECTOR" | sed 's/:/-/')" + +# Clean up and delete all cached files if specified: +[ ! -z "$_cleanup" ] && cleanup +# Clone and build only if they aren't already cached/deleted: +[ ! -d ./llvm-ci-perf-results ] && clone_perf_res +[ ! -d ./compute-benchmarks ] && clone_compute_bench +[ ! -d ./compute-benchmarks/build ] && build_compute_bench +# Process benchmarks: +process_benchmarks +process_results \ No newline at end of file diff --git a/devops/scripts/benchmarking/common.py b/devops/scripts/benchmarking/common.py new file mode 100644 index 0000000000000..c400b686db90f --- /dev/null +++ b/devops/scripts/benchmarking/common.py @@ -0,0 +1,196 @@ +import re +import os +import sys +import string +import configparser + + +class Validate: + """Static class containing methods for validating various fields""" + + @staticmethod + def filepath(path: str) -> bool: + """ + Returns True if path is clean (no illegal characters), otherwise False. + """ + filepath_re = re.compile(r"[a-zA-Z0-9\/\._\-]+") + return filepath_re.match(path) is not None + + @staticmethod + def timestamp(t: str) -> bool: + """ + Returns True if t is in form YYYYMMDD_HHMMSS, otherwise False. + """ + timestamp_re = re.compile( + r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$" + ) + return timestamp_re.match(t) is not None + + @staticmethod + def sanitize_stat(stat: str) -> float: + """ + Sanitize statistics found in compute-benchmark output csv files. Returns + float if sanitized, None if not sanitizable. + """ + # Get rid of % + if stat[-1] == "%": + stat = stat[:-1] + + # Cast to float: If cast succeeds, the statistic is clean. + try: + return float(stat) + except ValueError: + return None + + +class SanitizedConfig: + """ + Static class for holding sanitized configuration values used within python. + + Configuration option names follow
_