diff --git a/.github/workflows/sycl-docs.yml b/.github/workflows/sycl-docs.yml index 5c1e8e425111b..7bb6a568892a8 100644 --- a/.github/workflows/sycl-docs.yml +++ b/.github/workflows/sycl-docs.yml @@ -49,7 +49,13 @@ jobs: mkdir clang mv $GITHUB_WORKSPACE/build/tools/sycl/doc/html/* . mv $GITHUB_WORKSPACE/build/tools/clang/docs/html/* clang/ + cp -r $GITHUB_WORKSPACE/repo/devops/scripts/benchmarks/html benchmarks touch .nojekyll + # Update benchmarking dashboard configuration + cat << EOF > benchmarks/config.js + remoteDataUrl = 'https://raw.githubusercontent.com/intel/llvm-ci-perf-results/refs/heads/unify-ci/data.json'; + defaultCompareNames = ["Baseline_PVC_L0"]; + EOF # Upload the generated docs as an artifact and deploy to GitHub Pages. - name: Upload artifact uses: actions/upload-pages-artifact@v3 diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 42066d4d1fee2..ac3e6341cc797 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -25,7 +25,7 @@ on: required: False tests_selector: description: | - Three possible options: "e2e", "cts", and "compute-benchmarks". + Three possible options: "e2e", "cts", and "benchmarks". type: string default: "e2e" @@ -111,6 +111,33 @@ on: default: '' required: False + benchmark_upload_results: + description: | + Set to true to upload results to git repository storing benchmarking + results. + type: string + default: 'false' + required: False + benchmark_save_name: + description: | + Save name to use for benchmark results: Save names are stored in + metadata of result file, and are used to identify benchmark results in + the same series (e.g. same configuration, same device, etc.). + + Note: Currently, benchmark result filenames are in the format of + ___YYYYMMDD_HHMMSS.json + type: string + default: '' + required: False + benchmark_preset: + description: | + Name of benchmark preset to run. + + See /devops/scripts/benchmarks/presets.py for all presets available. 
+ type: string + default: 'Minimal' + required: False + workflow_dispatch: inputs: runner: @@ -150,7 +177,7 @@ on: options: - e2e - cts - - compute-benchmarks + - benchmarks env: description: | @@ -303,11 +330,14 @@ jobs: target_devices: ${{ inputs.target_devices }} retention-days: ${{ inputs.retention-days }} - - name: Run compute-benchmarks on SYCL - if: inputs.tests_selector == 'compute-benchmarks' + - name: Run benchmarks + if: inputs.tests_selector == 'benchmarks' uses: ./devops/actions/run-tests/benchmark with: target_devices: ${{ inputs.target_devices }} + upload_results: ${{ inputs.benchmark_upload_results }} + save_name: ${{ inputs.benchmark_save_name }} + preset: ${{ inputs.benchmark_preset }} env: RUNNER_TAG: ${{ inputs.runner }} GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} diff --git a/.github/workflows/sycl-nightly.yml b/.github/workflows/sycl-nightly.yml index 1ccf4a2498e6b..c6cffa66f5fea 100644 --- a/.github/workflows/sycl-nightly.yml +++ b/.github/workflows/sycl-nightly.yml @@ -274,35 +274,30 @@ jobs: sycl_toolchain_archive: ${{ needs.build-win.outputs.artifact_archive_name }} sycl_cts_artifact: sycl_cts_bin_win - aggregate_benchmark_results: - if: github.repository == 'intel/llvm' && !cancelled() - name: Aggregate benchmark results and produce historical averages - uses: ./.github/workflows/sycl-benchmark-aggregate.yml - secrets: - LLVM_SYCL_BENCHMARK_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} - with: - lookback_days: 100 - run-sycl-benchmarks: - needs: [ubuntu2204_build, aggregate_benchmark_results] + needs: [ubuntu2204_build] if: ${{ always() && !cancelled() && needs.ubuntu2204_build.outputs.build_conclusion == 'success' }} strategy: - fail-fast: false matrix: include: - - name: Run compute-benchmarks on L0 PVC + - ref: ${{ github.sha }} + save_name: Baseline runner: '["PVC_PERF"]' - image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN - target_devices: level_zero:gpu + backend: 'level_zero:gpu' + preset: Minimal uses: ./.github/workflows/sycl-linux-run-tests.yml secrets: inherit with: - name: ${{ matrix.name }} + name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }}) runner: ${{ matrix.runner }} - image_options: ${{ matrix.image_options }} - target_devices: ${{ matrix.target_devices }} - tests_selector: compute-benchmarks - repo_ref: ${{ github.sha }} + image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest + image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN + target_devices: ${{ matrix.backend }} + tests_selector: benchmarks + benchmark_upload_results: true + benchmark_save_name: ${{ matrix.save_name }} + benchmark_preset: ${{ matrix.preset }} + repo_ref: ${{ matrix.ref }} sycl_toolchain_artifact: sycl_linux_default sycl_toolchain_archive: ${{ needs.ubuntu2204_build.outputs.artifact_archive_name }} sycl_toolchain_decompress_command: ${{ needs.ubuntu2204_build.outputs.artifact_decompress_command }} diff --git a/.github/workflows/sycl-ur-perf-benchmarking.yml b/.github/workflows/sycl-ur-perf-benchmarking.yml index 23fbb1ad903b4..c8900baa78368 100644 --- a/.github/workflows/sycl-ur-perf-benchmarking.yml +++ b/.github/workflows/sycl-ur-perf-benchmarking.yml @@ -1,12 +1,138 @@ -name: Benchmarks +name: Run Benchmarks -# This workflow is a WIP: this workflow file acts as a placeholder. 
+on: + workflow_call: + inputs: + preset: + type: string + description: | + Benchmark presets to run: See /devops/scripts/benchmarks/presets.py + required: false + default: 'Minimal' # Only compute-benchmarks + pr_no: + type: string + description: | + PR no. to build SYCL from if specified: SYCL will be built from HEAD + of incoming branch used by the specified PR no. -on: [ workflow_dispatch ] + If both pr_no and commit_hash are empty, the latest SYCL nightly build + will be used. + required: false + default: '' + commit_hash: + type: string + description: | + Commit hash (within intel/llvm) to build SYCL from if specified. + + If both pr_no and commit_hash are empty, the latest commit in + deployment branch will be used. + required: false + default: '' + upload_results: + type: string # true/false: workflow_dispatch does not support booleans + required: true + runner: + type: string + required: true + backend: + type: string + required: true + + workflow_dispatch: + inputs: + preset: + type: choice + description: | + Benchmark presets to run, See /devops/scripts/benchmarks/presets.py. Hint: Minimal is compute-benchmarks only. + options: + - Full + - SYCL + - Minimal + - Normal + - Test + default: 'Minimal' # Only compute-benchmarks + pr_no: + type: string + description: | + PR no. to build SYCL from: + + SYCL will be built from HEAD of incoming branch. + required: false + default: '' + commit_hash: + type: string + description: | + Commit hash (within intel/llvm) to build SYCL from: + + Leave both pr_no and commit_hash empty to use latest commit. + required: false + default: '' + upload_results: + description: 'Save and upload results' + type: choice + options: + - false + - true + default: true + runner: + type: choice + options: + - '["PVC_PERF"]' + backend: + description: Backend to use + type: choice + options: + - 'level_zero:gpu' + - 'level_zero_v2:gpu' + # As of #17407, sycl-linux-build now builds v2 by default + +permissions: read-all jobs: - do-nothing: - runs-on: ubuntu-latest - steps: - - run: echo 'This workflow is a WIP.' 
+ build_sycl: + name: Build SYCL + uses: ./.github/workflows/sycl-linux-build.yml + with: + build_ref: | + ${{ + inputs.commit_hash != '' && inputs.commit_hash || + inputs.pr_no != '' && format('refs/pull/{0}/head', inputs.pr_no) || + github.ref + }} + build_cache_root: "/__w/" + build_artifact_suffix: "prod_noassert" + build_cache_suffix: "prod_noassert" + build_configure_extra_args: "--no-assertions" + build_image: "ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest" + cc: clang + cxx: clang++ + changes: '[]' + run_benchmarks_build: + name: Run Benchmarks on Build + needs: [ build_sycl ] + strategy: + matrix: + include: + - ref: ${{ inputs.commit_hash != '' && inputs.commit_hash || format('refs/pull/{0}/head', inputs.pr_no) }} + save_name: ${{ inputs.commit_hash != '' && format('Commit{0}', inputs.commit_hash) || format('PR{0}', inputs.pr_no) }} + # Set default values if not specified: + runner: ${{ inputs.runner || '["PVC_PERF"]' }} + backend: ${{ inputs.backend || 'level_zero:gpu' }} + uses: ./.github/workflows/sycl-linux-run-tests.yml + secrets: inherit + with: + name: Run compute-benchmarks (${{ matrix.save_name }}, ${{ matrix.runner }}, ${{ matrix.backend }}) + runner: ${{ matrix.runner }} + image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest + image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN + target_devices: ${{ matrix.backend }} + tests_selector: benchmarks + benchmark_upload_results: ${{ inputs.upload_results }} + benchmark_save_name: ${{ matrix.save_name }} + benchmark_preset: ${{ inputs.preset }} + repo_ref: ${{ matrix.ref }} + devops_ref: ${{ github.ref }} + sycl_toolchain_artifact: sycl_linux_prod_noassert + sycl_toolchain_archive: ${{ needs.build_sycl.outputs.artifact_archive_name }} + sycl_toolchain_decompress_command: ${{ needs.build_sycl.outputs.artifact_decompress_command }} diff --git a/devops/actions/benchmarking/aggregate/action.yml b/devops/actions/benchmarking/aggregate/action.yml deleted file mode 100644 index c062636684b1f..0000000000000 --- a/devops/actions/benchmarking/aggregate/action.yml +++ /dev/null @@ -1,95 +0,0 @@ -name: 'Aggregate compute-benchmark results and produce historical averages' - -# The benchmarking workflow in sycl-linux-run-tests.yml passes or fails based on -# how the benchmark results compare to a historical average: This historical -# average is calculated in this composite workflow, which aggregates historical -# data and produces measures of central tendency (median in this case) used for -# this purpose. -# -# This action assumes that /devops has been checked out in ./devops. This action -# also assumes that GITHUB_TOKEN was properly set in env, because according to -# Github, that's apparently the recommended way to pass a secret into a github -# action: -# -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets -# - -inputs: - lookback_days: - type: number - required: true - -runs: - using: "composite" - steps: - - name: Obtain oldest timestamp allowed for data in aggregation - shell: bash - run: | - # DO NOT use inputs.lookback_days directly, only use SANITIZED_TIMESTAMP. - SANITIZED_LOOKBACK_DAYS="$(echo '${{ inputs.lookback_days }}' | grep -oE '^[0-9]+$')" - if [ -z "$SANITIZED_LOOKBACK_DAYS" ]; then - echo "Please ensure inputs.lookback_days is a number." 
- exit 1 - fi - SANITIZED_TIMESTAMP="$(date -d "$SANITIZED_LOOKBACK_DAYS days ago" +%Y%m%d_%H%M%S)" - if [ -z "$(echo "$SANITIZED_TIMESTAMP" | grep -oE '^[0-9]{8}_[0-9]{6}$' )" ]; then - echo "Invalid timestamp generated: is inputs.lookback_days valid?" - exit 1 - fi - echo "SANITIZED_TIMESTAMP=$SANITIZED_TIMESTAMP" >> $GITHUB_ENV - - name: Load benchmarking configuration - shell: bash - run: | - $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) - echo "SANITIZED_PERF_RES_GIT_REPO=$SANITIZED_PERF_RES_GIT_REPO" >> $GITHUB_ENV - echo "SANITIZED_PERF_RES_GIT_BRANCH=$SANITIZED_PERF_RES_GIT_BRANCH" >> $GITHUB_ENV - - name: Checkout historical performance results repository - shell: bash - run: | - if [ ! -d ./llvm-ci-perf-results ]; then - git clone -b "$SANITIZED_PERF_RES_GIT_BRANCH" "https://github.com/$SANITIZED_PERF_RES_GIT_REPO" ./llvm-ci-perf-results - fi - - name: Run aggregator on historical results - shell: bash - run: | - # The current format of the historical results respository is: - # - # /// - # - # Thus, a min/max depth of 3 is used to enumerate all test cases in the - # repository. Test name is also derived from here. - find ./llvm-ci-perf-results -mindepth 3 -maxdepth 3 -type d ! -path '*.git*' | - while read -r dir; do - test_name="$(basename "$dir")" - python ./devops/scripts/benchmarking/aggregate.py ./devops "$test_name" "$dir" "$SANITIZED_TIMESTAMP" - done - - name: Upload average to the repo - shell: bash - run: | - cd ./llvm-ci-perf-results - git config user.name "SYCL Benchmarking Bot" - git config user.email "sys_sycl_benchmarks@intel.com" - git pull - # Make sure changes have been made - if git diff --quiet && git diff --cached --quiet; then - echo "No changes to median, skipping push." - else - git add . - git commit -m "[GHA] Aggregate median data from $SANITIZED_TIMESTAMP to $(date +%Y%m%d_%H%M%S)" - git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" - fi - - name: Find aggregated average results artifact here - if: always() - shell: bash - run: | - cat << EOF - # - # Artifact link for aggregated averages here: - # - EOF - - name: Archive new medians - if: always() - uses: actions/upload-artifact@v4 - with: - name: llvm-ci-perf-results new medians - path: ./llvm-ci-perf-results/**/*-median.csv diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index e357e2bddec30..182e08422b9dd 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -1,24 +1,30 @@ -name: 'Run compute-benchmarks' - -# Run compute-benchmarks on SYCL -# -# This action assumes SYCL is in ./toolchain, and that /devops has been -# checked out in ./devops. This action also assumes that GITHUB_TOKEN -# was properly set in env, because according to Github, that's apparently the -# recommended way to pass a secret into a github action: -# -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets +name: 'Run benchmarks' + +# This action assumes the following prerequisites: # -# This action also expects a RUNNER_TAG environment variable to be set to the -# runner tag used to run this workflow: Currently, only gen12 and pvc on Linux -# are fully supported. Although this workflow won't stop you from running other -# devices, note that only gen12 and pvc has been tested to work. 
+# - SYCL is placed in ./toolchain -- TODO change this +# - /devops has been checked out in ./devops. +# - env.GITHUB_TOKEN was properly set, because according to Github, that's +# apparently the recommended way to pass a secret into a github action: + +# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets # +# - env.RUNNER_TAG set to the runner tag used to run this workflow: Currently, +# only specific runners are fully supported. inputs: target_devices: type: string required: True + upload_results: + type: string + required: True + save_name: + type: string + required: True + preset: + type: string + required: True runs: using: "composite" @@ -27,16 +33,24 @@ runs: shell: bash env: TARGET_DEVICE: ${{ inputs.target_devices }} + PRESET: ${{ inputs.preset }} run: | case "$RUNNER_TAG" in - '["Linux", "gen12"]' | '["Linux", "pvc"]') ;; + '["PVC_PERF"]' ) ;; *) echo "#" - echo "# WARNING: Only gen12/pvc on Linux is fully supported." + echo "# WARNING: Only specific tuned runners are fully supported." echo "# This workflow is not guaranteed to work with other runners." echo "#" ;; esac + # Ensure runner name has nothing injected + # TODO: in terms of security, is this overkill? + if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then + echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]." + exit 1 + fi + # input.target_devices is not directly used, as this allows code injection case "$TARGET_DEVICE" in level_zero:*) ;; @@ -46,11 +60,15 @@ runs: echo "# This workflow is not guaranteed to work with other backends." echo "#" ;; esac + echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV + + # Make sure specified preset is a known value and is not malicious + python3 ./devops/scripts/benchmarks/presets.py query "$PRESET" + [ "$?" -ne 0 ] && exit 1 # Stop workflow if invalid preset + echo "PRESET=$PRESET" >> $GITHUB_ENV - name: Compute CPU core range to run benchmarks on shell: bash run: | - # Taken from ur-benchmark-reusable.yml: - # Compute the core range for the first NUMA node; second node is used by # UMF. Skip the first 4 cores as the kernel is likely to schedule more # work on these. @@ -67,62 +85,131 @@ runs: ZE_AFFINITY_MASK=0 echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV + - name: Checkout results repo + shell: bash + run: | + git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results - name: Run compute-benchmarks + env: + # Need to append "__" to save name in order to follow + # conventions: + SAVE_PREFIX: ${{ inputs.save_name }} shell: bash run: | - cat << EOF - # - # NOTE TO DEVELOPERS: - # - - Check latter steps of the workflow: This job produces an artifact with: - - benchmark results from passing/failing tests - - log containing all failing (too slow) benchmarks - - log containing all erroring benchmarks - - While this step in the workflow provides debugging output describing this - information, it might be easier to inspect the logs from the artifact - instead. 
- - EOF - export ONEAPI_DEVICE_SELECTOR="${{ inputs.target_devices }}" + # TODO generate summary + display helpful message here export CMPLR_ROOT=./toolchain echo "-----" sycl-ls echo "-----" - taskset -c "$CORES" ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1 - - name: Push compute-benchmarks results + # Using --break-system-packages because: + # - venv is not installed + # - unable to install anything via pip, as python packages in the docker + # container are managed by apt + # - apt is unable to install anything due to unresolved dpkg dependencies, + # as a result of how the sycl nightly images are created + pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt + echo "-----" + + # clang builds have git repo / commit hashes in their --version output, + # same goes for dpcpp. Obtain git repo / commit hash info this way: + + # First line of --version is formatted 'clang version ... ( )' + # thus we parse for ( ): + sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$' | tr -d '()')" + if [ -z "$sycl_git_info" ]; then + echo "Error: Unable to deduce SYCL build source repo/commit: Are you sure dpcpp variable is in PATH?" + exit 1 + fi + sycl_git_repo="$(printf "$sycl_git_info" | cut -d' ' -f1)" + sycl_git_commit="$(printf "$sycl_git_info" | cut -d' ' -f2)" + + case "$ONEAPI_DEVICE_SELECTOR" in + level_zero:*) SAVE_SUFFIX="L0" ;; + level_zero_v2:*) SAVE_SUFFIX="L0v2" ;; + opencl:*) SAVE_SUFFIX="OCL" ;; + *) SAVE_SUFFIX="${ONEAPI_DEVICE_SELECTOR%%:*}";; + esac + # TODO accomodate for different GPUs and backends + SAVE_NAME="${SAVE_PREFIX}_PVC_${SAVE_SUFFIX}" + SAVE_TIMESTAMP="$(date -u +'%Y%m%d_%H%M%S')" # Timestamps are in UTC time + + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ + "$(realpath ./llvm_test_workdir)" \ + --sycl "$(realpath ./toolchain)" \ + --save "$SAVE_NAME" \ + --output-html remote \ + --results-dir "./llvm-ci-perf-results/" \ + --output-dir "./llvm-ci-perf-results/" \ + --preset "$PRESET" \ + --timestamp-override "$SAVE_TIMESTAMP" \ + --github-repo "$sycl_git_repo" \ + --git-commit "$sycl_git_commit" + echo "-----" + python3 ./devops/scripts/benchmarks/compare.py to_hist \ + --name "$SAVE_NAME" \ + --compare-file "./llvm-ci-perf-results/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \ + --results-dir "./llvm-ci-perf-results/results/" + echo "-----" + + - name: Cache changes to benchmark folder for archival purposes if: always() shell: bash + run: | + cd "./llvm-ci-perf-results" + git add . + for diff in $(git diff HEAD --name-only); do + mkdir -p "../cached_changes/$(dirname $diff)" + cp "$diff" "../cached_changes/$diff" + done + - name: Push benchmarks results + if: inputs.upload_results == 'true' && always() + shell: bash run: | - # TODO -- waiting on security clearance - # Load configuration values - $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) - cd "./llvm-ci-perf-results" git config user.name "SYCL Benchmarking Bot" git config user.email "sys_sycl_benchmarks@intel.com" - git pull - git add . - # Make sure changes have been made + results_branch="unify-ci" + if git diff --quiet && git diff --cached --quiet; then echo "No new results added, skipping push." 
- else - git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" - git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" + exit 0 fi - - name: Find benchmark result artifact here - if: always() - shell: bash - run: | - cat << EOF - # - # Artifact link for benchmark results here: - # - EOF - - name: Archive compute-benchmark results + + for attempt in 1 2 3; do + echo "Attempt $attempt to push new results" + git add . + git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" + results_file="$(git diff HEAD~1 --name-only -- results/ | head -n 1)" + + if git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" "$results_branch"; then + echo "Push succeeded" + break + fi + + echo "Push failed, retrying..." + if [ -n "$results_file" ]; then + cached_result="$(mktemp -d)/$(basename $results_file)" + mv "$results_file" "$cached_result" + + git reset --hard "origin/$results_branch" + git pull origin "$results_branch" + + mv "$cached_result" "$results_file" + fi + + echo "Regenerating data.json..." + cd ../ + ./devops/scripts/benchmarks/main.py \ + "$(realpath ./llvm_test_workdir)" \ + --output-html remote \ + --results-dir "./llvm-ci-perf-results/" \ + --output-dir "./llvm-ci-perf-results/" \ + --dry-run + cd - + done + - name: Archive benchmark results if: always() uses: actions/upload-artifact@v4 with: - name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) - path: ./artifact + name: Benchmark run ${{ github.run_id }} (${{ runner.name }}) + path: ./cached_changes diff --git a/devops/benchmarking/config.ini b/devops/benchmarking/config.ini deleted file mode 100644 index 988d1d9f08af9..0000000000000 --- a/devops/benchmarking/config.ini +++ /dev/null @@ -1,44 +0,0 @@ -; -; This file contains configuration options to change the behaviour of the -; benchmarking workflow in sycl-linux-run-tests.yml. -; -; DO NOT USE THE CONTENTS OF THIS FILE DIRECTLY -- Due to security concerns, The -; contents of this file must be sanitized first before use. -; See: /devops/scripts/benchmarking/common.py -; - -; Compute-benchmark compile/run options -[compute_bench] -; Value for -j during compilation of compute-benchmarks -compile_jobs = 40 -; Number of iterations to run compute-benchmark tests -iterations = 5000 - -; Options for benchmark result metrics (to record/compare against) -[metrics] -; Sets the metrics to record/aggregate in the historical average. -; Format: comma-separated list of column names in compute-benchmark results -recorded = Median,StdDev -; Sets the tolerance for each recorded metric and their allowed deviation from -; the historical average. Metrics not included here are not compared against -; when passing/failing benchmark results. 
-; Format: comma-separated list of : -tolerances = Median:0.08 - -; Options for computing historical averages -[average] -; Number of days (from today) to look back for results when computing historical -; average -cutoff_range = 7 -; Minimum number of samples required to compute a historical average -min_threshold = 10 - -; ONEAPI_DEVICE_SELECTOR linting/options -[device_selector] -; Backends to allow in device_selector -enabled_backends = level_zero,opencl,cuda,hip -; native_cpu is disabled - -; Devices to allow in device_selector -enabled_devices = cpu,gpu -; fpga is disabled diff --git a/devops/benchmarking/constants.ini b/devops/benchmarking/constants.ini deleted file mode 100644 index 9281ece8f4950..0000000000000 --- a/devops/benchmarking/constants.ini +++ /dev/null @@ -1,48 +0,0 @@ -; -; This file defines constants used throughout the benchmarking workflow in -; sycl-linux-run-tests.yml. If you're trying to change the behavior of this -; workflow, you're likely looking for /devops/benchmarking/config.ini instead. -; -; DO NOT USE THE CONTENTS OF THIS FILE DIRECTLY -- Due to security concerns, The -; contents of this file must be sanitized first before use. -; See: /devops/scripts/benchmarking/common.py -; - -; Constants for compute-benchmarks -[compute_bench] -git_repo = intel/compute-benchmarks -git_branch = master -git_commit = 230a3db4d8d03c0e9a663988f7c3abbd1137a1e0 -; path = ./compute-benchmarks - -; Constants for git repo storing benchmark performance results -[perf_res] -git_repo = intel/llvm-ci-perf-results -git_branch = main -; Path to clone performance result repo -; path = ./llvm-ci-perf-results - -; It was decided that paths should be hardcoded throughout this workflow for -; security reasons and ease of readability. Do not use paths as constants. - -; ; Constants for artifacts -; [artifact] -; ; Path to root folder storing benchmark CI artifact -; path = ./artifact -; ; Path (relative to artifact.path) to cache compute-benchmark results -; ; -; ; If a test result does not get moved out of this catch-all cache path, it is -; ; considered to have failed -; output_cache = ./artifact/failed_tests -; ; Path (relative to artifact.path) to cache passing compute-benchmark results -; passing_cache = ./artifact/passing_tests - -; [timestamp] -; ; Timestamp format used for -; format = %%Y%%m%%d_%%H%%M%%S - -; [benchmark_log] -; ; Log file for test cases that perform over the allowed variance -; slow = ./artifact/benchmarks_failed.log -; ; Log file for test cases that errored / failed to build -; error = ./artifact/benchmarks_errored.log diff --git a/devops/benchmarking/enabled_tests.conf b/devops/benchmarking/enabled_tests.conf deleted file mode 100644 index 20659cbea636d..0000000000000 --- a/devops/benchmarking/enabled_tests.conf +++ /dev/null @@ -1,8 +0,0 @@ -# Test cases to be enabled: -api_overhead_benchmark_sycl -memory_benchmark_sycl -miscellaneous_benchmark_sycl -ulls_benchmark_sycl - -# As of January 2025, these are every compute-benchmark tests with a SYCL -# implementation. 
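The enabled_tests.conf allow-list removed above is superseded by named presets: the updated benchmark action validates its preset input by running python3 ./devops/scripts/benchmarks/presets.py query "$PRESET" and aborts on a non-zero exit status. The real presets.py is not included in this diff, so the sketch below only illustrates that query/exit-code contract; the preset names are taken from the workflow_dispatch choices and everything else is assumed.

import sys

# Preset names mirror the workflow_dispatch choices; the real script also maps
# each preset onto a set of benchmark suites, which is omitted here.
KNOWN_PRESETS = {"Full", "SYCL", "Minimal", "Normal", "Test"}

def query(name: str) -> int:
    # Return 0 only for a known preset, so the calling shell step can reject
    # unexpected (or malicious) input before exporting PRESET to later steps.
    if name in KNOWN_PRESETS:
        print(name)
        return 0
    print(f"Unknown preset: {name}", file=sys.stderr)
    return 1

if __name__ == "__main__":
    # Usage mirroring the action: presets.py query Minimal
    sys.exit(query(sys.argv[2]) if len(sys.argv) == 3 and sys.argv[1] == "query" else 1)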
diff --git a/devops/scripts/benchmarking/aggregate.py b/devops/scripts/benchmarking/aggregate.py deleted file mode 100644 index f62a8ffed83c5..0000000000000 --- a/devops/scripts/benchmarking/aggregate.py +++ /dev/null @@ -1,205 +0,0 @@ -import csv -import sys -from pathlib import Path -import heapq -import statistics -from common import Validate, SanitizedConfig -from abc import ABC, abstractmethod -import os - - -class Aggregator(ABC): - """ - Aggregator classes used to "aggregate" a pool of elements, and produce an - "average" (precisely, some "measure of central tendency") from the elements. - """ - - @staticmethod - @abstractmethod - def get_type() -> str: - """ - Return a string indicating the type of average this aggregator - produces. - """ - pass - - @abstractmethod - def add(self, n: float): - """ - Add/aggregate an element to the pool of elements used by this aggregator - to produce an average calculation. - """ - pass - - @abstractmethod - def get_avg(self) -> float: - """ - Produce an average from the pool of elements aggregated using add(). - """ - pass - - -class SimpleMedian(Aggregator): - """ - Simple median calculation: if the number of samples being generated are low, - this is the fastest median method. - """ - - def __init__(self): - self.elements = [] - - @staticmethod - def get_type() -> str: - return "median" - - def add(self, n: float): - self.elements.append(n) - - def get_avg(self) -> float: - return statistics.median(self.elements) - - -class StreamingMedian(Aggregator): - """ - Calculate medians incrementally using heaps: Theoretically the fastest way - to calculate a median from a stream of elements, but realistically is only - faster when dealing with huge numbers of samples that would be generated by - i.e. enabling this workflow in precommit and using longer periods of time. - """ - - def __init__(self): - # Gist: we keep a minheap and a maxheap, and store the median as the top - # of the minheap. When a new element comes it gets put into the heap - # based on if the element is bigger than the current median. Then, the - # heaps are heapified and the median is repopulated by heapify. - self.minheap_larger = [] - self.maxheap_smaller = [] - - @staticmethod - def get_type() -> str: - return "median" - - # Note: numbers on maxheap should be negative, as heapq - # is minheap by default - - def add(self, n: float): - if len(self.maxheap_smaller) == 0 or -self.maxheap_smaller[0] >= n: - heapq.heappush(self.maxheap_smaller, -n) - else: - heapq.heappush(self.minheap_larger, n) - - # Ensure minheap has more elements than maxheap - if len(self.maxheap_smaller) > len(self.minheap_larger) + 1: - heapq.heappush(self.minheap_larger, -heapq.heappop(self.maxheap_smaller)) - elif len(self.maxheap_smaller) < len(self.minheap_larger): - heapq.heappush(self.maxheap_smaller, -heapq.heappop(self.minheap_larger)) - - def get_avg(self) -> float: - if len(self.maxheap_smaller) == len(self.minheap_larger): - # Equal number of elements smaller and larger than "median": - # thus, there are two median values. The median would then become - # the average of both median values. 
- return (-self.maxheap_smaller[0] + self.minheap_larger[0]) / 2.0 - else: - # Otherwise, median is always in minheap, as minheap is always - # bigger - return -self.maxheap_smaller[0] - - -class Aggregate: - """ - Static class providing methods for aggregating data - """ - - @staticmethod - def hist_avg( - benchmark_name: str, res_dir: str, cutoff: str, aggregator=SimpleMedian - ): - if not os.path.isdir(res_dir): - print(f"Not a directory: {res_dir}.", file=sys.stderr) - exit(1) - - def get_csv_samples() -> list[str]: - """Get all valid .csv samples from the results folder.""" - cache_dir = Path(f"{res_dir}") - # Filter all benchmark .csv files in the result directory: - return list( - filter( - # Make sure the .csv "file" is a file: - lambda f: f.is_file() - # Make sure timestamp of .csv file is good format: - # [-19:-4] corresponds to the timestamp in the filename. - and Validate.timestamp(str(f)[-19:-4]) - # Make sure timestamp is bigger than cutoff timestamp: - and str(f)[-19:-4] > cutoff, - cache_dir.glob(f"{benchmark_name}-*_*.csv"), - ) - ) - - # Calculate median of every desired metric: - samples_aggregate = dict() - filtered_samples = get_csv_samples() - if len(filtered_samples) == 0: - print( - f"WARNING: No results for {benchmark_name} found from {cutoff} to now", - file=sys.stderr, - ) - for sample_path in filtered_samples: - with open(sample_path, "r") as sample_file: - for sample in csv.DictReader(sample_file): - test = sample["TestCase"] - # Construct entry in aggregator for test if it doesn't exist - # already: - if test not in samples_aggregate: - samples_aggregate[test] = { - metric: aggregator() - for metric in SanitizedConfig.METRICS_TOLERANCES - } - - # For each metric of concern, add to aggregator: - for metric in SanitizedConfig.METRICS_TOLERANCES: - sample_value = Validate.sanitize_stat(sample[metric]) - if not isinstance(sample_value, float): - print( - f"Malformatted statistic in {str(sample_path)}: " - + f"'{sample[metric]}' for {test}." 
- ) - exit(1) - # Add metric from sample for current test to aggregate: - samples_aggregate[test][metric].add(sample_value) - - # Calculate + write new average (from samples_aggregate) in new .csv file: - with open( - f"{res_dir}/{benchmark_name}-{aggregator.get_type()}.csv", "w" - ) as output_csv: - writer = csv.DictWriter( - output_csv, - fieldnames=["TestCase", *SanitizedConfig.METRICS_TOLERANCES.keys()], - ) - writer.writeheader() - for test in samples_aggregate: - writer.writerow( - {"TestCase": test} - | { - metric: samples_aggregate[test][metric].get_avg() - for metric in SanitizedConfig.METRICS_TOLERANCES - } - ) - - -if __name__ == "__main__": - if len(sys.argv) != 5: - print( - f"Usage: {sys.argv[0]} " - ) - exit(1) - if not Validate.timestamp(sys.argv[4]): - print(f"Bad cutoff timestamp, please use YYYYMMDD_HHMMSS.", file=sys.stderr) - exit(1) - if not Validate.filepath(sys.argv[1]): - print(f"Not a valid filepath: {sys.argv[1]}", file=sys.stderr) - exit(1) - # If the filepath provided passed filepath validation, then it is clean - SanitizedConfig.load(sys.argv[1]) - - Aggregate.hist_avg(sys.argv[2], sys.argv[3], sys.argv[4]) diff --git a/devops/scripts/benchmarking/benchmark.sh b/devops/scripts/benchmarking/benchmark.sh deleted file mode 100755 index bbfd669774f9a..0000000000000 --- a/devops/scripts/benchmarking/benchmark.sh +++ /dev/null @@ -1,300 +0,0 @@ -#!/bin/sh - -# -# benchmark.sh: Benchmark dpcpp using compute-benchmarks -# - -usage () { - >&2 echo "Usage: $0 -t [-B ] - -n Github runner name -- Required - -c Clean up working directory - -C Clean up working directory and exit - -s Cache results - -This script builds and runs benchmarks from compute-benchmarks." - exit 1 -} - -# Ensures test cases read from enabled_tests.conf contains no malicious content -_validate_testname () { - if [ -n "$(printf "%s" "$1" | sed "s/[a-zA-Z_]*//g")" ]; then - echo "Illegal characters in $TEST_CONFIG. Permitted characters: a-zA-Z_" - exit 1 - fi -} - -clone_perf_res() { - echo "### Cloning llvm-ci-perf-results ($SANITIZED_PERF_RES_GIT_REPO:$SANITIZED_PERF_RES_GIT_BRANCH) ###" - git clone -b "$SANITIZED_PERF_RES_GIT_BRANCH" "https://github.com/$SANITIZED_PERF_RES_GIT_REPO" ./llvm-ci-perf-results - [ "$?" -ne 0 ] && exit "$?" -} - -clone_compute_bench() { - echo "### Cloning compute-benchmarks ($SANITIZED_COMPUTE_BENCH_GIT_REPO:$SANITIZED_COMPUTE_BENCH_GIT_BRANCH) ###" - git clone -b "$SANITIZED_COMPUTE_BENCH_GIT_BRANCH" \ - --recurse-submodules "https://github.com/$SANITIZED_COMPUTE_BENCH_GIT_REPO" \ - ./compute-benchmarks - if [ ! -d "./compute-benchmarks" ]; then - echo "Failed to clone compute-benchmarks." - exit 1 - elif [ -n "$SANITIZED_COMPUTE_BENCH_GIT_COMMIT" ]; then - cd ./compute-benchmarks - git checkout "$SANITIZED_COMPUTE_BENCH_GIT_COMMIT" - if [ "$?" -ne 0 ]; then - echo "Failed to get compute-benchmarks commit '$SANITIZED_COMPUTE_BENCH_GIT_COMMIT'." - exit 1 - fi - cd - - fi -} - -build_compute_bench() { - echo "### Building compute-benchmarks ($SANITIZED_COMPUTE_BENCH_GIT_REPO:$SANITIZED_COMPUTE_BENCH_GIT_BRANCH) ###" - mkdir ./compute-benchmarks/build && cd ./compute-benchmarks/build && - # No reason to turn on ccache, if this docker image will be disassembled later on - cmake .. -DBUILD_SYCL=ON -DBUILD_L0=OFF -DBUILD=OCL=OFF -DCCACHE_ALLOWED=FALSE - # TODO enable mechanism for opting into L0 and OCL -- the concept is to - # subtract OCL/L0 times from SYCL times in hopes of deriving SYCL runtime - # overhead, but this is mostly an idea that needs to be mulled upon. 
- - if [ "$?" -eq 0 ]; then - while IFS= read -r case; do - # Skip lines starting with '#' - [ "${case##\#*}" ] || continue - - _validate_testname "$case" - make "-j$SANITIZED_COMPUTE_BENCH_COMPILE_JOBS" "$case" - done < "$TESTS_CONFIG" - fi - cd - -} - -# Check if the number of samples for a given test case is less than a threshold -# set in benchmark-ci.conf -# -# Usage: -samples_under_threshold () { - # Directory doesn't exist, samples automatically under threshold - [ ! -d "./llvm-ci-perf-results/$1" ] && return 0 - file_count="$(find "./llvm-ci-perf-results/$1" -maxdepth 1 -type f | wc -l )" - [ "$file_count" -lt "$SANITIZED_AVERAGE_MIN_THRESHOLD" ] -} - -# Check for a regression via compare.py -# -# Usage: check_regression -check_regression() { - csv_relpath="$(dirname "$1")" - csv_name="$(basename "$1")" - if samples_under_threshold "$csv_relpath"; then - echo "Not enough samples to construct a good average, performance\ - check skipped!" - return 0 # Success status - fi - python "$DEVOPS_PATH/scripts/benchmarking/compare.py" \ - "$DEVOPS_PATH" "$csv_relpath" "$csv_name" - return $? -} - -# Move the results of our benchmark into the git repo, and save benchmark -# results to artifact archive -# -# Usage: cache -cache() { - mkdir -p "$(dirname ./artifact/passing_tests/$1)" "$(dirname ./artifact/failed_tests/$1)" - cp "./artifact/failed_tests/$1" "./artifact/passing_tests/$1" - mkdir -p "$(dirname ./llvm-ci-perf-results/$1)" - mv "./artifact/failed_tests/$1" "./llvm-ci-perf-results/$1" -} - -# Check for a regression + cache if no regression found -# -# Usage: check_and_cache -check_and_cache() { - echo "Checking $1..." - if check_regression $1; then - if [ "$CACHE_RESULTS" -eq "1" ]; then - echo "Caching $1..." - cache $1 - fi - else - [ "$CACHE_RESULTS" -eq "1" ] && echo "Regression found -- Not caching!" - fi -} - -# Run and process the results of each enabled benchmark in enabled_tests.conf -process_benchmarks() { - echo "### Running and processing selected benchmarks ###" - if [ -z "$TESTS_CONFIG" ]; then - echo "Setting tests to run via cli is not currently supported." - exit 1 - else - rm ./artifact/benchmarks_errored.log ./artifact/benchmarks_failed.log 2> /dev/null - mkdir -p ./artifact - # Loop through each line of enabled_tests.conf, but ignore lines in the - # test config starting with #'s: - grep "^[^#]" "$TESTS_CONFIG" | while read -r testcase; do - _validate_testname "$testcase" - echo "# Running $testcase..." - - # The benchmark results git repo and this script's output both share - # the following directory structure: - # - # /// - # - # Instead of specifying 2 paths with a slightly different root - # folder name for every function we use, we can use a relative path - # to represent the file in both folders. - # - # Figure out the relative path of our testcase result: - test_dir_relpath="$DEVICE_SELECTOR_DIRNAME/$RUNNER/$testcase" - output_csv_relpath="$test_dir_relpath/$testcase-$TIMESTAMP.csv" - mkdir -p "./artifact/failed_tests/$test_dir_relpath" # Ensure directory exists - - # Tests are first placed in ./artifact/failed_tests, and are only - # moved to passing_tests or the performance results repo if the - # benchmark results are passing - output_csv="./artifact/failed_tests/$output_csv_relpath" - "./compute-benchmarks/build/bin/$testcase" --csv \ - --iterations="$SANITIZED_COMPUTE_BENCH_ITERATIONS" > "$output_csv" - - exit_status="$?" 
- if [ "$exit_status" -eq 0 ] && [ -s "$output_csv" ]; then - # Filter out header lines not in csv format: - tail +8 "$output_csv" > .tmp_res - mv .tmp_res "$output_csv" - check_and_cache $output_csv_relpath - else - echo "[ERROR] $testcase returned exit status $exit_status" - echo "-- $testcase: error $exit_status" >> ./artifact/benchmarks_errored.log - fi - done - fi -} - -# Handle failures + produce a report on what failed -process_results() { - fail=0 - if [ -s ./artifact/benchmarks_failed.log ]; then - printf "\n### Tests performing over acceptable range of average: ###\n" - cat ./artifact/benchmarks_failed.log - echo "" - fail=2 - fi - if [ -s ./artifact/benchmarks_errored.log ]; then - printf "\n### Tests that failed to run: ###\n" - cat ./artifact/benchmarks_errored.log - echo "" - fail=1 - fi - exit $fail -} - -cleanup() { - echo "### Cleaning up compute-benchmark builds from prior runs ###" - rm -rf ./compute-benchmarks - rm -rf ./llvm-ci-perf-results - [ ! -z "$_exit_after_cleanup" ] && exit -} - -load_configs() { - # This script needs to know where the intel/llvm "/devops" directory is, - # containing all the configuration files and the compare script. - # - # If this is not provided, this function tries to guess where the files - # are based on how the script is called, and verifies that all necessary - # configs and scripts are reachable. - - # This benchmarking script is usually at: - # - # /devops/scripts/benchmarking/benchmark.sh - # - # Derive /devops based on location of this script: - [ -z "$DEVOPS_PATH" ] && DEVOPS_PATH="$(dirname "$0")/../.." - if [ -z "$(printf '%s' "$DEVOPS_PATH" | grep -oE '^[a-zA-Z0-9._\/-]+$')" ]; then - echo "Bad DEVOPS_PATH, please specify DEVOPS_PATH variable." - exit 1 - fi - - TESTS_CONFIG="$(realpath "$DEVOPS_PATH/benchmarking/enabled_tests.conf")" - COMPARE_PATH="$(realpath "$DEVOPS_PATH/scripts/benchmarking/compare.py")" - LOAD_CONFIG_PY="$(realpath "$DEVOPS_PATH/scripts/benchmarking/load_config.py")" - - for file in \ - "$TESTS_CONFIG" "$COMPARE_PATH" "$LOAD_CONFIG_PY" - do - if [ ! -f "$file" ]; then - echo "Please provide path to /devops in DEVOPS_PATH." - exit -1 - fi - done - - $(python "$LOAD_CONFIG_PY" "$DEVOPS_PATH" config) - $(python "$LOAD_CONFIG_PY" "$DEVOPS_PATH" constants) -} - -##### - -load_configs - -COMPUTE_BENCH_COMPILE_FLAGS="" -CACHE_RESULTS="0" -# Timestamp format is YYYYMMDD_HHMMSS -TIMESTAMP="$(date +%Y%m%d_%H%M%S)" - -# CLI flags + overrides to configuration options: -while getopts "n:cCs" opt; do - case "$opt" in - n) - if [ -n "$(printf "%s" "$OPTARG" | sed "s/[a-zA-Z0-9_-]*//g")" ]; then - echo "Illegal characters in runner name." - exit 1 - fi - RUNNER="$OPTARG" - ;; - # Cleanup status is saved in a var to ensure all arguments are processed before - # performing cleanup - c) _cleanup=1 ;; - C) _cleanup=1 && _exit_after_cleanup=1 ;; - s) CACHE_RESULTS=1;; - \?) usage ;; - esac -done - -# Check all necessary variables exist: -if [ -z "$CMPLR_ROOT" ]; then - echo "Please set CMPLR_ROOT first; it is needed by compute-benchmarks to build." - exit 1 -elif [ -z "$ONEAPI_DEVICE_SELECTOR" ]; then - echo "Please set ONEAPI_DEVICE_SELECTOR first to specify which device to use." - exit 1 -elif [ -z "$RUNNER" ]; then - echo "Please specify runner name using -n first; it is needed for storing/comparing benchmark results." 
- exit 1 -fi - -# Make sure ONEAPI_DEVICE_SELECTOR doesn't try to enable multiple devices at the -# same time, or use specific device id's -_dev_sel_backend_re="$(echo "$SANITIZED_DEVICE_SELECTOR_ENABLED_BACKENDS" | sed 's/,/|/g')" -_dev_sel_device_re="$(echo "$SANITIZED_DEVICE_SELECTOR_ENABLED_DEVICES" | sed 's/,/|/g')" -_dev_sel_re="s/($_dev_sel_backend_re):($_dev_sel_device_re)//" -if [ -n "$(echo "$ONEAPI_DEVICE_SELECTOR" | sed -E "$_dev_sel_re")" ]; then - echo "Unsupported ONEAPI_DEVICE_SELECTOR value: please ensure only one \ -device is selected, and devices are not selected by indices." - echo "Enabled backends: $SANITIZED_DEVICE_SELECTOR_ENABLED_BACKENDS" - echo "Enabled device types: $SANITIZED_DEVICE_SELECTOR_ENABLED_DEVICES" - exit 1 -fi -# ONEAPI_DEVICE_SELECTOR values are not valid directory names in unix: this -# value lets us use ONEAPI_DEVICE_SELECTOR as actual directory names -DEVICE_SELECTOR_DIRNAME="$(echo "$ONEAPI_DEVICE_SELECTOR" | sed 's/:/-/')" - -# Clean up and delete all cached files if specified: -[ ! -z "$_cleanup" ] && cleanup -# Clone and build only if they aren't already cached/deleted: -[ ! -d ./llvm-ci-perf-results ] && clone_perf_res -[ ! -d ./compute-benchmarks ] && clone_compute_bench -[ ! -d ./compute-benchmarks/build ] && build_compute_bench -# Process benchmarks: -process_benchmarks -process_results \ No newline at end of file diff --git a/devops/scripts/benchmarking/common.py b/devops/scripts/benchmarking/common.py deleted file mode 100644 index c400b686db90f..0000000000000 --- a/devops/scripts/benchmarking/common.py +++ /dev/null @@ -1,196 +0,0 @@ -import re -import os -import sys -import string -import configparser - - -class Validate: - """Static class containing methods for validating various fields""" - - @staticmethod - def filepath(path: str) -> bool: - """ - Returns True if path is clean (no illegal characters), otherwise False. - """ - filepath_re = re.compile(r"[a-zA-Z0-9\/\._\-]+") - return filepath_re.match(path) is not None - - @staticmethod - def timestamp(t: str) -> bool: - """ - Returns True if t is in form YYYYMMDD_HHMMSS, otherwise False. - """ - timestamp_re = re.compile( - r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$" - ) - return timestamp_re.match(t) is not None - - @staticmethod - def sanitize_stat(stat: str) -> float: - """ - Sanitize statistics found in compute-benchmark output csv files. Returns - float if sanitized, None if not sanitizable. - """ - # Get rid of % - if stat[-1] == "%": - stat = stat[:-1] - - # Cast to float: If cast succeeds, the statistic is clean. - try: - return float(stat) - except ValueError: - return None - - -class SanitizedConfig: - """ - Static class for holding sanitized configuration values used within python. - - Configuration option names follow
_
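One convention carries over from the deleted scripts into the new flow: results are keyed by a UTC YYYYMMDD_HHMMSS timestamp. The deleted Validate.timestamp enforces that shape with a regex, and the new action stamps result files as ${SAVE_NAME}_${SAVE_TIMESTAMP}.json using date -u +'%Y%m%d_%H%M%S'. Below is a minimal sketch of that shared convention; the helper names are illustrative and not taken from either script.

import re
from datetime import datetime, timezone

# Same pattern the deleted Validate.timestamp used for YYYYMMDD_HHMMSS values.
TIMESTAMP_RE = re.compile(
    r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$"
)

def make_timestamp() -> str:
    # Mirrors the action's date -u +'%Y%m%d_%H%M%S' (timestamps are in UTC).
    return datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")

def result_filename(save_name: str, timestamp: str) -> str:
    # New results land in results/ as <save_name>_<timestamp>.json,
    # e.g. Baseline_PVC_L0_20250101_120000.json.
    assert TIMESTAMP_RE.match(timestamp), "expected YYYYMMDD_HHMMSS"
    return f"{save_name}_{timestamp}.json"

print(result_filename("Baseline_PVC_L0", make_timestamp()))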