diff --git a/.github/workflows/sycl-docs.yml b/.github/workflows/sycl-docs.yml
index 5c1e8e425111b..fff88a7ffa5f1 100644
--- a/.github/workflows/sycl-docs.yml
+++ b/.github/workflows/sycl-docs.yml
@@ -47,8 +47,10 @@ jobs:
mkdir $GITHUB_WORKSPACE/install_docs
cd $GITHUB_WORKSPACE/install_docs
mkdir clang
+ mkdir benchmarks
mv $GITHUB_WORKSPACE/build/tools/sycl/doc/html/* .
mv $GITHUB_WORKSPACE/build/tools/clang/docs/html/* clang/
+ cp -r $GITHUB_WORKSPACE/devops/scripts/benchmarks/html benchmarks/
touch .nojekyll
# Upload the generated docs as an artifact and deploy to GitHub Pages.
- name: Upload artifact
diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml
index caec0b79c99e8..26e586410fd75 100644
--- a/.github/workflows/sycl-linux-run-tests.yml
+++ b/.github/workflows/sycl-linux-run-tests.yml
@@ -126,6 +126,7 @@ on:
- '["cts-cpu"]'
- '["Linux", "build"]'
- '["cuda"]'
+ - '["Linux", "bmg"]'
- '["PVC_PERF"]'
image:
type: choice
@@ -154,6 +155,7 @@ on:
- e2e
- cts
- compute-benchmarks
+ - benchmark-v2
env:
description: |
@@ -329,3 +331,12 @@ jobs:
env:
RUNNER_TAG: ${{ inputs.runner }}
GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
+
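+      # benchmark-v2 runs the consolidated benchmark scripts via the
+      # benchmark_v2 composite action added in this patch.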
+ - name: Run Benchmarks
+ if: inputs.tests_selector == 'benchmark-v2'
+ uses: ./devops/actions/run-tests/benchmark_v2
+ with:
+ target_devices: ${{ inputs.target_devices }}
+ env:
+ RUNNER_TAG: ${{ inputs.runner }}
+ GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
\ No newline at end of file
diff --git a/.github/workflows/ur-build-hw.yml b/.github/workflows/ur-build-hw.yml
index 7c912442d6363..081ab76b739ce 100644
--- a/.github/workflows/ur-build-hw.yml
+++ b/.github/workflows/ur-build-hw.yml
@@ -151,4 +151,4 @@ jobs:
- name: Get information about platform
if: ${{ always() }}
- run: ${{github.workspace}}/unified-runtime/.github/scripts/get_system_info.sh
+ run: ${{github.workspace}}/devops/scripts/get_system_info.sh
diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
index e357e2bddec30..7f69fdf832982 100644
--- a/devops/actions/run-tests/benchmark/action.yml
+++ b/devops/actions/run-tests/benchmark/action.yml
@@ -46,27 +46,6 @@ runs:
echo "# This workflow is not guaranteed to work with other backends."
echo "#" ;;
esac
- - name: Compute CPU core range to run benchmarks on
- shell: bash
- run: |
- # Taken from ur-benchmark-reusable.yml:
-
- # Compute the core range for the first NUMA node; second node is used by
- # UMF. Skip the first 4 cores as the kernel is likely to schedule more
- # work on these.
- CORES="$(lscpu | awk '
- /NUMA node0 CPU|On-line CPU/ {line=$0}
- END {
- split(line, a, " ")
- split(a[4], b, ",")
- sub(/^0/, "4", b[1])
- print b[1]
- }')"
- echo "CPU core range to use: $CORES"
- echo "CORES=$CORES" >> $GITHUB_ENV
-
- ZE_AFFINITY_MASK=0
- echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV
- name: Run compute-benchmarks
shell: bash
run: |
@@ -90,7 +69,7 @@ runs:
echo "-----"
sycl-ls
echo "-----"
- taskset -c "$CORES" ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1
+ ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1
- name: Push compute-benchmarks results
if: always()
shell: bash
diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml
new file mode 100644
index 0000000000000..2ec31bed20f69
--- /dev/null
+++ b/devops/actions/run-tests/benchmark_v2/action.yml
@@ -0,0 +1,134 @@
+name: 'Run Benchmarks'
+
+# This action assumes the following prerequisites:
+#
+# - SYCL is placed in ./toolchain -- TODO change this
+# - /devops has been checked out in ./devops.
+# - env.GITHUB_TOKEN is set; according to GitHub, this is the recommended way
+#   to pass a secret into a GitHub action:
+
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets
+#
+# - env.RUNNER_TAG is set to the runner tag used to run this workflow.
+#   Currently, only specific runners are fully supported.
+
+inputs:
+ target_devices:
+ type: string
+ required: True
+
+runs:
+ using: "composite"
+ steps:
+ - name: Check specified runner type / target backend
+ shell: bash
+ env:
+ TARGET_DEVICE: ${{ inputs.target_devices }}
+ RUNNER_NAME: ${{ runner.name }}
+ run: |
+ case "$RUNNER_TAG" in
+ '["PVC_PERF"]' ) ;;
+ *)
+ echo "#"
+ echo "# WARNING: Only specific tuned runners are fully supported."
+ echo "# This workflow is not guaranteed to work with other runners."
+ echo "#" ;;
+ esac
+
+      # Ensure the runner name contains no injected characters
+ # TODO: in terms of security, is this overkill?
+ if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then
+ echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]."
+ exit 1
+ fi
+ echo "RUNNER_NAME=$RUNNER_NAME" >> $GITHUB_ENV
+
+      # inputs.target_devices is not used directly, as that would allow code injection
+ case "$TARGET_DEVICE" in
+ level_zero:*) ;;
+ *)
+ echo "#"
+ echo "# WARNING: Only level_zero backend is fully supported."
+ echo "# This workflow is not guaranteed to work with other backends."
+ echo "#" ;;
+ esac
+ echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV
+
+ - name: Compute CPU core range to run benchmarks on
+ shell: bash
+ run: |
+ # Compute the core range for the first NUMA node; second node is used by
+ # UMF. Skip the first 4 cores as the kernel is likely to schedule more
+ # work on these.
+ CORES="$(lscpu | awk '
+ /NUMA node0 CPU|On-line CPU/ {line=$0}
+ END {
+ split(line, a, " ")
+ split(a[4], b, ",")
+ sub(/^0/, "4", b[1])
+ print b[1]
+ }')"
+ echo "CPU core range to use: $CORES"
+ echo "CORES=$CORES" >> $GITHUB_ENV
+
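+          # ZE_AFFINITY_MASK=0 restricts Level Zero to the first GPU device so
+          # any additional devices do not influence the measurements.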
+ ZE_AFFINITY_MASK=0
+ echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV
+ - name: Checkout results repo
+ shell: bash
+ run: |
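+        # Historical results live in the unify-ci branch of intel/llvm-ci-perf-results;
+        # new results are pushed back to that branch after the run.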
+ git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results
+ - name: Run compute-benchmarks
+ shell: bash
+ run: |
+ # TODO generate summary + display helpful message here
+ export CMPLR_ROOT=./toolchain
+ echo "-----"
+ sycl-ls
+ echo "-----"
+ pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt
+ echo "-----"
+ mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME"
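+          # Pin the run to the NUMA-node-0 cores computed earlier to reduce
+          # scheduling noise in the measurements.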
+ taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \
+ "$(realpath ./llvm_test_workdir)" \
+ --sycl "$(realpath ./toolchain)" \
+ --save baseline \
+ --output-html remote \
+ --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \
+ --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \
+ --preset Minimal
+ echo "-----"
+ ls
+ - name: Push compute-benchmarks results
+ if: always()
+ shell: bash
+ run: |
+ # TODO redo configuration
+ # $(python ./devops/scripts/benchmarking/load_config.py ./devops constants)
+
+ cd "./llvm-ci-perf-results"
+ git config user.name "SYCL Benchmarking Bot"
+ git config user.email "sys_sycl_benchmarks@intel.com"
+ git pull
+ git add .
+ # Make sure changes have been made
+ if git diff --quiet && git diff --cached --quiet; then
+ echo "No new results added, skipping push."
+ else
+ git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}"
+ git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci
+ fi
+# - name: Find benchmark result artifact here
+# if: always()
+# shell: bash
+# run: |
+# cat << EOF
+# #
+# # Artifact link for benchmark results here:
+# #
+# EOF
+# - name: Archive compute-benchmark results
+# if: always()
+# uses: actions/upload-artifact@v4
+# with:
+# name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }})
+# path: ./artifact
diff --git a/unified-runtime/scripts/benchmarks/README.md b/devops/scripts/benchmarks/README.md
similarity index 100%
rename from unified-runtime/scripts/benchmarks/README.md
rename to devops/scripts/benchmarks/README.md
diff --git a/unified-runtime/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py
similarity index 88%
rename from unified-runtime/scripts/benchmarks/benches/base.py
rename to devops/scripts/benchmarks/benches/base.py
index d1bb5fb53b83a..77365220dbf85 100644
--- a/unified-runtime/scripts/benchmarks/benches/base.py
+++ b/devops/scripts/benchmarks/benches/base.py
@@ -6,7 +6,7 @@
import os
import shutil
from pathlib import Path
-from .result import Result
+from utils.result import Result
from options import options
from utils.utils import download, run
import urllib.request
@@ -55,16 +55,25 @@ def create_data_path(self, name, skip_data_dir=False):
data_path = os.path.join(self.directory, name)
else:
data_path = os.path.join(self.directory, "data", name)
- if options.rebuild and Path(data_path).exists():
+ if options.redownload and Path(data_path).exists():
shutil.rmtree(data_path)
Path(data_path).mkdir(parents=True, exist_ok=True)
return data_path
- def download(self, name, url, file, untar=False, unzip=False, skip_data_dir=False):
+ def download(
+ self,
+ name,
+ url,
+ file,
+ untar=False,
+ unzip=False,
+ skip_data_dir=False,
+ checksum="",
+ ):
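+        # checksum is forwarded to utils.download, which is expected to verify the
+        # downloaded file; the digests used by callers in this patch look like
+        # SHA-384 hex strings (an assumption based on their length).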
self.data_path = self.create_data_path(name, skip_data_dir)
- return download(self.data_path, url, file, untar, unzip)
+ return download(self.data_path, url, file, untar, unzip, checksum)
def name(self):
raise NotImplementedError()
diff --git a/unified-runtime/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
similarity index 70%
rename from unified-runtime/scripts/benchmarks/benches/compute.py
rename to devops/scripts/benchmarks/benches/compute.py
index 25de9f32b4122..1f335cd8838ec 100644
--- a/unified-runtime/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -8,10 +8,11 @@
import io
from utils.utils import run, git_clone, create_build_path
from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
from options import options
from enum import Enum
+
class ComputeBench(Suite):
def __init__(self, directory):
self.directory = directory
@@ -47,9 +48,8 @@ def setup(self):
f"-Dunified-runtime_DIR={options.ur}/lib/cmake/unified-runtime",
]
- print(f"{self.__class__.__name__}: Run {configure_command}")
run(configure_command, add_sycl=True)
- print(f"{self.__class__.__name__}: Run cmake --build {build_path} -j")
+
run(f"cmake --build {build_path} -j", add_sycl=True)
self.built = True
@@ -73,16 +73,6 @@ def benchmarks(self) -> list[Benchmark]:
ExecImmediateCopyQueue(self, 0, 1, "Device", "Device", 1024),
ExecImmediateCopyQueue(self, 1, 1, "Device", "Host", 1024),
VectorSum(self),
- MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1),
- MemcpyExecute(self, 100, 8, 102400, 10, 1, 1, 1),
- MemcpyExecute(self, 400, 8, 1024, 1000, 1, 1, 1),
- MemcpyExecute(self, 10, 16, 1024, 10000, 1, 1, 1),
- MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1),
- MemcpyExecute(self, 100, 8, 102400, 10, 0, 1, 1),
- MemcpyExecute(self, 400, 8, 1024, 1000, 0, 1, 1),
- MemcpyExecute(self, 10, 16, 1024, 10000, 0, 1, 1),
- MemcpyExecute(self, 4096, 1, 1024, 10, 0, 1, 0),
- MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0),
GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 5),
GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 5),
GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 100),
@@ -91,6 +81,10 @@ def benchmarks(self) -> list[Benchmark]:
GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 5),
GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 100),
GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 100),
+ UllsEmptyKernel(self, RUNTIMES.SYCL, 1000, 256),
+ UllsEmptyKernel(self, RUNTIMES.LEVEL_ZERO, 1000, 256),
+ UllsKernelSwitch(self, RUNTIMES.SYCL, 8, 200, 0, 0, 1, 1),
+ UllsKernelSwitch(self, RUNTIMES.LEVEL_ZERO, 8, 200, 0, 0, 1, 1),
]
if options.ur is not None:
@@ -98,6 +92,16 @@ def benchmarks(self) -> list[Benchmark]:
SubmitKernelUR(self, 0, 0),
SubmitKernelUR(self, 1, 0),
SubmitKernelUR(self, 1, 1),
+ MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1),
+ MemcpyExecute(self, 100, 8, 102400, 10, 1, 1, 1),
+ MemcpyExecute(self, 400, 8, 1024, 1000, 1, 1, 1),
+ MemcpyExecute(self, 10, 16, 1024, 10000, 1, 1, 1),
+ MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1),
+ MemcpyExecute(self, 100, 8, 102400, 10, 0, 1, 1),
+ MemcpyExecute(self, 400, 8, 1024, 1000, 0, 1, 1),
+ MemcpyExecute(self, 10, 16, 1024, 10000, 0, 1, 1),
+ MemcpyExecute(self, 4096, 1, 1024, 10, 0, 1, 0),
+ MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0),
GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 5),
GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 5),
GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 100),
@@ -136,6 +140,9 @@ def setup(self):
def explicit_group(self):
return ""
+ def description(self) -> str:
+ return ""
+
def run(self, env_vars) -> list[Result]:
command = [
f"{self.benchmark_bin}",
@@ -167,6 +174,7 @@ def run(self, env_vars) -> list[Result]:
env=env_vars,
stdout=result,
unit=parse_unit_type(unit),
+ description=self.description(),
)
)
return ret
@@ -221,6 +229,13 @@ def bin_args(self) -> list[str]:
"--KernelExecTime=1",
]
+ def description(self) -> str:
+ order = "in-order" if self.ioq else "out-of-order"
+ return (
+            f"Measures CPU time overhead of submitting {order} kernels through SYCL API. "
+ "Uses 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time."
+ )
+
class SubmitKernelUR(ComputeBenchmark):
def __init__(self, bench, ioq, measureCompletion):
@@ -237,6 +252,15 @@ def name(self):
def explicit_group(self):
return "SubmitKernel"
+ def description(self) -> str:
+ order = "in-order" if self.ioq else "out-of-order"
+ completion = "including" if self.measureCompletion else "excluding"
+ return (
+ f"Measures CPU time overhead of submitting {order} kernels through Unified Runtime API, "
+ f"{completion} kernel completion time. Uses 10 simple kernels with minimal execution time "
+ f"to isolate API overhead."
+ )
+
def bin_args(self) -> list[str]:
return [
f"--Ioq={self.ioq}",
@@ -261,6 +285,14 @@ def name(self):
def explicit_group(self):
return "SubmitKernel"
+ def description(self) -> str:
+ order = "in-order" if self.ioq else "out-of-order"
+ return (
+ f"Measures CPU time overhead of submitting {order} kernels through Level Zero API. "
+ f"Uses immediate command lists with 10 minimal kernels to isolate submission overhead "
+ f"from execution time."
+ )
+
def bin_args(self) -> list[str]:
return [
f"--Ioq={self.ioq}",
@@ -286,6 +318,14 @@ def name(self):
order = "in order" if self.ioq else "out of order"
return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}"
+ def description(self) -> str:
+ order = "in-order" if self.ioq else "out-of-order"
+ operation = "copy-only" if self.isCopyOnly else "copy and command submission"
+ return (
+ f"Measures SYCL {order} queue overhead for {operation} from {self.source} to "
+ f"{self.destination} memory with {self.size} bytes. Tests immediate execution overheads."
+ )
+
def bin_args(self) -> list[str]:
return [
"--iterations=100000",
@@ -309,6 +349,13 @@ def __init__(self, bench, isCopyOnly, source, destination, size):
def name(self):
return f"memory_benchmark_sycl QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}"
+ def description(self) -> str:
+ operation = "copy-only" if self.isCopyOnly else "copy and command submission"
+ return (
+ f"Measures SYCL in-order queue memory copy performance for {operation} from "
+ f"{self.source} to {self.destination} with {self.size} bytes, executed 100 times per iteration."
+ )
+
def bin_args(self) -> list[str]:
return [
"--iterations=10000",
@@ -330,6 +377,12 @@ def __init__(self, bench, source, destination, size):
def name(self):
return f"memory_benchmark_sycl QueueMemcpy from {self.source} to {self.destination}, size {self.size}"
+ def description(self) -> str:
+ return (
+ f"Measures general SYCL queue memory copy performance from {self.source} to "
+ f"{self.destination} with {self.size} bytes per operation."
+ )
+
def bin_args(self) -> list[str]:
return [
"--iterations=10000",
@@ -349,6 +402,12 @@ def __init__(self, bench, type, size, placement):
def name(self):
return f"memory_benchmark_sycl StreamMemory, placement {self.placement}, type {self.type}, size {self.size}"
+ def description(self) -> str:
+ return (
+ f"Measures {self.placement} memory bandwidth using {self.type} pattern with "
+ f"{self.size} bytes. Higher values (GB/s) indicate better performance."
+ )
+
# measurement is in GB/s
def lower_is_better(self):
return False
@@ -362,6 +421,7 @@ def bin_args(self) -> list[str]:
"--useEvents=0",
"--contents=Zeros",
"--multiplier=1",
+ "--vectorSize=1",
]
@@ -372,6 +432,12 @@ def __init__(self, bench):
def name(self):
return f"miscellaneous_benchmark_sycl VectorSum"
+ def description(self) -> str:
+ return (
+ "Measures performance of vector addition across 3D grid (512x256x256 elements) "
+ "using SYCL."
+ )
+
def bin_args(self) -> list[str]:
return [
"--iterations=1000",
@@ -408,6 +474,16 @@ def name(self):
+ (" without events" if not self.useEvents else "")
)
+ def description(self) -> str:
+ src_type = "device" if self.srcUSM == 1 else "host"
+ dst_type = "device" if self.dstUSM == 1 else "host"
+ events = "with" if self.useEvents else "without"
+ return (
+ f"Measures multithreaded memory copy performance with {self.numThreads} threads "
+ f"each performing {self.numOpsPerThread} operations on {self.allocSize} bytes "
+ f"from {src_type} to {dst_type} memory {events} events."
+ )
+
def bin_args(self) -> list[str]:
return [
"--Ioq=1",
@@ -441,6 +517,13 @@ def __init__(self, bench, runtime: RUNTIMES, withGraphs, numKernels):
def explicit_group(self):
return f"SinKernelGraph {self.numKernels}"
+ def description(self) -> str:
+ execution = "using graphs" if self.withGraphs else "without graphs"
+ return (
+ f"Measures {self.runtime.value.upper()} performance when executing {self.numKernels} "
+ f"sin kernels {execution}. Tests overhead and benefits of graph-based execution."
+ )
+
def name(self):
return f"graph_api_benchmark_{self.runtime.value} SinKernelGraph graphs:{self.withGraphs}, numKernels:{self.numKernels}"
@@ -453,27 +536,60 @@ def bin_args(self) -> list[str]:
"--immediateAppendCmdList=0",
]
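+
+# Wraps the ulls_benchmark EmptyKernel test, which repeatedly launches an empty
+# kernel to measure raw submission/launch overhead; wgc and wgs are assumed to be
+# the work-group count and work-group size forwarded to the binary.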
+class UllsEmptyKernel(ComputeBenchmark):
+ def __init__(self, bench, runtime: RUNTIMES, wgc, wgs):
+ self.wgc = wgc
+ self.wgs = wgs
+ self.runtime = runtime
+ super().__init__(
+ bench, f"ulls_benchmark_{runtime.value}", "EmptyKernel"
+ )
-class GraphApiSubmitExecGraph(ComputeBenchmark):
- def __init__(self, bench, ioq, submit, numKernels):
- self.ioq = ioq
- self.submit = submit
- self.numKernels = numKernels
- super().__init__(bench, "graph_api_benchmark_sycl", "SubmitExecGraph")
+ def explicit_group(self):
+ return f"EmptyKernel {self.wgc} {self.wgs}"
+
+ def description(self) -> str:
+ return ""
def name(self):
- return f"graph_api_benchmark_sycl SubmitExecGraph ioq:{self.ioq}, submit:{self.submit}, numKernels:{self.numKernels}"
+ return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}"
+
+ def bin_args(self) -> list[str]:
+ return [
+ "--iterations=10000",
+ f"--wgs={self.wgs}",
+            f"--wgc={self.wgc}",
+ ]
+
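+# Wraps the ulls_benchmark KernelSwitch test; the constructor arguments map
+# one-to-one onto the binary flags listed in bin_args below.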
+class UllsKernelSwitch(ComputeBenchmark):
+ def __init__(self, bench, runtime: RUNTIMES, count, kernelTime, barrier, hostVisible, ioq, ctrBasedEvents):
+ self.count = count
+ self.kernelTime = kernelTime
+ self.barrier = barrier
+ self.hostVisible = hostVisible
+ self.ctrBasedEvents = ctrBasedEvents
+ self.runtime = runtime
+ self.ioq = ioq
+ super().__init__(
+ bench, f"ulls_benchmark_{runtime.value}", "KernelSwitch"
+ )
def explicit_group(self):
- if self.submit:
- return "SubmitGraph"
- else:
- return "ExecGraph"
+ return f"KernelSwitch {self.count} {self.kernelTime}"
+
+ def description(self) -> str:
+ return ""
+
+ def name(self):
+ return f"ulls_benchmark_{self.runtime.value} KernelSwitch count {self.count} kernelTime {self.kernelTime}"
def bin_args(self) -> list[str]:
return [
- "--iterations=100",
- f"--measureSubmit={self.submit}",
+ "--iterations=1000",
+ f"--count={self.count}",
+ f"--kernelTime={self.kernelTime}",
+ f"--barrier={self.barrier}",
+ f"--hostVisible={self.hostVisible}",
f"--ioq={self.ioq}",
- f"--numKernels={self.numKernels}",
+ f"--ctrBasedEvents={self.ctrBasedEvents}",
]
diff --git a/unified-runtime/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py
similarity index 86%
rename from unified-runtime/scripts/benchmarks/benches/llamacpp.py
rename to devops/scripts/benchmarks/benches/llamacpp.py
index 6524c95a9f56f..c12f811942849 100644
--- a/unified-runtime/scripts/benchmarks/benches/llamacpp.py
+++ b/devops/scripts/benchmarks/benches/llamacpp.py
@@ -8,10 +8,10 @@
from pathlib import Path
from utils.utils import download, git_clone
from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
from utils.utils import run, create_build_path
from options import options
-from .oneapi import get_oneapi
+from utils.oneapi import get_oneapi
import os
@@ -43,6 +43,7 @@ def setup(self):
self.models_dir,
"https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf",
"Phi-3-mini-4k-instruct-q4.gguf",
+ checksum="fc4f45c9729874a33a527465b2ec78189a18e5726b7121182623feeae38632ace4f280617b01d4a04875acf49d263ee4",
)
self.oneapi = get_oneapi()
@@ -62,9 +63,9 @@ def setup(self):
f'-DCMAKE_CXX_FLAGS=-I"{self.oneapi.mkl_include()}"',
f"-DCMAKE_SHARED_LINKER_FLAGS=-L{self.oneapi.compiler_lib()} -L{self.oneapi.mkl_lib()}",
]
- print(f"{self.__class__.__name__}: Run {configure_command}")
+
run(configure_command, add_sycl=True)
- print(f"{self.__class__.__name__}: Run cmake --build {self.build_path} -j")
+
run(
f"cmake --build {self.build_path} -j",
add_sycl=True,
@@ -92,6 +93,14 @@ def setup(self):
def name(self):
return f"llama.cpp"
+ def description(self) -> str:
+ return (
+ "Performance testing tool for llama.cpp that measures LLM inference speed in tokens per second. "
+ "Runs both prompt processing (initial context processing) and text generation benchmarks with "
+ "different batch sizes. Higher values indicate better performance. Uses the Phi-3-mini-4k-instruct "
+ "quantized model and leverages SYCL with oneDNN for acceleration."
+ )
+
def lower_is_better(self):
return False
@@ -130,6 +139,7 @@ def run(self, env_vars) -> list[Result]:
env=env_vars,
stdout=result,
unit="token/s",
+ description=self.description(),
)
)
return results
diff --git a/unified-runtime/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py
similarity index 93%
rename from unified-runtime/scripts/benchmarks/benches/syclbench.py
rename to devops/scripts/benchmarks/benches/syclbench.py
index f7cf571a7ecd7..cc2db0a2fcf7c 100644
--- a/unified-runtime/scripts/benchmarks/benches/syclbench.py
+++ b/devops/scripts/benchmarks/benches/syclbench.py
@@ -8,7 +8,7 @@
import io
from utils.utils import run, git_clone, create_build_path
from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
from options import options
@@ -65,14 +65,14 @@ def benchmarks(self) -> list[Benchmark]:
DagTaskS(self),
HostDevBandwidth(self),
LocalMem(self),
- Pattern_L2(self),
- Reduction(self),
+ # Pattern_L2(self), # validation failure
+ # Reduction(self), # validation failure
ScalarProd(self),
SegmentReduction(self),
- UsmAccLatency(self),
+ # UsmAccLatency(self), # validation failure
UsmAllocLatency(self),
- UsmInstrMix(self),
- UsmPinnedOverhead(self),
+ # UsmInstrMix(self), # validation failure
+ # UsmPinnedOverhead(self), # validation failure
VecAdd(self),
# *** sycl-bench single benchmarks
# TwoDConvolution(self), # run time < 1ms
@@ -82,20 +82,20 @@ def benchmarks(self) -> list[Benchmark]:
Atax(self),
# Atomic_reduction(self), # run time < 1ms
Bicg(self),
- Correlation(self),
- Covariance(self),
- Gemm(self),
- Gesumv(self),
- Gramschmidt(self),
+ # Correlation(self), # validation failure
+ # Covariance(self), # validation failure
+ # Gemm(self), # validation failure
+ # Gesumv(self), # validation failure
+ # Gramschmidt(self), # validation failure
KMeans(self),
LinRegCoeff(self),
# LinRegError(self), # run time < 1ms
- MatmulChain(self),
+ # MatmulChain(self), # validation failure
MolDyn(self),
- Mvt(self),
+ # Mvt(self), # validation failure
Sf(self),
- Syr2k(self),
- Syrk(self),
+ # Syr2k(self), # validation failure
+ # Syrk(self), # validation failure
]
@@ -105,7 +105,6 @@ def __init__(self, bench, name, test):
self.bench = bench
self.bench_name = name
self.test = test
- self.done = False
def bin_args(self) -> list[str]:
return []
@@ -119,10 +118,8 @@ def setup(self):
)
def run(self, env_vars) -> list[Result]:
- if self.done:
- return
self.outputfile = os.path.join(self.bench.directory, self.test + ".csv")
- print(f"{self.__class__.__name__}: Results in {self.outputfile}")
+
command = [
f"{self.benchmark_bin}",
f"--warmup-run",
@@ -143,7 +140,7 @@ def run(self, env_vars) -> list[Result]:
if not row[0].startswith("#"):
res_list.append(
Result(
- label=row[0],
+ label=f"{self.name()} {row[0]}",
value=float(row[12]) * 1000, # convert to ms
passed=(row[1] == "PASS"),
command=command,
@@ -152,16 +149,16 @@ def run(self, env_vars) -> list[Result]:
unit="ms",
)
)
- self.done = True
- return res_list
- def teardown(self):
- print(f"Removing {self.outputfile}...")
os.remove(self.outputfile)
- return
+
+ return res_list
def name(self):
- return self.test
+ return f"{self.bench.name()} {self.test}"
+
+ def teardown(self):
+ return
# multi benchmarks
diff --git a/unified-runtime/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py
similarity index 80%
rename from unified-runtime/scripts/benchmarks/benches/test.py
rename to devops/scripts/benchmarks/benches/test.py
index 06eac12b25344..18794d4e9c73c 100644
--- a/unified-runtime/scripts/benchmarks/benches/test.py
+++ b/devops/scripts/benchmarks/benches/test.py
@@ -6,7 +6,7 @@
import random
from utils.utils import git_clone
from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
from utils.utils import run, create_build_path
from options import options
import os
@@ -19,6 +19,9 @@ def __init__(self):
def setup(self):
return
+ def name(self) -> str:
+ return "Test Suite"
+
def benchmarks(self) -> list[Benchmark]:
bench_configs = [
("Memory Bandwidth", 2000, 200, "Foo Group"),
@@ -36,18 +39,18 @@ def benchmarks(self) -> list[Benchmark]:
value = base_value * value_multiplier
diff = base_diff * value_multiplier
- result.append(TestBench(name, value, diff, group))
+ result.append(TestBench(self, name, value, diff, group))
return result
class TestBench(Benchmark):
- def __init__(self, name, value, diff, group=""):
+ def __init__(self, suite, name, value, diff, group=""):
+ super().__init__("", suite)
self.bname = name
self.value = value
self.diff = diff
self.group = group
- super().__init__("")
def name(self):
return self.bname
@@ -58,6 +61,9 @@ def lower_is_better(self):
def setup(self):
return
+ def description(self) -> str:
+ return f"This is a test benchmark for {self.bname}."
+
def run(self, env_vars) -> list[Result]:
random_value = self.value + random.uniform(-1 * (self.diff), self.diff)
return [
@@ -65,10 +71,11 @@ def run(self, env_vars) -> list[Result]:
label=self.name(),
explicit_group=self.group,
value=random_value,
- command="",
+ command=["test", "--arg1", "foo"],
env={"A": "B"},
stdout="no output",
unit="ms",
+ description=self.description(),
)
]
diff --git a/unified-runtime/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py
similarity index 97%
rename from unified-runtime/scripts/benchmarks/benches/umf.py
rename to devops/scripts/benchmarks/benches/umf.py
index c7b767f02bbe1..1f736e7755f92 100644
--- a/unified-runtime/scripts/benchmarks/benches/umf.py
+++ b/devops/scripts/benchmarks/benches/umf.py
@@ -6,10 +6,10 @@
import random
from utils.utils import git_clone
from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
from utils.utils import run, create_build_path
from options import options
-from .oneapi import get_oneapi
+from utils.oneapi import get_oneapi
import os
import csv
import io
@@ -22,8 +22,6 @@ def isUMFAvailable():
class UMFSuite(Suite):
def __init__(self, directory):
self.directory = directory
- if not isUMFAvailable():
- print("UMF not provided. Related benchmarks will not run")
def name(self) -> str:
return "UMF"
diff --git a/unified-runtime/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py
similarity index 79%
rename from unified-runtime/scripts/benchmarks/benches/velocity.py
rename to devops/scripts/benchmarks/benches/velocity.py
index b7d06cbe4a3a2..652a831d0222e 100644
--- a/unified-runtime/scripts/benchmarks/benches/velocity.py
+++ b/devops/scripts/benchmarks/benches/velocity.py
@@ -7,10 +7,10 @@
import shutil
from utils.utils import git_clone
from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
from utils.utils import run, create_build_path
from options import options
-from .oneapi import get_oneapi
+from utils.oneapi import get_oneapi
import shutil
import os
@@ -115,6 +115,9 @@ def extra_env_vars(self) -> dict:
def parse_output(self, stdout: str) -> float:
raise NotImplementedError()
+ def description(self) -> str:
+ return ""
+
def run(self, env_vars) -> list[Result]:
env_vars.update(self.extra_env_vars())
@@ -133,6 +136,7 @@ def run(self, env_vars) -> list[Result]:
env=env_vars,
stdout=result,
unit=self.unit,
+ description=self.description(),
)
]
@@ -147,6 +151,12 @@ def __init__(self, vb: VelocityBench):
def name(self):
return "Velocity-Bench Hashtable"
+ def description(self) -> str:
+ return (
+ "Measures hash table search performance using an efficient lock-free algorithm with linear probing. "
+ "Reports throughput in millions of keys processed per second. Higher values indicate better performance."
+ )
+
def bin_args(self) -> list[str]:
return ["--no-verify"]
@@ -170,6 +180,13 @@ def __init__(self, vb: VelocityBench):
def name(self):
return "Velocity-Bench Bitcracker"
+ def description(self) -> str:
+ return (
+ "Password-cracking application for BitLocker-encrypted memory units. "
+ "Uses dictionary attack to find user or recovery passwords. "
+ "Measures total time required to process 60000 passwords."
+ )
+
def bin_args(self) -> list[str]:
self.data_path = os.path.join(self.vb.repo_path, "bitcracker", "hash_pass")
@@ -204,11 +221,19 @@ def download_deps(self):
"https://github.com/oneapi-src/Velocity-Bench/raw/main/sobel_filter/res/sobel_filter_data.tgz?download=",
"sobel_filter_data.tgz",
untar=True,
+ checksum="7fc62aa729792ede80ed8ae70fb56fa443d479139c5888ed4d4047b98caec106687a0f05886a9ced77922ccba7f65e66",
)
def name(self):
return "Velocity-Bench Sobel Filter"
+ def description(self) -> str:
+ return (
+            "Popular RGB-to-grayscale image conversion technique that applies a Gaussian filter "
+ "to reduce edge artifacts. Processes a large 32K x 32K image and measures "
+ "the time required to apply the filter."
+ )
+
def bin_args(self) -> list[str]:
return [
"-i",
@@ -249,6 +274,13 @@ def run(self, env_vars) -> list[Result]:
def name(self):
return "Velocity-Bench QuickSilver"
+ def description(self) -> str:
+ return (
+ "Solves a simplified dynamic Monte Carlo particle-transport problem used in HPC. "
+ "Replicates memory access patterns, communication patterns, and branching of Mercury workloads. "
+ "Reports a figure of merit in MMS/CTT where higher values indicate better performance."
+ )
+
def lower_is_better(self):
return False
@@ -279,14 +311,22 @@ def __init__(self, vb: VelocityBench):
def download_deps(self):
self.download(
"easywave",
- "https://git.gfz-potsdam.de/id2/geoperil/easyWave/-/raw/master/data/examples.tar.gz",
+ "https://gitlab.oca.eu/AstroGeoGPM/eazyWave/-/raw/master/data/examples.tar.gz",
"examples.tar.gz",
untar=True,
+ checksum="3b0cd0efde10122934ba6db8451b8c41f4f95a3370fc967fc5244039ef42aae7e931009af1586fa5ed2143ade8ed47b1",
)
def name(self):
return "Velocity-Bench Easywave"
+ def description(self) -> str:
+ return (
+ "A tsunami wave simulator used for researching tsunami generation and wave propagation. "
+ "Measures the elapsed time in milliseconds to simulate a specified tsunami event "
+ "based on real-world data."
+ )
+
def bin_args(self) -> list[str]:
return [
"-grid",
@@ -341,6 +381,13 @@ def download_deps(self):
def name(self):
return "Velocity-Bench CudaSift"
+ def description(self) -> str:
+ return (
+ "Implementation of the SIFT (Scale Invariant Feature Transform) algorithm "
+ "for detecting, describing, and matching local features in images. "
+ "Measures average processing time in milliseconds."
+ )
+
def parse_output(self, stdout: str) -> float:
match = re.search(r"Avg workload time = (\d+\.\d+) ms", stdout)
if match:
@@ -364,6 +411,7 @@ def download_deps(self):
"cifar-10-binary.tar.gz",
untar=True,
skip_data_dir=True,
+ checksum="974b1bd62da0cb3b7a42506d42b1e030c9a0cb4a0f2c359063f9c0e65267c48f0329e4493c183a348f44ddc462eaf814",
)
return
@@ -382,6 +430,13 @@ def extra_cmake_args(self):
def name(self):
return "Velocity-Bench dl-cifar"
+ def description(self) -> str:
+ return (
+ "Deep learning image classification workload based on the CIFAR-10 dataset "
+ "of 60,000 32x32 color images in 10 classes. Uses neural networks to "
+ "classify input images and measures total calculation time."
+ )
+
def parse_output(self, stdout: str) -> float:
match = re.search(
r"dl-cifar - total time for whole calculation: (\d+\.\d+) s", stdout
@@ -407,6 +462,7 @@ def download_deps(self):
"train-images.idx3-ubyte.gz",
unzip=True,
skip_data_dir=True,
+ checksum="f40eb179f7c3d2637e789663bde56d444a23e4a0a14477a9e6ed88bc39c8ad6eaff68056c0cd9bb60daf0062b70dc8ee",
)
self.download(
"datasets",
@@ -414,6 +470,7 @@ def download_deps(self):
"train-labels.idx1-ubyte.gz",
unzip=True,
skip_data_dir=True,
+ checksum="ba9c11bf9a7f7c2c04127b8b3e568cf70dd3429d9029ca59b7650977a4ac32f8ff5041fe42bc872097487b06a6794e00",
)
self.download(
"datasets",
@@ -421,6 +478,7 @@ def download_deps(self):
"t10k-images.idx3-ubyte.gz",
unzip=True,
skip_data_dir=True,
+ checksum="1bf45877962fd391f7abb20534a30fd2203d0865309fec5f87d576dbdbefdcb16adb49220afc22a0f3478359d229449c",
)
self.download(
"datasets",
@@ -428,6 +486,7 @@ def download_deps(self):
"t10k-labels.idx1-ubyte.gz",
unzip=True,
skip_data_dir=True,
+ checksum="ccc1ee70f798a04e6bfeca56a4d0f0de8d8eeeca9f74641c1e1bfb00cf7cc4aa4d023f6ea1b40e79bb4707107845479d",
)
def extra_cmake_args(self):
@@ -445,6 +504,13 @@ def extra_cmake_args(self):
def name(self):
return "Velocity-Bench dl-mnist"
+ def description(self) -> str:
+ return (
+ "Digit recognition based on the MNIST database, one of the oldest and most popular "
+ "databases of handwritten digits. Uses neural networks to identify digits "
+ "and measures total calculation time."
+ )
+
def bin_args(self):
return ["-conv_algo", "ONEDNN_AUTO"]
@@ -488,6 +554,13 @@ def extra_cmake_args(self):
def name(self):
return "Velocity-Bench svm"
+ def description(self) -> str:
+ return (
+ "Implementation of Support Vector Machine, a popular classical machine learning technique. "
+ "Uses supervised learning models with associated algorithms to analyze data "
+ "for classification and regression analysis. Measures total elapsed time."
+ )
+
def bin_args(self):
return [
f"{self.code_path}/a9a",
diff --git a/unified-runtime/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py
similarity index 70%
rename from unified-runtime/scripts/benchmarks/history.py
rename to devops/scripts/benchmarks/history.py
index 7902aa4f04c35..d1bdc3bfdb940 100644
--- a/unified-runtime/scripts/benchmarks/history.py
+++ b/devops/scripts/benchmarks/history.py
@@ -6,14 +6,14 @@
import os
import json
from pathlib import Path
-from benches.result import Result, BenchmarkRun
+import socket
+from utils.result import Result, BenchmarkRun
from options import Compare, options
from datetime import datetime, timezone
from utils.utils import run
class BenchmarkHistory:
- benchmark_run_index_max = 0
runs = []
def __init__(self, dir):
@@ -35,42 +35,55 @@ def load(self, n: int):
# Get all JSON files in the results directory
benchmark_files = list(results_dir.glob("*.json"))
- # Extract index numbers and sort files by index number
- def extract_index(file_path: Path) -> int:
+ # Extract timestamp and sort files by it
+ def extract_timestamp(file_path: Path) -> str:
try:
- return int(file_path.stem.split("_")[0])
- except (IndexError, ValueError):
- return -1
+                # Filenames end in "<%Y%m%d>_<%H%M%S>"; keep both parts so sorting
+                # is chronological across days, not just within a single day.
+                return "_".join(file_path.stem.split("_")[-2:])
+ except IndexError:
+ return ""
- benchmark_files = [
- file for file in benchmark_files if extract_index(file) != -1
- ]
- benchmark_files.sort(key=extract_index)
+ benchmark_files.sort(key=extract_timestamp, reverse=True)
# Load the first n benchmark files
benchmark_runs = []
- for file_path in benchmark_files[n::-1]:
+ for file_path in benchmark_files[:n]:
benchmark_run = self.load_result(file_path)
if benchmark_run:
benchmark_runs.append(benchmark_run)
- if benchmark_files:
- self.benchmark_run_index_max = extract_index(benchmark_files[-1])
-
self.runs = benchmark_runs
def create_run(self, name: str, results: list[Result]) -> BenchmarkRun:
try:
- result = run("git rev-parse --short HEAD")
+ script_dir = os.path.dirname(os.path.abspath(__file__))
+ result = run("git rev-parse --short HEAD", cwd=script_dir)
git_hash = result.stdout.decode().strip()
+
+ # Get the GitHub repo URL from git remote
+ remote_result = run("git remote get-url origin", cwd=script_dir)
+ remote_url = remote_result.stdout.decode().strip()
+
+ # Convert SSH or HTTPS URL to owner/repo format
+ if remote_url.startswith("git@github.com:"):
+ # SSH format: git@github.com:owner/repo.git
+                github_repo = remote_url.split("git@github.com:")[1].removesuffix(".git")
+ elif remote_url.startswith("https://github.com/"):
+ # HTTPS format: https://github.com/owner/repo.git
+                github_repo = remote_url.split("https://github.com/")[1].removesuffix(".git")
+ else:
+ github_repo = None
+
except:
git_hash = "unknown"
+ github_repo = None
return BenchmarkRun(
name=name,
git_hash=git_hash,
+ github_repo=github_repo,
date=datetime.now(tz=timezone.utc),
results=results,
+ hostname=socket.gethostname()
)
def save(self, save_name, results: list[Result], to_file=True):
@@ -84,10 +97,11 @@ def save(self, save_name, results: list[Result], to_file=True):
results_dir = Path(os.path.join(self.dir, "results"))
os.makedirs(results_dir, exist_ok=True)
- self.benchmark_run_index_max += 1
+ # Use formatted timestamp for the filename
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
file_path = Path(
os.path.join(
- results_dir, f"{self.benchmark_run_index_max}_{save_name}.json"
+ results_dir, f"{save_name}_{timestamp}.json"
)
)
with file_path.open("w") as file:
@@ -120,6 +134,7 @@ def compute_average(self, data: list[BenchmarkRun]):
name=first_run.name,
git_hash="average",
date=first_run.date, # should this be different?
+ hostname=first_run.hostname
)
return average_benchmark_run
diff --git a/devops/scripts/benchmarks/html/config.js b/devops/scripts/benchmarks/html/config.js
new file mode 100644
index 0000000000000..3e67ae1dce8e5
--- /dev/null
+++ b/devops/scripts/benchmarks/html/config.js
@@ -0,0 +1,2 @@
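+// Optional overrides read by scripts.js: set remoteDataUrl to fetch benchmark data
+// from a remote JSON file, and defaultCompareNames to pre-select runs to compare.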
+//remoteDataUrl = 'https://example.com/data.json';
+//defaultCompareNames = ['baseline'];
diff --git a/devops/scripts/benchmarks/html/data.js b/devops/scripts/benchmarks/html/data.js
new file mode 100644
index 0000000000000..a5b96c72834ba
--- /dev/null
+++ b/devops/scripts/benchmarks/html/data.js
@@ -0,0 +1,3 @@
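+// Placeholder data file; benchmarkRuns and defaultCompareNames are expected to be
+// filled in by the benchmark tooling when local (non-remote) HTML output is generated.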
+benchmarkRuns = [];
+
+defaultCompareNames = [];
diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html
new file mode 100644
index 0000000000000..c10844f15c707
--- /dev/null
+++ b/devops/scripts/benchmarks/html/index.html
@@ -0,0 +1,205 @@
+
+
+
+
+
+
+ Benchmark Results
+
+
+
+
+
+
+
+
+
+
Benchmark Results
+
+ Loading data, please wait...
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Historical Results
+
+
+
+ Comparisons
+
+
+
+
+
diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js
new file mode 100644
index 0000000000000..2bd52a70b07c8
--- /dev/null
+++ b/devops/scripts/benchmarks/html/scripts.js
@@ -0,0 +1,565 @@
+// Copyright (C) 2024 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Core state
+let activeRuns = new Set(defaultCompareNames);
+let chartInstances = new Map();
+let suiteNames = new Set();
+let timeseriesData, barChartsData, allRunNames;
+
+// DOM Elements
+let runSelect, selectedRunsDiv, suiteFiltersContainer;
+
+// Run selector functions
+function updateSelectedRuns(forceUpdate = true) {
+ selectedRunsDiv.innerHTML = '';
+ activeRuns.forEach(name => {
+ selectedRunsDiv.appendChild(createRunElement(name));
+ });
+ if (forceUpdate)
+ updateCharts();
+}
+
+function createRunElement(name) {
+ const runElement = document.createElement('span');
+ runElement.className = 'selected-run';
+ runElement.innerHTML = `${name} `;
+ return runElement;
+}
+
+function addSelectedRun() {
+ const selectedRun = runSelect.value;
+ if (selectedRun && !activeRuns.has(selectedRun)) {
+ activeRuns.add(selectedRun);
+ updateSelectedRuns();
+ }
+}
+
+function removeRun(name) {
+ activeRuns.delete(name);
+ updateSelectedRuns();
+}
+
+// Chart creation and update
+function createChart(data, containerId, type) {
+ if (chartInstances.has(containerId)) {
+ chartInstances.get(containerId).destroy();
+ }
+
+ const ctx = document.getElementById(containerId).getContext('2d');
+ const options = {
+ responsive: true,
+ plugins: {
+ title: {
+ display: true,
+ text: data.label
+ },
+ subtitle: {
+ display: true,
+ text: data.lower_is_better ? "Lower is better" : "Higher is better"
+ },
+ tooltip: {
+ callbacks: {
+ label: (context) => {
+ if (type === 'time') {
+ const point = context.raw;
+ return [
+ `${data.label}:`,
+ `Value: ${point.y.toFixed(2)} ${data.unit}`,
+ `Stddev: ${point.stddev.toFixed(2)} ${data.unit}`,
+ `Git Hash: ${point.gitHash}`,
+ ];
+ } else {
+ return [`${context.dataset.label}:`,
+ `Value: ${context.parsed.y.toFixed(2)} ${data.unit}`,
+ ];
+ }
+ }
+ }
+ }
+ },
+ scales: {
+ y: {
+ title: {
+ display: true,
+ text: data.unit
+ }
+ }
+ }
+ };
+
+ if (type === 'time') {
+ options.interaction = {
+ mode: 'nearest',
+ intersect: false
+ };
+ options.onClick = (event, elements) => {
+ if (elements.length > 0) {
+ const point = elements[0].element.$context.raw;
+ if (point.gitHash && point.gitRepo) {
+ window.open(`https://github.com/${point.gitRepo}/commit/${point.gitHash}`, '_blank');
+ }
+ }
+ };
+ options.scales.x = {
+ type: 'time',
+ ticks: {
+ maxRotation: 45,
+ minRotation: 45,
+ autoSkip: true,
+ maxTicksLimit: 10
+ }
+ };
+ }
+
+ const chartConfig = {
+ type: type === 'time' ? 'line' : 'bar',
+ data: type === 'time' ? {
+ datasets: createTimeseriesDatasets(data)
+ } : {
+ labels: data.labels,
+ datasets: data.datasets
+ },
+ options: options
+ };
+
+ const chart = new Chart(ctx, chartConfig);
+ chartInstances.set(containerId, chart);
+ return chart;
+}
+
+function createTimeseriesDatasets(data) {
+ return Object.entries(data.runs).map(([name, points]) => ({
+ label: name,
+ data: points.map(p => ({
+ x: new Date(p.date),
+ y: p.value,
+ gitHash: p.git_hash,
+ gitRepo: p.github_repo,
+ stddev: p.stddev
+ })),
+ borderWidth: 1,
+ pointRadius: 3,
+ pointStyle: 'circle',
+ pointHoverRadius: 5
+ }));
+}
+
+function updateCharts() {
+ // Filter data by active runs
+ const filteredTimeseriesData = timeseriesData.map(chart => ({
+ ...chart,
+ runs: Object.fromEntries(
+ Object.entries(chart.runs).filter(([name]) => activeRuns.has(name))
+ )
+ }));
+
+ const filteredBarChartsData = barChartsData.map(chart => ({
+ ...chart,
+ labels: chart.labels.filter(label => activeRuns.has(label)),
+ datasets: chart.datasets.map(dataset => ({
+ ...dataset,
+ data: dataset.data.filter((_, i) => activeRuns.has(chart.labels[i]))
+ }))
+ }));
+
+ // Draw charts with filtered data
+ drawCharts(filteredTimeseriesData, filteredBarChartsData);
+}
+
+function drawCharts(filteredTimeseriesData, filteredBarChartsData) {
+ // Clear existing charts
+ document.querySelectorAll('.charts').forEach(container => container.innerHTML = '');
+ chartInstances.forEach(chart => chart.destroy());
+ chartInstances.clear();
+
+ // Create timeseries charts
+ filteredTimeseriesData.forEach((data, index) => {
+ const containerId = `timeseries-${index}`;
+ const container = createChartContainer(data, containerId);
+ document.querySelector('.timeseries .charts').appendChild(container);
+ createChart(data, containerId, 'time');
+ });
+
+ // Create bar charts
+ filteredBarChartsData.forEach((data, index) => {
+ const containerId = `barchart-${index}`;
+ const container = createChartContainer(data, containerId);
+ document.querySelector('.bar-charts .charts').appendChild(container);
+ createChart(data, containerId, 'bar');
+ });
+
+ // Apply current filters
+ filterCharts();
+}
+
+function createChartContainer(data, canvasId) {
+ const container = document.createElement('div');
+ container.className = 'chart-container';
+ container.setAttribute('data-label', data.label);
+ container.setAttribute('data-suite', data.suite);
+
+ const canvas = document.createElement('canvas');
+ canvas.id = canvasId;
+ container.appendChild(canvas);
+
+ // Create details section for extra info
+ const details = document.createElement('details');
+ const summary = document.createElement('summary');
+ summary.textContent = "Details";
+
+ // Add subtle download button to the summary
+ const downloadButton = document.createElement('button');
+ downloadButton.className = 'download-button';
+ downloadButton.textContent = 'Download';
+ downloadButton.onclick = (event) => {
+ event.stopPropagation(); // Prevent details toggle
+ downloadChart(canvasId, data.label);
+ };
+ summary.appendChild(downloadButton);
+ details.appendChild(summary);
+
+    const latestRunsLookup = createLatestRunsLookup(benchmarkRuns);
+
+ // Create and append extra info
+ const extraInfo = document.createElement('div');
+ extraInfo.className = 'extra-info';
+ extraInfo.innerHTML = generateExtraInfo(latestRunsLookup, data);
+ details.appendChild(extraInfo);
+
+ container.appendChild(details);
+
+ return container;
+}
+
+// Pre-compute a lookup for the latest run per label
+function createLatestRunsLookup(benchmarkRuns) {
+ const latestRunsMap = new Map();
+
+ benchmarkRuns.forEach(run => {
+ // Yes, we need to convert the date every time. I checked.
+ const runDate = new Date(run.date);
+ run.results.forEach(result => {
+ const label = result.label;
+ if (!latestRunsMap.has(label) || runDate > new Date(latestRunsMap.get(label).date)) {
+ latestRunsMap.set(label, {
+ run,
+ result
+ });
+ }
+ });
+ });
+
+ return latestRunsMap;
+}
+
+function generateExtraInfo(latestRunsLookup, data) {
+ const labels = data.datasets ? data.datasets.map(dataset => dataset.label) : [data.label];
+
+ return labels.map(label => {
+ const latestRun = latestRunsLookup.get(label);
+
+ if (latestRun) {
+ return ``;
+ }
+ return ``;
+ }).join('');
+}
+
+function formatCommand(run) {
+ const envVars = Object.entries(run.env || {}).map(([key, value]) => `${key}=${value}`).join(' ');
+ let command = run.command ? [...run.command] : [];
+
+ return `${envVars} ${command.join(' ')}`.trim();
+}
+
+function downloadChart(canvasId, label) {
+ const chart = chartInstances.get(canvasId);
+ if (chart) {
+ const link = document.createElement('a');
+ link.href = chart.toBase64Image('image/png', 1)
+ link.download = `${label}.png`;
+ link.click();
+ }
+}
+
+// URL and filtering functions
+function getQueryParam(param) {
+ const urlParams = new URLSearchParams(window.location.search);
+ return urlParams.get(param);
+}
+
+function updateURL() {
+ const url = new URL(window.location);
+ const regex = document.getElementById('bench-filter').value;
+ const activeSuites = getActiveSuites();
+ const activeRunsList = Array.from(activeRuns);
+
+ if (regex) {
+ url.searchParams.set('regex', regex);
+ } else {
+ url.searchParams.delete('regex');
+ }
+
+ if (activeSuites.length > 0 && activeSuites.length != suiteNames.size) {
+ url.searchParams.set('suites', activeSuites.join(','));
+ } else {
+ url.searchParams.delete('suites');
+ }
+
+ // Handle the runs parameter
+ if (activeRunsList.length > 0) {
+ // Check if the active runs are the same as default runs
+ const defaultRuns = new Set(defaultCompareNames || []);
+ const isDefaultRuns = activeRunsList.length === defaultRuns.size &&
+ activeRunsList.every(run => defaultRuns.has(run));
+
+ if (isDefaultRuns) {
+ // If it's just the default runs, omit the parameter entirely
+ url.searchParams.delete('runs');
+ } else {
+ url.searchParams.set('runs', activeRunsList.join(','));
+ }
+ } else {
+ url.searchParams.delete('runs');
+ }
+
+ history.replaceState(null, '', url);
+}
+
+function filterCharts() {
+ const regexInput = document.getElementById('bench-filter').value;
+ const regex = new RegExp(regexInput, 'i');
+ const activeSuites = getActiveSuites();
+
+ document.querySelectorAll('.chart-container').forEach(container => {
+ const label = container.getAttribute('data-label');
+ const suite = container.getAttribute('data-suite');
+ container.style.display = (regex.test(label) && activeSuites.includes(suite)) ? '' : 'none';
+ });
+
+ updateURL();
+}
+
+function getActiveSuites() {
+ return Array.from(document.querySelectorAll('.suite-checkbox:checked'))
+ .map(checkbox => checkbox.getAttribute('data-suite'));
+}
+
+// Data processing
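+// Build one time series per (benchmark label, run name) pair: each run contributes
+// a dated point carrying its value, stddev, and git metadata for the line charts.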
+function processTimeseriesData(benchmarkRuns) {
+ const resultsByLabel = {};
+
+ benchmarkRuns.forEach(run => {
+ const runDate = run.date ? new Date(run.date) : null;
+ run.results.forEach(result => {
+ if (!resultsByLabel[result.label]) {
+ resultsByLabel[result.label] = {
+ label: result.label,
+ suite: result.suite,
+ unit: result.unit,
+ lower_is_better: result.lower_is_better,
+ runs: {}
+ };
+ }
+
+ if (!resultsByLabel[result.label].runs[run.name]) {
+ resultsByLabel[result.label].runs[run.name] = [];
+ }
+
+ resultsByLabel[result.label].runs[run.name].push({
+ date: runDate,
+ value: result.value,
+ stddev: result.stddev,
+ git_hash: run.git_hash,
+ github_repo: run.github_repo
+ });
+ });
+ });
+
+ return Object.values(resultsByLabel);
+}
+
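+// Group results by explicit_group for the bar charts: run names become the chart
+// labels and each benchmark in the group becomes a dataset.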
+function processBarChartsData(benchmarkRuns) {
+ const groupedResults = {};
+
+ benchmarkRuns.forEach(run => {
+ run.results.forEach(result => {
+ if (!result.explicit_group) return;
+
+ if (!groupedResults[result.explicit_group]) {
+ groupedResults[result.explicit_group] = {
+ label: result.explicit_group,
+ suite: result.suite,
+ unit: result.unit,
+ lower_is_better: result.lower_is_better,
+ labels: [],
+ datasets: []
+ };
+ }
+
+ const group = groupedResults[result.explicit_group];
+
+ if (!group.labels.includes(run.name)) {
+ group.labels.push(run.name);
+ }
+
+ let dataset = group.datasets.find(d => d.label === result.label);
+ if (!dataset) {
+ dataset = {
+ label: result.label,
+ data: new Array(group.labels.length).fill(null)
+ };
+ group.datasets.push(dataset);
+ }
+
+ const runIndex = group.labels.indexOf(run.name);
+ dataset.data[runIndex] = result.value;
+ });
+ });
+
+ return Object.values(groupedResults);
+}
+
+// Setup functions
+function setupRunSelector() {
+ runSelect = document.getElementById('run-select');
+ selectedRunsDiv = document.getElementById('selected-runs');
+
+ allRunNames.forEach(name => {
+ const option = document.createElement('option');
+ option.value = name;
+ option.textContent = name;
+ runSelect.appendChild(option);
+ });
+
+ updateSelectedRuns(false);
+}
+
+function setupSuiteFilters() {
+ suiteFiltersContainer = document.getElementById('suite-filters');
+
+ benchmarkRuns.forEach(run => {
+ run.results.forEach(result => {
+ suiteNames.add(result.suite);
+ });
+ });
+
+ suiteNames.forEach(suite => {
+ const label = document.createElement('label');
+ const checkbox = document.createElement('input');
+ checkbox.type = 'checkbox';
+ checkbox.className = 'suite-checkbox';
+ checkbox.dataset.suite = suite;
+ checkbox.checked = true;
+ label.appendChild(checkbox);
+ label.appendChild(document.createTextNode(' ' + suite));
+ suiteFiltersContainer.appendChild(label);
+ suiteFiltersContainer.appendChild(document.createTextNode(' '));
+ });
+}
+
+function initializeCharts() {
+ // Process raw data
+ timeseriesData = processTimeseriesData(benchmarkRuns);
+ barChartsData = processBarChartsData(benchmarkRuns);
+ allRunNames = [...new Set(benchmarkRuns.map(run => run.name))];
+
+ // Set up active runs
+ const runsParam = getQueryParam('runs');
+ if (runsParam) {
+ const runsFromUrl = runsParam.split(',');
+
+ // Start with an empty set
+ activeRuns = new Set();
+
+ // Process each run from URL
+ runsFromUrl.forEach(run => {
+ if (run === 'default') {
+ // Special case: include all default runs
+ (defaultCompareNames || []).forEach(defaultRun => {
+ if (allRunNames.includes(defaultRun)) {
+ activeRuns.add(defaultRun);
+ }
+ });
+ } else if (allRunNames.includes(run)) {
+ // Add the specific run if it exists
+ activeRuns.add(run);
+ }
+ });
+ } else {
+ // No runs parameter, use defaults
+ activeRuns = new Set(defaultCompareNames || []);
+ }
+
+ // Setup UI components
+ setupRunSelector();
+ setupSuiteFilters();
+
+ // Apply URL parameters
+ const regexParam = getQueryParam('regex');
+ const suitesParam = getQueryParam('suites');
+
+ if (regexParam) {
+ document.getElementById('bench-filter').value = regexParam;
+ }
+
+ if (suitesParam) {
+ const suites = suitesParam.split(',');
+ document.querySelectorAll('.suite-checkbox').forEach(checkbox => {
+ checkbox.checked = suites.includes(checkbox.getAttribute('data-suite'));
+ });
+ }
+
+ // Setup event listeners
+ document.querySelectorAll('.suite-checkbox').forEach(checkbox => {
+ checkbox.addEventListener('change', filterCharts);
+ });
+ document.getElementById('bench-filter').addEventListener('input', filterCharts);
+
+ // Draw initial charts
+ updateCharts();
+}
+
+// Make functions available globally for onclick handlers
+window.addSelectedRun = addSelectedRun;
+window.removeRun = removeRun;
+
+// Load data based on configuration
+function loadData() {
+ const loadingIndicator = document.getElementById('loading-indicator');
+ loadingIndicator.style.display = 'block'; // Show loading indicator
+
+ if (typeof remoteDataUrl !== 'undefined' && remoteDataUrl !== '') {
+ // Fetch data from remote URL
+ fetch(remoteDataUrl)
+ .then(response => response.json())
+ .then(data => {
+ benchmarkRuns = data;
+ initializeCharts();
+ })
+ .catch(error => {
+ console.error('Error fetching remote data:', error);
+ loadingIndicator.textContent = 'Fetching remote data failed.';
+ })
+ .finally(() => {
+ loadingIndicator.style.display = 'none'; // Hide loading indicator
+ });
+ } else {
+ // Use local data
+ initializeCharts();
+ loadingIndicator.style.display = 'none'; // Hide loading indicator
+ }
+}
+
+// Initialize when DOM is ready
+document.addEventListener('DOMContentLoaded', () => {
+ loadData();
+});
diff --git a/unified-runtime/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
similarity index 79%
rename from unified-runtime/scripts/benchmarks/main.py
rename to devops/scripts/benchmarks/main.py
index 4ad90b39b9001..1a15e5407daf3 100755
--- a/unified-runtime/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -17,6 +17,7 @@
from history import BenchmarkHistory
from utils.utils import prepare_workdir
from utils.compute_runtime import *
+from presets import enabled_suites, presets
import argparse
import re
@@ -27,23 +28,27 @@
def run_iterations(
- benchmark: Benchmark, env_vars, iters: int, results: dict[str, list[Result]]
+ benchmark: Benchmark,
+ env_vars,
+ iters: int,
+ results: dict[str, list[Result]],
+ failures: dict[str, str],
):
for iter in range(iters):
- print(f"running {benchmark.name()}, iteration {iter}... ", end="", flush=True)
+ print(f"running {benchmark.name()}, iteration {iter}... ", flush=True)
bench_results = benchmark.run(env_vars)
if bench_results is None:
- print(f"did not finish (OK for sycl-bench).")
+ failures[benchmark.name()] = "benchmark produced no results!"
break
for bench_result in bench_results:
- # TODO: report failures in markdown/html ?
if not bench_result.passed:
- print(f"complete ({bench_result.label}: verification FAILED)")
+ failures[bench_result.label] = "verification failed"
+ print(f"complete ({bench_result.label}: verification failed).")
continue
print(
- f"complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})."
+ f"{benchmark.name()} complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})."
)
bench_result.name = bench_result.label
@@ -149,15 +154,19 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
SyclBench(directory),
LlamaCppBench(directory),
UMFSuite(directory),
- # TestSuite()
+ TestSuite(),
]
if not options.dry_run
else []
)
benchmarks = []
+ failures = {}
for s in suites:
+ if s.name() not in enabled_suites(options.preset):
+ continue
+
suite_benchmarks = s.benchmarks()
if filter:
suite_benchmarks = [
@@ -170,25 +179,26 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
print(f"Setting up {type(s).__name__}")
try:
s.setup()
- except:
+ except Exception as e:
+ failures[s.name()] = f"Suite setup failure: {e}"
print(f"{type(s).__name__} setup failed. Benchmarks won't be added.")
else:
print(f"{type(s).__name__} setup complete.")
benchmarks += suite_benchmarks
- for b in benchmarks:
- print(b.name())
-
for benchmark in benchmarks:
try:
- print(f"Setting up {benchmark.name()}... ")
+ if options.verbose:
+ print(f"Setting up {benchmark.name()}... ")
benchmark.setup()
- print(f"{benchmark.name()} setup complete.")
+ if options.verbose:
+ print(f"{benchmark.name()} setup complete.")
except Exception as e:
if options.exit_on_failure:
raise e
else:
+ failures[benchmark.name()] = f"Benchmark setup failure: {e}"
print(f"failed: {e}")
results = []
@@ -199,7 +209,11 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
processed: list[Result] = []
for _ in range(options.iterations_stddev):
run_iterations(
- benchmark, merged_env_vars, options.iterations, intermediate_results
+ benchmark,
+ merged_env_vars,
+ options.iterations,
+ intermediate_results,
+ failures,
)
valid, processed = process_results(
intermediate_results, benchmark.stddev_threshold()
@@ -211,12 +225,16 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
if options.exit_on_failure:
raise e
else:
+ failures[benchmark.name()] = f"Benchmark run failure: {e}"
print(f"failed: {e}")
for benchmark in benchmarks:
- print(f"tearing down {benchmark.name()}... ", end="", flush=True)
+        # teardown output is rarely useful, so only print it in verbose mode
+ if options.verbose:
+ print(f"tearing down {benchmark.name()}... ", flush=True)
benchmark.teardown()
- print("complete.")
+ if options.verbose:
+            print(f"{benchmark.name()} teardown complete.")
this_name = options.current_run_name
chart_data = {}
@@ -224,7 +242,10 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
if not options.dry_run:
chart_data = {this_name: results}
- history = BenchmarkHistory(directory)
+ results_dir = directory
+ if options.custom_results_dir:
+ results_dir = Path(options.custom_results_dir)
+ history = BenchmarkHistory(results_dir)
# limit how many files we load.
# should this be configurable?
history.load(1000)
@@ -241,14 +262,18 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
if options.output_markdown:
markdown_content = generate_markdown(
- this_name, chart_data, options.output_markdown
+ this_name, chart_data, failures, options.output_markdown
)
- with open("benchmark_results.md", "w") as file:
+ md_path = options.output_directory
+ if options.output_directory is None:
+ md_path = os.getcwd()
+
+ with open(os.path.join(md_path, "benchmark_results.md"), "w") as file:
file.write(markdown_content)
print(
- f"Markdown with benchmark results has been written to {os.getcwd()}/benchmark_results.md"
+ f"Markdown with benchmark results has been written to {md_path}/benchmark_results.md"
)
saved_name = save_name if save_name is not None else this_name
@@ -262,14 +287,10 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
compare_names.append(saved_name)
if options.output_html:
- html_content = generate_html(history.runs, "intel/llvm", compare_names)
-
- with open("benchmark_results.html", "w") as file:
- file.write(html_content)
-
- print(
- f"HTML with benchmark results has been written to {os.getcwd()}/benchmark_results.html"
- )
+ html_path = options.output_directory
+ if options.output_directory is None:
+ html_path = os.path.join(os.path.dirname(__file__), "html")
+ generate_html(history.runs, compare_names, html_path)
def validate_and_parse_env_args(env_args):
@@ -305,6 +326,11 @@ def validate_and_parse_env_args(env_args):
help="Do not rebuild the benchmarks from scratch.",
action="store_true",
)
+ parser.add_argument(
+ "--redownload",
+ help="Always download benchmark data dependencies, even if they already exist.",
+ action="store_true",
+ )
parser.add_argument(
"--env",
type=str,
@@ -347,12 +373,6 @@ def validate_and_parse_env_args(env_args):
help="Regex pattern to filter benchmarks by name.",
default=None,
)
- parser.add_argument(
- "--epsilon",
- type=float,
- help="Threshold to consider change of performance significant",
- default=options.epsilon,
- )
parser.add_argument(
"--verbose", help="Print output of all the commands.", action="store_true"
)
@@ -379,7 +399,17 @@ def validate_and_parse_env_args(env_args):
help="Specify whether markdown output should fit the content size limit for request validation",
)
parser.add_argument(
- "--output-html", help="Create HTML output", action="store_true", default=False
+ "--output-html",
+ help="Create HTML output. Local output is for direct local viewing of the html file, remote is for server deployment.",
+ nargs="?",
+ const=options.output_html,
+ choices=["local", "remote"],
+ )
+ parser.add_argument(
+ "--output-dir",
+ type=str,
+        help="Location for output files, if --output-html or --output-markdown was specified.",
+ default=None
)
parser.add_argument(
"--dry-run",
@@ -423,6 +453,19 @@ def validate_and_parse_env_args(env_args):
help="Directory for cublas library",
default=None,
)
+ parser.add_argument(
+ "--preset",
+ type=str,
+ choices=[p for p in presets.keys()],
+ help="Benchmark preset to run.",
+ default=options.preset,
+ )
+ parser.add_argument(
+ "--results-dir",
+ type=str,
+ help="Specify a custom results directory",
+ default=options.custom_results_dir,
+ )
args = parser.parse_args()
additional_env_vars = validate_and_parse_env_args(args.env)
@@ -430,10 +473,10 @@ def validate_and_parse_env_args(env_args):
options.workdir = args.benchmark_directory
options.verbose = args.verbose
options.rebuild = not args.no_rebuild
+ options.redownload = args.redownload
options.sycl = args.sycl
options.iterations = args.iterations
options.timeout = args.timeout
- options.epsilon = args.epsilon
options.ur = args.ur
options.ur_adapter = args.adapter
options.exit_on_failure = args.exit_on_failure
@@ -448,12 +491,18 @@ def validate_and_parse_env_args(env_args):
options.current_run_name = args.relative_perf
options.cudnn_directory = args.cudnn_directory
options.cublas_directory = args.cublas_directory
+ options.preset = args.preset
+ options.custom_results_dir = args.results_dir
if args.build_igc and args.compute_runtime is None:
parser.error("--build-igc requires --compute-runtime to be set")
if args.compute_runtime is not None:
options.build_compute_runtime = True
options.compute_runtime_tag = args.compute_runtime
+ if args.output_dir is not None:
+ if not os.path.isdir(args.output_dir):
+ parser.error("Specified --output-dir is not a valid path")
+ options.output_directory = os.path.abspath(args.output_dir)
benchmark_filter = re.compile(args.filter) if args.filter else None
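
The hunks above thread a plain failures dict through suite setup, benchmark setup, and the run loop, and later hand it to generate_markdown(). A minimal sketch of that pattern, with a hypothetical benchmark name standing in for a real one:

    failures: dict[str, str] = {}

    def setup_hypothetical_bench():
        # stand-in for a real Benchmark.setup() that fails
        raise RuntimeError("missing SYCL runtime")

    try:
        setup_hypothetical_bench()
    except Exception as e:
        failures["hypothetical_bench"] = f"Benchmark setup failure: {e}"

    print(failures)
    # {'hypothetical_bench': 'Benchmark setup failure: missing SYCL runtime'}
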
diff --git a/unified-runtime/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
similarity index 81%
rename from unified-runtime/scripts/benchmarks/options.py
rename to devops/scripts/benchmarks/options.py
index 2e92675264544..332d1615bc78d 100644
--- a/unified-runtime/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -1,6 +1,7 @@
from dataclasses import dataclass, field
from enum import Enum
+from presets import presets
class Compare(Enum):
LATEST = "latest"
@@ -21,6 +22,7 @@ class Options:
ur_adapter: str = None
umf: str = None
rebuild: bool = True
+ redownload: bool = False
benchmark_cwd: str = "INVALID"
timeout: float = 600
iterations: int = 3
@@ -28,18 +30,19 @@ class Options:
compare: Compare = Compare.LATEST
compare_max: int = 10 # average/median over how many results
output_markdown: MarkdownSize = MarkdownSize.SHORT
- output_html: bool = False
+ output_html: str = "local"
+ output_directory: str = None
dry_run: bool = False
- # these two should probably be merged into one setting
stddev_threshold: float = 0.02
- epsilon: float = 0.02
iterations_stddev: int = 5
build_compute_runtime: bool = False
extra_ld_libraries: list[str] = field(default_factory=list)
extra_env_vars: dict = field(default_factory=dict)
- compute_runtime_tag: str = "25.05.32567.12"
+ compute_runtime_tag: str = "25.05.32567.18"
build_igc: bool = False
current_run_name: str = "This PR"
+ preset: str = "Full"
+    custom_results_dir: str = None
options = Options()
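
A quick sanity-check sketch of the new defaults on the shared options singleton; it only restates the dataclass above and is not part of the scripts (it assumes devops/scripts/benchmarks is on sys.path):

    from options import options

    assert options.preset == "Full"
    assert options.output_html == "local"
    assert options.redownload is False
    assert options.compute_runtime_tag == "25.05.32567.18"
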
diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py
new file mode 100644
index 0000000000000..49b4d1d84a214
--- /dev/null
+++ b/devops/scripts/benchmarks/output_html.py
@@ -0,0 +1,46 @@
+# Copyright (C) 2024 Intel Corporation
+# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+# See LICENSE.TXT
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import json
+import os
+from options import options
+
+
+def generate_html(benchmark_runs: list, compare_names: list[str], html_path: str):
+ benchmark_runs.sort(key=lambda run: run.date, reverse=True)
+
+ if options.output_html == "local":
+ data_path = os.path.join(html_path, "data.js")
+ # Write data to js file
+ # We can't store this as a standalone json file because it needs to be inline in the html
+ with open(data_path, "w") as f:
+ f.write("benchmarkRuns = [\n")
+        # It might be tempting to just build a list and convert it to JSON,
+        # but that would end up serializing the JSON twice.
+ for i, run in enumerate(benchmark_runs):
+ if i > 0:
+ f.write(",\n")
+ f.write(run.to_json())
+
+ f.write("\n];\n\n") # terminates benchmarkRuns
+
+ f.write("defaultCompareNames = ")
+ json.dump(compare_names, f)
+ f.write(";\n") # terminates defaultCompareNames
+
+        print(f"See {html_path}/index.html for the results.")
+ else:
+ data_path = os.path.join(html_path, "data.json")
+ with open(data_path, "w") as f:
+ f.write("[\n")
+ for i, run in enumerate(benchmark_runs):
+ if i > 0:
+ f.write(",\n")
+ f.write(run.to_json())
+ f.write("\n]\n")
+
+ print(
+            f"Upload {data_path} to the location that remoteDataUrl in config.js points to."
+ )
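
In the remote mode above, data.json is a plain JSON array with one object per BenchmarkRun, so whatever location config.js's remoteDataUrl points at can be read back directly. A hedged sketch with a placeholder URL:

    import json
    import urllib.request

    # Placeholder URL; substitute the location configured as remoteDataUrl.
    with urllib.request.urlopen("https://example.com/benchmarks/data.json") as resp:
        runs = json.load(resp)

    for run in runs:
        print(run["name"], run["git_hash"], len(run["results"]))
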
diff --git a/unified-runtime/scripts/benchmarks/output_markdown.py b/devops/scripts/benchmarks/output_markdown.py
similarity index 92%
rename from unified-runtime/scripts/benchmarks/output_markdown.py
rename to devops/scripts/benchmarks/output_markdown.py
index dd6711cec6365..3295968603d0c 100644
--- a/unified-runtime/scripts/benchmarks/output_markdown.py
+++ b/devops/scripts/benchmarks/output_markdown.py
@@ -5,7 +5,7 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
import collections
-from benches.result import Result
+from utils.result import Result
from options import options, MarkdownSize
import ast
@@ -79,7 +79,7 @@ def get_improved_regressed_summary(is_improved: bool, rows_count: int):
         "\n<details>\n"
         "<summary>\n"
         f"{title} {rows_count} "
-        f"(threshold {options.epsilon*100:.2f}%)\n"
+        f"(threshold {options.stddev_threshold*100:.2f}%)\n"
         "</summary>\n\n"
)
@@ -138,17 +138,6 @@ def generate_markdown_details(
env_dict = res.env
command = res.command
- # If data is collected from already saved results,
- # the content is parsed as strings
- if isinstance(res.env, str):
- # Since the scripts would be used solely on data prepared
- # by our scripts, this should be safe
- # However, maybe needs an additional blessing
- # https://docs.python.org/3/library/ast.html#ast.literal_eval
- env_dict = ast.literal_eval(res.env)
- if isinstance(res.command, str):
- command = ast.literal_eval(res.command)
-
    section = (
        "\n<details>\n"
        f"<summary>{res.label}</summary>\n\n"
@@ -179,7 +168,7 @@ def generate_markdown_details(
return "\nBenchmark details contain too many chars to display\n"
-def generate_summary_table_and_chart(
+def generate_summary_table(
chart_data: dict[str, list[Result]], baseline_name: str, markdown_size: MarkdownSize
):
summary_table = get_chart_markdown_header(
@@ -276,7 +265,7 @@ def generate_summary_table_and_chart(
delta = oln.diff - 1
oln.row += f" {delta*100:.2f}%"
- if abs(delta) > options.epsilon:
+ if abs(delta) > options.stddev_threshold:
if delta > 0:
improved_rows.append(oln.row + " | \n")
else:
@@ -374,10 +363,27 @@ def generate_summary_table_and_chart(
return "\n# Summary\n" "Benchmark output is too large to display\n\n"
+def generate_failures_section(failures: dict[str, str]) -> str:
+ if not failures:
+ return ""
+
+ section = "\n# Failures\n"
+ section += "| Name | Failure |\n"
+ section += "|---|---|\n"
+
+ for name, failure in failures.items():
+ section += f"| {name} | {failure} |\n"
+
+ return section
+
+
def generate_markdown(
- name: str, chart_data: dict[str, list[Result]], markdown_size: MarkdownSize
+ name: str,
+ chart_data: dict[str, list[Result]],
+ failures: dict[str, str],
+ markdown_size: MarkdownSize,
):
- (summary_line, summary_table) = generate_summary_table_and_chart(
+ (summary_line, summary_table) = generate_summary_table(
chart_data, name, markdown_size
)
@@ -396,4 +402,6 @@ def generate_markdown(
)
generated_markdown += "\n# Details\n" f"{markdown_details}\n"
- return generated_markdown
+ failures_section = generate_failures_section(failures)
+
+ return failures_section + generated_markdown
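
For illustration, the failures table that generate_failures_section() prepends looks like this; the entries are made up and the snippet mirrors the function above rather than importing it:

    failures = {
        "api_overhead_benchmark_sycl": "verification failed",
        "Velocity Bench": "Suite setup failure: cmake exited with a non-zero status",
    }

    section = "\n# Failures\n| Name | Failure |\n|---|---|\n"
    section += "".join(f"| {name} | {failure} |\n" for name, failure in failures.items())
    print(section)
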
diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py
new file mode 100644
index 0000000000000..7f5dc8d78460a
--- /dev/null
+++ b/devops/scripts/benchmarks/presets.py
@@ -0,0 +1,38 @@
+# Copyright (C) 2024 Intel Corporation
+# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+# See LICENSE.TXT
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+presets: dict[str, list[str]] = {
+ "Full": [
+ "Compute Benchmarks",
+ "llama.cpp bench",
+ "SYCL-Bench",
+ "Velocity Bench",
+ "UMF",
+ ],
+ "SYCL": [
+ "Compute Benchmarks",
+ "llama.cpp bench",
+ "SYCL-Bench",
+ "Velocity Bench",
+ ],
+ "Minimal": [
+ "Compute Benchmarks",
+ ],
+ "Normal": [
+ "Compute Benchmarks",
+ "llama.cpp bench",
+ "Velocity Bench",
+ ],
+ "Test": [
+ "Test Suite",
+ ],
+}
+
+def enabled_suites(preset: str) -> list[str]:
+ try:
+ return presets[preset]
+ except KeyError:
+ raise ValueError(f"Preset '{preset}' not found.")
+
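
A usage sketch for the preset table above (assumes devops/scripts/benchmarks is on sys.path); the "Nightly" lookup is deliberately invalid to show the error path:

    from presets import enabled_suites, presets

    print(sorted(presets))            # ['Full', 'Minimal', 'Normal', 'SYCL', 'Test']
    print(enabled_suites("Minimal"))  # ['Compute Benchmarks']

    try:
        enabled_suites("Nightly")
    except ValueError as e:
        print(e)                      # Preset 'Nightly' not found.
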
diff --git a/unified-runtime/scripts/benchmarks/requirements.txt b/devops/scripts/benchmarks/requirements.txt
similarity index 85%
rename from unified-runtime/scripts/benchmarks/requirements.txt
rename to devops/scripts/benchmarks/requirements.txt
index 99ba0caab55c2..9f0381ceef6c2 100644
--- a/unified-runtime/scripts/benchmarks/requirements.txt
+++ b/devops/scripts/benchmarks/requirements.txt
@@ -2,3 +2,4 @@ matplotlib==3.9.2
mpld3==0.5.10
dataclasses-json==0.6.7
PyYAML==6.0.1
+Mako==1.3.9
diff --git a/unified-runtime/scripts/benchmarks/utils/compute_runtime.py b/devops/scripts/benchmarks/utils/compute_runtime.py
similarity index 100%
rename from unified-runtime/scripts/benchmarks/utils/compute_runtime.py
rename to devops/scripts/benchmarks/utils/compute_runtime.py
diff --git a/unified-runtime/scripts/benchmarks/benches/oneapi.py b/devops/scripts/benchmarks/utils/oneapi.py
similarity index 79%
rename from unified-runtime/scripts/benchmarks/benches/oneapi.py
rename to devops/scripts/benchmarks/utils/oneapi.py
index 0547f6646e39e..e1876b5ed37fb 100644
--- a/unified-runtime/scripts/benchmarks/benches/oneapi.py
+++ b/devops/scripts/benchmarks/utils/oneapi.py
@@ -7,29 +7,33 @@
from utils.utils import download, run
from options import options
import os
+import hashlib
class OneAPI:
- # random unique number for benchmark oneAPI installation
- ONEAPI_BENCHMARK_INSTANCE_ID = 987654
-
def __init__(self):
self.oneapi_dir = os.path.join(options.workdir, "oneapi")
Path(self.oneapi_dir).mkdir(parents=True, exist_ok=True)
- # delete if some option is set?
+ self.oneapi_instance_id = self.generate_unique_oneapi_id(self.oneapi_dir)
# can we just hardcode these links?
self.install_package(
"dnnl",
"https://registrationcenter-download.intel.com/akdlm/IRC_NAS/87e117ab-039b-437d-9c80-dcd5c9e675d5/intel-onednn-2025.0.0.862_offline.sh",
+ "6866feb5b8dfefd6ff45d6bfabed44f01d7fba8fd452480ae1fd86b92e9481ae052c24842da14f112f672f5c4859945b",
)
self.install_package(
"mkl",
"https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940_offline.sh",
+ "122bb84cf943ea27753cb399c81ab2ae218ebd51b789c74d273240157722925ab4d5a43cb0b5de41b854f2c5a59a4002",
)
return
- def install_package(self, name, url):
+ def generate_unique_oneapi_id(self, path):
+ hash_object = hashlib.md5(path.encode())
+ return hash_object.hexdigest()
+
+ def install_package(self, name, url, checksum):
package_path = os.path.join(self.oneapi_dir, name)
if Path(package_path).exists():
print(
@@ -37,11 +41,13 @@ def install_package(self, name, url):
)
return
- package = download(self.oneapi_dir, url, f"package_{name}.sh")
+ package = download(
+ self.oneapi_dir, url, f"package_{name}.sh", checksum=checksum
+ )
try:
print(f"installing {name}")
run(
- f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance f{self.ONEAPI_BENCHMARK_INSTANCE_ID}"
+ f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance {self.oneapi_instance_id}"
)
except:
print("oneAPI installation likely exists already")
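
The instance id introduced above is just the MD5 hex digest of the oneAPI install path, so installs in different work directories get distinct ids while reruns in the same directory reuse one. A small sketch with illustrative paths:

    import hashlib

    def instance_id(path: str) -> str:
        return hashlib.md5(path.encode()).hexdigest()

    a = instance_id("/work/bench_workdir/oneapi")
    b = instance_id("/other/bench_workdir/oneapi")
    print(a)                                                # 32-character hex digest
    print(a == b)                                           # False: different paths, different ids
    print(a == instance_id("/work/bench_workdir/oneapi"))   # True: stable for the same path
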
diff --git a/unified-runtime/scripts/benchmarks/benches/result.py b/devops/scripts/benchmarks/utils/result.py
similarity index 67%
rename from unified-runtime/scripts/benchmarks/benches/result.py
rename to devops/scripts/benchmarks/utils/result.py
index 52a098d91c24a..7d82d9e488edf 100644
--- a/unified-runtime/scripts/benchmarks/benches/result.py
+++ b/devops/scripts/benchmarks/utils/result.py
@@ -3,9 +3,9 @@
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-from dataclasses import dataclass
+from dataclasses import dataclass, field
from typing import Optional
-from dataclasses_json import dataclass_json
+from dataclasses_json import config, dataclass_json
from datetime import datetime
@@ -14,8 +14,8 @@
class Result:
label: str
value: float
- command: str
- env: str
+ command: list[str]
+ env: dict[str, str]
stdout: str
passed: bool = True
unit: str = ""
@@ -26,9 +26,8 @@ class Result:
# values below should not be set by the benchmark
name: str = ""
lower_is_better: bool = True
- git_hash: str = ""
- date: Optional[datetime] = None
suite: str = "Unknown"
+ description: str = "No description provided."
@dataclass_json
@@ -36,5 +35,10 @@ class Result:
class BenchmarkRun:
results: list[Result]
name: str = "This PR"
+ hostname: str = "Unknown"
git_hash: str = ""
- date: datetime = None
+ github_repo: str = None
+ date: datetime = field(
+ default=None,
+ metadata=config(encoder=datetime.isoformat, decoder=datetime.fromisoformat),
+ )
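
Because the date field now carries explicit isoformat/fromisoformat (de)serializers, a BenchmarkRun round-trips through JSON with its timestamp intact. A hedged sketch, assuming the module is importable as utils.result and using made-up field values:

    from datetime import datetime
    from utils.result import BenchmarkRun

    run = BenchmarkRun(
        results=[],
        name="baseline",
        hostname="bmg-01",       # illustrative value
        git_hash="abc1234",
        date=datetime(2025, 2, 1, 12, 0, 0),
    )

    payload = run.to_json()
    assert BenchmarkRun.from_json(payload).date == run.date
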
diff --git a/unified-runtime/scripts/benchmarks/utils/utils.py b/devops/scripts/benchmarks/utils/utils.py
similarity index 81%
rename from unified-runtime/scripts/benchmarks/utils/utils.py
rename to devops/scripts/benchmarks/utils/utils.py
index 3a516e8d724f7..2d5fad6cd8917 100644
--- a/unified-runtime/scripts/benchmarks/utils/utils.py
+++ b/devops/scripts/benchmarks/utils/utils.py
@@ -12,6 +12,7 @@
import urllib # nosec B404
from options import options
from pathlib import Path
+import hashlib
def run(
@@ -45,6 +46,12 @@ def run(
env.update(env_vars)
+ if options.verbose:
+ command_str = " ".join(command)
+ env_str = " ".join(f"{key}={value}" for key, value in env_vars.items())
+ full_command_str = f"{env_str} {command_str}".strip()
+ print(f"Running: {full_command_str}")
+
result = subprocess.run(
command,
cwd=cwd,
@@ -107,7 +114,7 @@ def prepare_workdir(dir, version):
shutil.rmtree(dir)
else:
raise Exception(
- f"The directory {dir} exists but is a benchmark work directory."
+ f"The directory {dir} exists but is not a benchmark work directory."
)
os.makedirs(dir)
@@ -128,11 +135,26 @@ def create_build_path(directory, name):
return build_path
-def download(dir, url, file, untar=False, unzip=False):
+def calculate_checksum(file_path):
+ sha_hash = hashlib.sha384()
+ with open(file_path, "rb") as f:
+ for byte_block in iter(lambda: f.read(4096), b""):
+ sha_hash.update(byte_block)
+ return sha_hash.hexdigest()
+
+
+def download(dir, url, file, untar=False, unzip=False, checksum=""):
data_file = os.path.join(dir, file)
if not Path(data_file).exists():
print(f"{data_file} does not exist, downloading")
urllib.request.urlretrieve(url, data_file)
+ calculated_checksum = calculate_checksum(data_file)
+ if calculated_checksum != checksum:
+ print(
+ f"Checksum mismatch: expected {checksum}, got {calculated_checksum}. Refusing to continue."
+ )
+ exit(1)
+
if untar:
file = tarfile.open(data_file)
file.extractall(dir)
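
The checksum passed to download() is a SHA-384 hex digest, matching calculate_checksum() above. A local verification sketch with a placeholder file name and digest:

    import hashlib

    def sha384_of(path: str) -> str:
        h = hashlib.sha384()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                h.update(chunk)
        return h.hexdigest()

    expected = "0123abcd..."  # placeholder digest, not a real artifact checksum
    actual = sha384_of("package_dnnl.sh")
    if actual != expected:
        raise SystemExit(f"Checksum mismatch: expected {expected}, got {actual}.")
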
diff --git a/unified-runtime/scripts/benchmarks/workflow.png b/devops/scripts/benchmarks/workflow.png
similarity index 100%
rename from unified-runtime/scripts/benchmarks/workflow.png
rename to devops/scripts/benchmarks/workflow.png
diff --git a/unified-runtime/.github/scripts/get_system_info.sh b/devops/scripts/get_system_info.sh
similarity index 100%
rename from unified-runtime/.github/scripts/get_system_info.sh
rename to devops/scripts/get_system_info.sh
diff --git a/unified-runtime/scripts/benchmarks/benchmark_results.html.template b/unified-runtime/scripts/benchmarks/benchmark_results.html.template
deleted file mode 100644
index 1deeedad66b00..0000000000000
--- a/unified-runtime/scripts/benchmarks/benchmark_results.html.template
+++ /dev/null
@@ -1,192 +0,0 @@
-[192 lines of HTML template: a "Benchmark Results" page with inline CSS and JS,
- ${suite_checkboxes_html} suite filter checkboxes, a "Historical Results" section
- populated by ${timeseries_charts_html}, and a "Comparisons" section populated by
- ${bar_charts_html}.]
diff --git a/unified-runtime/scripts/benchmarks/output_html.py b/unified-runtime/scripts/benchmarks/output_html.py
deleted file mode 100644
index 4ba395bc3aac6..0000000000000
--- a/unified-runtime/scripts/benchmarks/output_html.py
+++ /dev/null
@@ -1,340 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
-# See LICENSE.TXT
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-import re
-import os
-from pathlib import Path
-import matplotlib.pyplot as plt
-import mpld3
-from collections import defaultdict
-from dataclasses import dataclass
-import matplotlib.dates as mdates
-from benches.result import BenchmarkRun, Result
-import numpy as np
-from string import Template
-
-
-@dataclass
-class BenchmarkMetadata:
- unit: str
- suite: str
- lower_is_better: bool
-
-
-@dataclass
-class BenchmarkSeries:
- label: str
- metadata: BenchmarkMetadata
- runs: list[BenchmarkRun]
-
-
-@dataclass
-class BenchmarkChart:
- label: str
- suite: str
- html: str
-
-
-def tooltip_css() -> str:
- return ".mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}"
-
-
-def create_time_series_chart(
- benchmarks: list[BenchmarkSeries], github_repo: str
-) -> list[BenchmarkChart]:
- plt.close("all")
-
- num_benchmarks = len(benchmarks)
- if num_benchmarks == 0:
- return []
-
- html_charts = []
-
- for _, benchmark in enumerate(benchmarks):
- fig, ax = plt.subplots(figsize=(10, 4))
-
- all_values = []
- all_stddevs = []
-
- for run in benchmark.runs:
- sorted_points = sorted(run.results, key=lambda x: x.date)
- dates = [point.date for point in sorted_points]
- values = [point.value for point in sorted_points]
- stddevs = [point.stddev for point in sorted_points]
-
- all_values.extend(values)
- all_stddevs.extend(stddevs)
-
- ax.errorbar(dates, values, yerr=stddevs, fmt="-", label=run.name, alpha=0.5)
- scatter = ax.scatter(dates, values, picker=True)
-
- tooltip_labels = [
- f"Date: {point.date.strftime('%Y-%m-%d %H:%M:%S')}\n"
- f"Value: {point.value:.2f} {benchmark.metadata.unit}\n"
- f"Stddev: {point.stddev:.2f} {benchmark.metadata.unit}\n"
- f"Git Hash: {point.git_hash}"
- for point in sorted_points
- ]
-
- targets = [
- f"https://github.com/{github_repo}/commit/{point.git_hash}"
- for point in sorted_points
- ]
-
- tooltip = mpld3.plugins.PointHTMLTooltip(
- scatter, tooltip_labels, css=tooltip_css(), targets=targets
- )
- mpld3.plugins.connect(fig, tooltip)
-
- ax.set_title(benchmark.label, pad=20)
- performance_indicator = (
- "lower is better"
- if benchmark.metadata.lower_is_better
- else "higher is better"
- )
- ax.text(
- 0.5,
- 1.05,
- f"({performance_indicator})",
- ha="center",
- transform=ax.transAxes,
- style="italic",
- fontsize=7,
- color="#666666",
- )
-
- ax.set_xlabel("")
- unit = benchmark.metadata.unit
- ax.set_ylabel(f"Value ({unit})" if unit else "Value")
- ax.grid(True, alpha=0.2)
- ax.legend(bbox_to_anchor=(1, 1), loc="upper left")
- ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter("%Y-%m-%d %H:%M:%S"))
-
- plt.tight_layout()
- html_charts.append(
- BenchmarkChart(
- html=mpld3.fig_to_html(fig),
- label=benchmark.label,
- suite=benchmark.metadata.suite,
- )
- )
- plt.close(fig)
-
- return html_charts
-
-
-@dataclass
-class ExplicitGroup:
- name: str
- nnames: int
- metadata: BenchmarkMetadata
- runs: dict[str, dict[str, Result]]
-
-
-def create_explicit_groups(
- benchmark_runs: list[BenchmarkRun], compare_names: list[str]
-) -> list[ExplicitGroup]:
- groups = {}
-
- for run in benchmark_runs:
- if run.name in compare_names:
- for res in run.results:
- if res.explicit_group != "":
- if res.explicit_group not in groups:
- groups[res.explicit_group] = ExplicitGroup(
- name=res.explicit_group,
- nnames=len(compare_names),
- metadata=BenchmarkMetadata(
- unit=res.unit,
- lower_is_better=res.lower_is_better,
- suite=res.suite,
- ),
- runs={},
- )
-
- group = groups[res.explicit_group]
- if res.label not in group.runs:
- group.runs[res.label] = {name: None for name in compare_names}
-
- if group.runs[res.label][run.name] is None:
- group.runs[res.label][run.name] = res
-
- return list(groups.values())
-
-
-def create_grouped_bar_charts(groups: list[ExplicitGroup]) -> list[BenchmarkChart]:
- plt.close("all")
-
- html_charts = []
-
- for group in groups:
- fig, ax = plt.subplots(figsize=(10, 6))
-
- x = np.arange(group.nnames)
- x_labels = []
- width = 0.8 / len(group.runs)
-
- max_height = 0
-
- for i, (run_name, run_results) in enumerate(group.runs.items()):
- offset = width * i
-
- positions = x + offset
- x_labels = run_results.keys()
- valid_data = [r.value if r is not None else 0 for r in run_results.values()]
- rects = ax.bar(positions, valid_data, width, label=run_name)
- # This is a hack to disable all bar_label. Setting labels to empty doesn't work.
- # We create our own labels below for each bar, this works better in mpld3.
- ax.bar_label(rects, fmt="")
-
- for rect, run, res in zip(rects, run_results.keys(), run_results.values()):
- if res is None:
- continue
-
- height = rect.get_height()
- if height > max_height:
- max_height = height
-
- ax.text(
- rect.get_x() + rect.get_width() / 2.0,
- height + 1,
- f"{res.value:.1f}",
- ha="center",
- va="bottom",
- fontsize=9,
- )
-
- tooltip_labels = [
- f"Date: {res.date.strftime('%Y-%m-%d %H:%M:%S')}\n"
- f"Run: {run}\n"
- f"Label: {res.label}\n"
- f"Value: {res.value:.2f} {res.unit}\n"
- f"Stddev: {res.stddev:.2f} {res.unit}\n"
- ]
- tooltip = mpld3.plugins.LineHTMLTooltip(
- rect, tooltip_labels, css=tooltip_css()
- )
- mpld3.plugins.connect(ax.figure, tooltip)
-
- # normally we'd just set legend to be outside
- # the chart, but this is not supported by mpld3.
- # instead, we adjust the y axis to account for
- # the height of the bars.
- legend_height = len(group.runs) * 0.1
- ax.set_ylim(0, max_height * (1 + legend_height))
-
- ax.set_xticks([])
- ax.grid(True, axis="y", alpha=0.2)
- ax.set_ylabel(f"Value ({group.metadata.unit})")
- ax.legend(loc="upper left")
- ax.set_title(group.name, pad=20)
- performance_indicator = (
- "lower is better" if group.metadata.lower_is_better else "higher is better"
- )
- ax.text(
- 0.5,
- 1.03,
- f"({performance_indicator})",
- ha="center",
- transform=ax.transAxes,
- style="italic",
- fontsize=7,
- color="#666666",
- )
-
- for idx, label in enumerate(x_labels):
- # this is a hack to get labels to show above the legend
- # we normalize the idx to transAxes transform and offset it a little.
- x_norm = (idx + 0.3 - ax.get_xlim()[0]) / (
- ax.get_xlim()[1] - ax.get_xlim()[0]
- )
- ax.text(x_norm, 1.03, label, transform=ax.transAxes, color="#666666")
-
- plt.tight_layout()
- html_charts.append(
- BenchmarkChart(
- label=group.name,
- html=mpld3.fig_to_html(fig),
- suite=group.metadata.suite,
- )
- )
- plt.close(fig)
-
- return html_charts
-
-
-def process_benchmark_data(
- benchmark_runs: list[BenchmarkRun], compare_names: list[str]
-) -> list[BenchmarkSeries]:
- benchmark_metadata: dict[str, BenchmarkMetadata] = {}
- run_map: dict[str, dict[str, list[Result]]] = defaultdict(lambda: defaultdict(list))
-
- for run in benchmark_runs:
- if run.name not in compare_names:
- continue
-
- for result in run.results:
- if result.label not in benchmark_metadata:
- benchmark_metadata[result.label] = BenchmarkMetadata(
- unit=result.unit,
- lower_is_better=result.lower_is_better,
- suite=result.suite,
- )
-
- result.date = run.date
- result.git_hash = run.git_hash
- run_map[result.label][run.name].append(result)
-
- benchmark_series = []
- for label, metadata in benchmark_metadata.items():
- runs = [
- BenchmarkRun(name=run_name, results=results)
- for run_name, results in run_map[label].items()
- ]
- benchmark_series.append(
- BenchmarkSeries(label=label, metadata=metadata, runs=runs)
- )
-
- return benchmark_series
-
-
-def generate_html(
- benchmark_runs: list[BenchmarkRun], github_repo: str, compare_names: list[str]
-) -> str:
- benchmarks = process_benchmark_data(benchmark_runs, compare_names)
-
- timeseries = create_time_series_chart(benchmarks, github_repo)
- timeseries_charts_html = "\n".join(
-        f'<div class="chart" data-label="{ts.label}" data-suite="{ts.suite}"><div>{ts.html}</div></div>'
- for ts in timeseries
- )
-
- explicit_groups = create_explicit_groups(benchmark_runs, compare_names)
-
- bar_charts = create_grouped_bar_charts(explicit_groups)
- bar_charts_html = "\n".join(
-        f'<div class="chart" data-label="{bc.label}" data-suite="{bc.suite}"><div>{bc.html}</div></div>'
- for bc in bar_charts
- )
-
- suite_names = {t.suite for t in timeseries}
- suite_checkboxes_html = " ".join(
-        f'<label><input type="checkbox" class="suite-checkbox" data-suite="{suite}" checked> {suite}</label>'
- for suite in suite_names
- )
-
- script_path = os.path.dirname(os.path.realpath(__file__))
- results_template_path = Path(script_path, "benchmark_results.html.template")
- with open(results_template_path, "r") as file:
- html_template = file.read()
-
- template = Template(html_template)
- data = {
- "suite_checkboxes_html": suite_checkboxes_html,
- "timeseries_charts_html": timeseries_charts_html,
- "bar_charts_html": bar_charts_html,
- }
-
- return template.substitute(data)