diff --git a/.github/workflows/ur-build-hw.yml b/.github/workflows/ur-build-hw.yml
index a0f94ab10f538..eebac4e424a4b 100644
--- a/.github/workflows/ur-build-hw.yml
+++ b/.github/workflows/ur-build-hw.yml
@@ -156,4 +156,4 @@ jobs:
 
     - name: Get information about platform
       if: ${{ always() }}
-      run: ${{github.workspace}}/unified-runtime/.github/scripts/get_system_info.sh
+      run: ${{github.workspace}}/devops/scripts/get_system_info.sh
diff --git a/unified-runtime/scripts/benchmarks/README.md b/devops/scripts/benchmarks/README.md
similarity index 100%
rename from unified-runtime/scripts/benchmarks/README.md
rename to devops/scripts/benchmarks/README.md
diff --git a/unified-runtime/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py
similarity index 88%
rename from unified-runtime/scripts/benchmarks/benches/base.py
rename to devops/scripts/benchmarks/benches/base.py
index d1bb5fb53b83a..77365220dbf85 100644
--- a/unified-runtime/scripts/benchmarks/benches/base.py
+++ b/devops/scripts/benchmarks/benches/base.py
@@ -6,7 +6,7 @@
 import os
 import shutil
 from pathlib import Path
-from .result import Result
+from utils.result import Result
 from options import options
 from utils.utils import download, run
 import urllib.request
@@ -55,16 +55,25 @@ def create_data_path(self, name, skip_data_dir=False):
             data_path = os.path.join(self.directory, name)
         else:
             data_path = os.path.join(self.directory, "data", name)
-            if options.rebuild and Path(data_path).exists():
+            if options.redownload and Path(data_path).exists():
                 shutil.rmtree(data_path)
 
         Path(data_path).mkdir(parents=True, exist_ok=True)
 
         return data_path
 
-    def download(self, name, url, file, untar=False, unzip=False, skip_data_dir=False):
+    def download(
+        self,
+        name,
+        url,
+        file,
+        untar=False,
+        unzip=False,
+        skip_data_dir=False,
+        checksum="",
+    ):
         self.data_path = self.create_data_path(name, skip_data_dir)
-        return download(self.data_path, url, file, untar, unzip)
+        return download(self.data_path, url, file, untar, unzip, checksum)
 
     def name(self):
         raise NotImplementedError()
diff --git a/unified-runtime/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
similarity index 53%
rename from unified-runtime/scripts/benchmarks/benches/compute.py
rename to devops/scripts/benchmarks/benches/compute.py
index 4658a3414e16a..4b02c8b97e0cc 100644
--- a/unified-runtime/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -8,10 +8,25 @@
 import io
 from utils.utils import run, git_clone, create_build_path
 from .base import Benchmark, Suite
-from .result import Result
+from utils.result import BenchmarkMetadata, Result
 from options import options
 from enum import Enum
 
+
+class RUNTIMES(Enum):
+    SYCL = "sycl"
+    LEVEL_ZERO = "l0"
+    UR = "ur"
+
+
+def runtime_to_name(runtime: RUNTIMES) -> str:
+    return {
+        RUNTIMES.SYCL: "SYCL",
+        RUNTIMES.LEVEL_ZERO: "Level Zero",
+        RUNTIMES.UR: "Unified Runtime",
+    }[runtime]
+
+
 class ComputeBench(Suite):
     def __init__(self, directory):
         self.directory = directory
@@ -27,7 +42,7 @@ def setup(self):
             self.directory,
             "compute-benchmarks-repo",
             "https://github.com/intel/compute-benchmarks.git",
-            "dfdbf2ff9437ee159627cc2cd9159c289da1a7ba",
+            "b5cc46acf61766ab00da04e85bd4da4f7591eb21",
         )
         build_path = create_build_path(self.directory, "compute-benchmarks-build")
 
@@ -47,13 +62,38 @@ def setup(self):
                 f"-Dunified-runtime_DIR={options.ur}/lib/cmake/unified-runtime",
             ]
 
-        print(f"{self.__class__.__name__}: Run {configure_command}")
         run(configure_command, add_sycl=True)
-        print(f"{self.__class__.__name__}: Run cmake --build {build_path} -j")
+
         run(f"cmake --build {build_path} -j", add_sycl=True)
 
         self.built = True
 
+    def additionalMetadata(self) -> dict[str, BenchmarkMetadata]:
+        return {
+            "SubmitKernel": BenchmarkMetadata(
+                type="group",
+                description="Measures CPU time overhead of submitting kernels through different APIs.",
+                notes="Each layer builds on top of the previous layer, adding functionality and overhead.\n"
+                "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n"
+                "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n"
+                "Work is ongoing to reduce the overhead of the SYCL API\n",
+            ),
+            "SinKernelGraph": BenchmarkMetadata(
+                type="group",
+                unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.",
+            ),
+        }
+
+    def enabled_runtimes(self, supported_runtimes=None):
+        # all runtimes in the RUNTIMES enum
+        runtimes = supported_runtimes or list(RUNTIMES)
+
+        # Filter out UR if not available
+        if options.ur is None:
+            runtimes = [r for r in runtimes if r != RUNTIMES.UR]
+
+        return runtimes
+
     def benchmarks(self) -> list[Benchmark]:
         if options.sycl is None:
             return []
@@ -61,11 +101,46 @@ def benchmarks(self) -> list[Benchmark]:
         if options.ur_adapter == "cuda":
             return []
 
-        benches = [
-            SubmitKernelL0(self, 0),
-            SubmitKernelL0(self, 1),
-            SubmitKernelSYCL(self, 0),
-            SubmitKernelSYCL(self, 1),
+        benches = []
+
+        # Add SubmitKernel benchmarks using loops
+        for runtime in self.enabled_runtimes():
+            for in_order_queue in [0, 1]:
+                for measure_completion in [0, 1]:
+                    benches.append(
+                        SubmitKernel(self, runtime, in_order_queue, measure_completion)
+                    )
+
+        # Add SinKernelGraph benchmarks
+        for runtime in self.enabled_runtimes():
+            for with_graphs in [0, 1]:
+                for num_kernels in [5, 100]:
+                    benches.append(
+                        GraphApiSinKernelGraph(self, runtime, with_graphs, num_kernels)
+                    )
+
+        # Add ULLS benchmarks
+        for runtime in self.enabled_runtimes([RUNTIMES.SYCL, RUNTIMES.LEVEL_ZERO]):
+            benches.append(UllsEmptyKernel(self, runtime, 1000, 256))
+            benches.append(UllsKernelSwitch(self, runtime, 8, 200, 0, 0, 1, 1))
+
+        # Add GraphApiSubmitGraph benchmarks
+        for runtime in self.enabled_runtimes([RUNTIMES.SYCL]):
+            for in_order_queue in [0, 1]:
+                for num_kernels in [4, 10, 32]:
+                    for measure_completion_time in [0, 1]:
+                        benches.append(
+                            GraphApiSubmitGraph(
+                                self,
+                                runtime,
+                                in_order_queue,
+                                num_kernels,
+                                measure_completion_time,
+                            )
+                        )
+
+        # Add other benchmarks
+        benches += [
             QueueInOrderMemcpy(self, 0, "Device", "Device", 1024),
             QueueInOrderMemcpy(self, 0, "Host", "Device", 1024),
             QueueMemcpy(self, "Device", "Device", 1024),
@@ -73,29 +148,14 @@ def benchmarks(self) -> list[Benchmark]:
             ExecImmediateCopyQueue(self, 0, 1, "Device", "Device", 1024),
             ExecImmediateCopyQueue(self, 1, 1, "Device", "Host", 1024),
             VectorSum(self),
-            MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1),
-            MemcpyExecute(self, 400, 8, 1024, 100, 1, 1, 1),
-            MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1),
-            MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0),
-            GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 5),
-            GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 5),
-            GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 100),
-            GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 100),
-            GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 5),
-            GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 5),
-            GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 100),
-            GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 100),
         ]
 
+        # Add UR-specific benchmarks
         if options.ur is not None:
-            benches += [
-                SubmitKernelUR(self, 0, 0),
-                SubmitKernelUR(self, 1, 0),
-                SubmitKernelUR(self, 1, 1),
-                GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 5),
-                GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 5),
-                GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 100),
-                GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 100),
+            benches += [
+                MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1),
+                MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1),
+                MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0),
             ]
 
         return benches
@@ -130,6 +190,9 @@ def setup(self):
     def explicit_group(self):
         return ""
 
+    def description(self) -> str:
+        return ""
+
     def run(self, env_vars) -> list[Result]:
         command = [
             f"{self.benchmark_bin}",
@@ -161,6 +224,7 @@ def run(self, env_vars) -> list[Result]:
                     env=env_vars,
                     stdout=result,
                     unit=parse_unit_type(unit),
+                    description=self.description(),
                 )
             )
         return ret
@@ -192,74 +256,49 @@ def teardown(self):
         return
 
 
-class SubmitKernelSYCL(ComputeBenchmark):
-    def __init__(self, bench, ioq):
+class SubmitKernel(ComputeBenchmark):
+    def __init__(self, bench, runtime: RUNTIMES, ioq, measure_completion=0):
         self.ioq = ioq
-        super().__init__(bench, "api_overhead_benchmark_sycl", "SubmitKernel")
+        self.runtime = runtime
+        self.measure_completion = measure_completion
+        super().__init__(
+            bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel"
+        )
 
     def name(self):
         order = "in order" if self.ioq else "out of order"
-        return f"api_overhead_benchmark_sycl SubmitKernel {order}"
+        completion_str = " with measure completion" if self.measure_completion else ""
+        return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}"
 
     def explicit_group(self):
-        return "SubmitKernel"
-
-    def bin_args(self) -> list[str]:
-        return [
-            f"--Ioq={self.ioq}",
-            "--DiscardEvents=0",
-            "--MeasureCompletion=0",
-            "--iterations=100000",
-            "--Profiling=0",
-            "--NumKernels=10",
-            "--KernelExecTime=1",
-        ]
-
-
-class SubmitKernelUR(ComputeBenchmark):
-    def __init__(self, bench, ioq, measureCompletion):
-        self.ioq = ioq
-        self.measureCompletion = measureCompletion
-        super().__init__(bench, "api_overhead_benchmark_ur", "SubmitKernel")
-
-    def name(self):
-        order = "in order" if self.ioq else "out of order"
-        return f"api_overhead_benchmark_ur SubmitKernel {order}" + (
-            " with measure completion" if self.measureCompletion else ""
+        return (
+            "SubmitKernel"
+            if self.measure_completion == 0
+            else "SubmitKernel With Completion"
         )
 
-    def explicit_group(self):
-        return "SubmitKernel"
+    def description(self) -> str:
+        order = "in-order" if self.ioq else "out-of-order"
+        runtime_name = runtime_to_name(self.runtime)
 
-    def bin_args(self) -> list[str]:
-        return [
-            f"--Ioq={self.ioq}",
-            "--DiscardEvents=0",
-            f"--MeasureCompletion={self.measureCompletion}",
-            "--iterations=100000",
-            "--Profiling=0",
-            "--NumKernels=10",
-            "--KernelExecTime=1",
-        ]
-
-
-class SubmitKernelL0(ComputeBenchmark):
-    def __init__(self, bench, ioq):
-        self.ioq = ioq
-        super().__init__(bench, "api_overhead_benchmark_l0", "SubmitKernel")
+        completion_desc = ""
+        if self.runtime == RUNTIMES.UR:
+            completion_desc = f", {'including' if self.measure_completion else 'excluding'} kernel completion time"
 
-    def name(self):
-        order = "in order" if self.ioq else "out of order"
-        return f"api_overhead_benchmark_l0 SubmitKernel {order}"
+        l0_specific = ""
+        if self.runtime == RUNTIMES.LEVEL_ZERO:
+            l0_specific = " Uses immediate command lists"
 
-    def explicit_group(self):
-        return "SubmitKernel"
+        return (
+            f"Measures CPU time overhead of submitting {order} kernels through {runtime_name} API{completion_desc}. "
+            f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time. {l0_specific}"
+        )
 
     def bin_args(self) -> list[str]:
         return [
             f"--Ioq={self.ioq}",
             "--DiscardEvents=0",
-            "--MeasureCompletion=0",
+            f"--MeasureCompletion={self.measure_completion}",
             "--iterations=100000",
             "--Profiling=0",
             "--NumKernels=10",
@@ -280,6 +319,14 @@ def name(self):
         order = "in order" if self.ioq else "out of order"
         return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}"
 
+    def description(self) -> str:
+        order = "in-order" if self.ioq else "out-of-order"
+        operation = "copy-only" if self.isCopyOnly else "copy and command submission"
+        return (
+            f"Measures SYCL {order} queue overhead for {operation} from {self.source} to "
+            f"{self.destination} memory with {self.size} bytes. Tests immediate execution overheads."
+        )
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=100000",
@@ -303,6 +350,13 @@ def __init__(self, bench, isCopyOnly, source, destination, size):
     def name(self):
         return f"memory_benchmark_sycl QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}"
 
+    def description(self) -> str:
+        operation = "copy-only" if self.isCopyOnly else "copy and command submission"
+        return (
+            f"Measures SYCL in-order queue memory copy performance for {operation} from "
+            f"{self.source} to {self.destination} with {self.size} bytes, executed 100 times per iteration."
+        )
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=10000",
@@ -324,6 +378,12 @@ def __init__(self, bench, source, destination, size):
     def name(self):
         return f"memory_benchmark_sycl QueueMemcpy from {self.source} to {self.destination}, size {self.size}"
 
+    def description(self) -> str:
+        return (
+            f"Measures general SYCL queue memory copy performance from {self.source} to "
+            f"{self.destination} with {self.size} bytes per operation."
+        )
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=10000",
@@ -343,6 +403,12 @@ def __init__(self, bench, type, size, placement):
     def name(self):
         return f"memory_benchmark_sycl StreamMemory, placement {self.placement}, type {self.type}, size {self.size}"
 
+    def description(self) -> str:
+        return (
+            f"Measures {self.placement} memory bandwidth using {self.type} pattern with "
+            f"{self.size} bytes. Higher values (GB/s) indicate better performance."
+        )
+
     # measurement is in GB/s
     def lower_is_better(self):
         return False
@@ -356,6 +422,7 @@ def bin_args(self) -> list[str]:
             "--useEvents=0",
             "--contents=Zeros",
             "--multiplier=1",
+            "--vectorSize=1",
         ]
 
 
@@ -366,6 +433,12 @@ def __init__(self, bench):
     def name(self):
         return f"miscellaneous_benchmark_sycl VectorSum"
 
+    def description(self) -> str:
+        return (
+            "Measures performance of vector addition across 3D grid (512x256x256 elements) "
+            "using SYCL."
+        )
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=1000",
@@ -402,6 +475,16 @@ def name(self):
             + (" without events" if not self.useEvents else "")
         )
 
+    def description(self) -> str:
+        src_type = "device" if self.srcUSM == 1 else "host"
+        dst_type = "device" if self.dstUSM == 1 else "host"
+        events = "with" if self.useEvents else "without"
+        return (
+            f"Measures multithreaded memory copy performance with {self.numThreads} threads "
+            f"each performing {self.numOpsPerThread} operations on {self.allocSize} bytes "
+            f"from {src_type} to {dst_type} memory {events} events."
+        )
+
     def bin_args(self) -> list[str]:
         return [
             "--Ioq=1",
@@ -417,12 +500,6 @@ def bin_args(self) -> list[str]:
         ]
 
 
-class RUNTIMES(Enum):
-    SYCL = "sycl"
-    LEVEL_ZERO = "l0"
-    UR = "ur"
-
-
 class GraphApiSinKernelGraph(ComputeBenchmark):
     def __init__(self, bench, runtime: RUNTIMES, withGraphs, numKernels):
         self.withGraphs = withGraphs
@@ -435,9 +512,19 @@ def __init__(self, bench, runtime: RUNTIMES, withGraphs, numKernels):
     def explicit_group(self):
         return f"SinKernelGraph {self.numKernels}"
 
+    def description(self) -> str:
+        execution = "using graphs" if self.withGraphs else "without graphs"
+        return (
+            f"Measures {self.runtime.value.upper()} performance when executing {self.numKernels} "
+            f"sin kernels {execution}. Tests overhead and benefits of graph-based execution."
+        )
+
     def name(self):
         return f"graph_api_benchmark_{self.runtime.value} SinKernelGraph graphs:{self.withGraphs}, numKernels:{self.numKernels}"
 
+    def unstable(self) -> str:
+        return "This benchmark combines both eager and graph execution, and may not be representative of real use cases."
+
     def bin_args(self) -> list[str]:
         return [
             "--iterations=10000",
@@ -448,26 +535,100 @@ def bin_args(self) -> list[str]:
         ]
 
 
-class GraphApiSubmitExecGraph(ComputeBenchmark):
-    def __init__(self, bench, ioq, submit, numKernels):
-        self.ioq = ioq
-        self.submit = submit
+class GraphApiSubmitGraph(ComputeBenchmark):
+    def __init__(
+        self, bench, runtime: RUNTIMES, inOrderQueue, numKernels, measureCompletionTime
+    ):
+        self.inOrderQueue = inOrderQueue
         self.numKernels = numKernels
-        super().__init__(bench, "graph_api_benchmark_sycl", "SubmitExecGraph")
+        self.runtime = runtime
+        self.measureCompletionTime = measureCompletionTime
+        super().__init__(bench, f"graph_api_benchmark_{runtime.value}", "SubmitGraph")
+
+    def explicit_group(self):
+        return f"SubmitGraph {self.numKernels}"
+
+    def description(self) -> str:
+        return (
+            f"Measures {self.runtime.value.upper()} performance when executing {self.numKernels} "
+            f"trivial kernels using graphs. Tests overhead and benefits of graph-based execution."
+        )
 
     def name(self):
-        return f"graph_api_benchmark_sycl SubmitExecGraph ioq:{self.ioq}, submit:{self.submit}, numKernels:{self.numKernels}"
+        return f"graph_api_benchmark_{self.runtime.value} SubmitGraph numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}"
+
+    def bin_args(self) -> list[str]:
+        return [
+            "--iterations=10000",
+            f"--NumKernels={self.numKernels}",
+            f"--MeasureCompletionTime={self.measureCompletionTime}",
+            f"--InOrderQueue={self.inOrderQueue}",
+            "--Profiling=0",
+            "--KernelExecutionTime=1",
+        ]
+
+
+class UllsEmptyKernel(ComputeBenchmark):
+    def __init__(self, bench, runtime: RUNTIMES, wgc, wgs):
+        self.wgc = wgc
+        self.wgs = wgs
+        self.runtime = runtime
+        super().__init__(bench, f"ulls_benchmark_{runtime.value}", "EmptyKernel")
+
+    def explicit_group(self):
+        return f"EmptyKernel {self.wgc} {self.wgs}"
+
+    def description(self) -> str:
+        return ""
+
+    def name(self):
+        return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}"
+
+    def bin_args(self) -> list[str]:
+        return [
+            "--iterations=10000",
+            f"--wgs={self.wgs}",
+            f"--wgc={self.wgc}",
+        ]
+
+
+class UllsKernelSwitch(ComputeBenchmark):
+    def __init__(
+        self,
+        bench,
+        runtime: RUNTIMES,
+        count,
+        kernelTime,
+        barrier,
+        hostVisible,
+        ioq,
+        ctrBasedEvents,
+    ):
+        self.count = count
+        self.kernelTime = kernelTime
+        self.barrier = barrier
+        self.hostVisible = hostVisible
+        self.ctrBasedEvents = ctrBasedEvents
+        self.runtime = runtime
+        self.ioq = ioq
+        super().__init__(bench, f"ulls_benchmark_{runtime.value}", "KernelSwitch")
 
     def explicit_group(self):
-        if self.submit:
-            return "SubmitGraph"
-        else:
-            return "ExecGraph"
+        return f"KernelSwitch {self.count} {self.kernelTime}"
+
+    def description(self) -> str:
+        return ""
+
+    def name(self):
+        return f"ulls_benchmark_{self.runtime.value} KernelSwitch count {self.count} kernelTime {self.kernelTime}"
 
     def bin_args(self) -> list[str]:
         return [
-            "--iterations=100",
-            f"--measureSubmit={self.submit}",
+            "--iterations=1000",
+            f"--count={self.count}",
+            f"--kernelTime={self.kernelTime}",
+            f"--barrier={self.barrier}",
+            f"--hostVisible={self.hostVisible}",
             f"--ioq={self.ioq}",
-            f"--numKernels={self.numKernels}",
+            f"--ctrBasedEvents={self.ctrBasedEvents}",
         ]
diff --git a/unified-runtime/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py
similarity index 86%
rename from unified-runtime/scripts/benchmarks/benches/llamacpp.py
rename to devops/scripts/benchmarks/benches/llamacpp.py
index 6524c95a9f56f..d8e0ab5d007bb 100644
--- a/unified-runtime/scripts/benchmarks/benches/llamacpp.py
+++ b/devops/scripts/benchmarks/benches/llamacpp.py
@@ -8,10 +8,10 @@
 from pathlib import Path
 from utils.utils import download, git_clone
 from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
 from utils.utils import run, create_build_path
 from options import options
-from .oneapi import get_oneapi
+from utils.oneapi import get_oneapi
 import os
 
 
@@ -43,6 +43,7 @@ def setup(self):
             self.models_dir,
             "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf",
             "Phi-3-mini-4k-instruct-q4.gguf",
+            checksum="fc4f45c9729874a33a527465b2ec78189a18e5726b7121182623feeae38632ace4f280617b01d4a04875acf49d263ee4",
         )
 
         self.oneapi = get_oneapi()
@@ -62,9 +63,9 @@ def setup(self):
             f'-DCMAKE_CXX_FLAGS=-I"{self.oneapi.mkl_include()}"',
             f"-DCMAKE_SHARED_LINKER_FLAGS=-L{self.oneapi.compiler_lib()} -L{self.oneapi.mkl_lib()}",
         ]
-        print(f"{self.__class__.__name__}: Run {configure_command}")
+
         run(configure_command, add_sycl=True)
-        print(f"{self.__class__.__name__}: Run cmake --build {self.build_path} -j")
+
         run(
             f"cmake --build {self.build_path} -j",
             add_sycl=True,
@@ -92,6 +93,14 @@ def setup(self):
     def name(self):
         return f"llama.cpp"
 
+    def description(self) -> str:
+        return (
+            "Performance testing tool for llama.cpp that measures LLM inference speed in tokens per second. "
+            "Runs both prompt processing (initial context processing) and text generation benchmarks with "
+            "different batch sizes. Higher values indicate better performance. Uses the Phi-3-mini-4k-instruct "
+            "quantized model and leverages SYCL with oneDNN for acceleration."
+        )
+
     def lower_is_better(self):
         return False
 
@@ -130,6 +139,7 @@ def run(self, env_vars) -> list[Result]:
                     env=env_vars,
                     stdout=result,
                     unit="token/s",
+                    description=self.description()
                 )
             )
         return results
diff --git a/unified-runtime/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py
similarity index 93%
rename from unified-runtime/scripts/benchmarks/benches/syclbench.py
rename to devops/scripts/benchmarks/benches/syclbench.py
index f7cf571a7ecd7..47326b2555a68 100644
--- a/unified-runtime/scripts/benchmarks/benches/syclbench.py
+++ b/devops/scripts/benchmarks/benches/syclbench.py
@@ -8,7 +8,7 @@
 import io
 from utils.utils import run, git_clone, create_build_path
 from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
 from options import options
 
 
@@ -65,14 +65,14 @@ def benchmarks(self) -> list[Benchmark]:
             DagTaskS(self),
             HostDevBandwidth(self),
             LocalMem(self),
-            Pattern_L2(self),
-            Reduction(self),
+            # Pattern_L2(self), # validation failure
+            # Reduction(self), # validation failure
             ScalarProd(self),
             SegmentReduction(self),
-            UsmAccLatency(self),
+            # UsmAccLatency(self), # validation failure
             UsmAllocLatency(self),
-            UsmInstrMix(self),
-            UsmPinnedOverhead(self),
+            # UsmInstrMix(self), # validation failure
+            # UsmPinnedOverhead(self), # validation failure
             VecAdd(self),
             # *** sycl-bench single benchmarks
             # TwoDConvolution(self), # run time < 1ms
@@ -82,20 +82,20 @@ def benchmarks(self) -> list[Benchmark]:
             Atax(self),
             # Atomic_reduction(self), # run time < 1ms
             Bicg(self),
-            Correlation(self),
-            Covariance(self),
-            Gemm(self),
-            Gesumv(self),
-            Gramschmidt(self),
+            # Correlation(self), # validation failure
+            # Covariance(self), # validation failure
+            # Gemm(self), # validation failure
+            # Gesumv(self), # validation failure
+            # Gramschmidt(self), # validation failure
             KMeans(self),
             LinRegCoeff(self),
             # LinRegError(self), # run time < 1ms
-            MatmulChain(self),
+            # MatmulChain(self), # validation failure
             MolDyn(self),
-            Mvt(self),
+            # Mvt(self), # validation failure
             Sf(self),
-            Syr2k(self),
-            Syrk(self),
+            # Syr2k(self), # validation failure
+            # Syrk(self), # validation failure
         ]
 
 
@@ -122,7 +122,7 @@ def run(self, env_vars) -> list[Result]:
         if self.done:
             return
         self.outputfile = os.path.join(self.bench.directory, self.test + ".csv")
-        print(f"{self.__class__.__name__}: Results in {self.outputfile}")
+
         command = [
             f"{self.benchmark_bin}",
             f"--warmup-run",
@@ -143,7 +143,7 @@ def run(self, env_vars) -> list[Result]:
                 if not row[0].startswith("#"):
                     res_list.append(
                         Result(
-                            label=row[0],
+                            label=f"{self.name()} {row[0]}",
                             value=float(row[12]) * 1000,  # convert to ms
                             passed=(row[1] == "PASS"),
                             command=command,
@@ -161,7 +161,7 @@ def teardown(self):
         return
 
     def name(self):
-        return self.test
+        return f"{self.bench.name()} {self.test}"
 
 
 # multi benchmarks
diff --git a/unified-runtime/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py
similarity index 80%
rename from unified-runtime/scripts/benchmarks/benches/test.py
rename to devops/scripts/benchmarks/benches/test.py
index 06eac12b25344..18794d4e9c73c 100644
--- a/unified-runtime/scripts/benchmarks/benches/test.py
+++ b/devops/scripts/benchmarks/benches/test.py
@@ -6,7 +6,7 @@
 import random
 from utils.utils import git_clone
 from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
 from utils.utils import run, create_build_path
 from options import options
 import os
@@ -19,6 +19,9 @@ def __init__(self):
     def setup(self):
         return
 
+    def name(self) -> str:
+        return "Test Suite"
+
     def benchmarks(self) -> list[Benchmark]:
         bench_configs = [
             ("Memory Bandwidth", 2000, 200, "Foo Group"),
@@ -36,18 +39,18 @@ def benchmarks(self) -> list[Benchmark]:
                 value = base_value * value_multiplier
                 diff = base_diff * value_multiplier
 
-                result.append(TestBench(name, value, diff, group))
+                result.append(TestBench(self, name, value, diff, group))
 
         return result
 
 
 class TestBench(Benchmark):
-    def __init__(self, name, value, diff, group=""):
+    def __init__(self, suite, name, value, diff, group=""):
+        super().__init__("", suite)
         self.bname = name
         self.value = value
         self.diff = diff
         self.group = group
-        super().__init__("")
 
     def name(self):
         return self.bname
@@ -58,6 +61,9 @@ def lower_is_better(self):
     def setup(self):
         return
 
+    def description(self) -> str:
+        return f"This is a test benchmark for {self.bname}."
+
     def run(self, env_vars) -> list[Result]:
         random_value = self.value + random.uniform(-1 * (self.diff), self.diff)
         return [
@@ -65,10 +71,11 @@ def run(self, env_vars) -> list[Result]:
                 label=self.name(),
                 explicit_group=self.group,
                 value=random_value,
-                command="",
+                command=["test", "--arg1", "foo"],
                 env={"A": "B"},
                 stdout="no output",
                 unit="ms",
+                description=self.description(),
             )
         ]
 
diff --git a/unified-runtime/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py
similarity index 97%
rename from unified-runtime/scripts/benchmarks/benches/umf.py
rename to devops/scripts/benchmarks/benches/umf.py
index c7b767f02bbe1..1f736e7755f92 100644
--- a/unified-runtime/scripts/benchmarks/benches/umf.py
+++ b/devops/scripts/benchmarks/benches/umf.py
@@ -6,10 +6,10 @@
 import random
 from utils.utils import git_clone
 from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
 from utils.utils import run, create_build_path
 from options import options
-from .oneapi import get_oneapi
+from utils.oneapi import get_oneapi
 import os
 import csv
 import io
@@ -22,8 +22,6 @@ def isUMFAvailable():
 class UMFSuite(Suite):
     def __init__(self, directory):
         self.directory = directory
-        if not isUMFAvailable():
-            print("UMF not provided. Related benchmarks will not run")
 
     def name(self) -> str:
         return "UMF"
diff --git a/unified-runtime/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py
similarity index 79%
rename from unified-runtime/scripts/benchmarks/benches/velocity.py
rename to devops/scripts/benchmarks/benches/velocity.py
index b7d06cbe4a3a2..be36c47ca36d5 100644
--- a/unified-runtime/scripts/benchmarks/benches/velocity.py
+++ b/devops/scripts/benchmarks/benches/velocity.py
@@ -7,10 +7,10 @@
 import shutil
 from utils.utils import git_clone
 from .base import Benchmark, Suite
-from .result import Result
+from utils.result import Result
 from utils.utils import run, create_build_path
 from options import options
-from .oneapi import get_oneapi
+from utils.oneapi import get_oneapi
 import shutil
 
 import os
@@ -115,6 +115,9 @@ def extra_env_vars(self) -> dict:
     def parse_output(self, stdout: str) -> float:
         raise NotImplementedError()
 
+    def description(self) -> str:
+        return ""
+
     def run(self, env_vars) -> list[Result]:
         env_vars.update(self.extra_env_vars())
 
@@ -133,6 +136,7 @@ def run(self, env_vars) -> list[Result]:
                 env=env_vars,
                 stdout=result,
                 unit=self.unit,
+                description=self.description()
             )
         ]
 
@@ -147,6 +151,12 @@ def __init__(self, vb: VelocityBench):
     def name(self):
         return "Velocity-Bench Hashtable"
 
+    def description(self) -> str:
+        return (
+            "Measures hash table search performance using an efficient lock-free algorithm with linear probing. "
+            "Reports throughput in millions of keys processed per second. Higher values indicate better performance."
+        )
+
     def bin_args(self) -> list[str]:
         return ["--no-verify"]
 
@@ -170,6 +180,13 @@ def __init__(self, vb: VelocityBench):
     def name(self):
         return "Velocity-Bench Bitcracker"
 
+    def description(self) -> str:
+        return (
+            "Password-cracking application for BitLocker-encrypted memory units. "
+            "Uses dictionary attack to find user or recovery passwords. "
+            "Measures total time required to process 60000 passwords."
+        )
+
     def bin_args(self) -> list[str]:
         self.data_path = os.path.join(self.vb.repo_path, "bitcracker", "hash_pass")
 
@@ -204,11 +221,19 @@ def download_deps(self):
             "https://github.com/oneapi-src/Velocity-Bench/raw/main/sobel_filter/res/sobel_filter_data.tgz?download=",
             "sobel_filter_data.tgz",
             untar=True,
+            checksum="7fc62aa729792ede80ed8ae70fb56fa443d479139c5888ed4d4047b98caec106687a0f05886a9ced77922ccba7f65e66",
         )
 
     def name(self):
         return "Velocity-Bench Sobel Filter"
 
+    def description(self) -> str:
+        return (
+            "Popular RGB-to-grayscale image conversion technique that applies a gaussian filter "
+            "to reduce edge artifacts. Processes a large 32K x 32K image and measures "
+            "the time required to apply the filter."
+        )
+
     def bin_args(self) -> list[str]:
         return [
             "-i",
@@ -249,6 +274,13 @@ def run(self, env_vars) -> list[Result]:
     def name(self):
         return "Velocity-Bench QuickSilver"
 
+    def description(self) -> str:
+        return (
+            "Solves a simplified dynamic Monte Carlo particle-transport problem used in HPC. "
+            "Replicates memory access patterns, communication patterns, and branching of Mercury workloads. "
+            "Reports a figure of merit in MMS/CTT where higher values indicate better performance."
+        )
+
     def lower_is_better(self):
         return False
 
@@ -279,14 +311,22 @@ def __init__(self, vb: VelocityBench):
     def download_deps(self):
         self.download(
             "easywave",
-            "https://git.gfz-potsdam.de/id2/geoperil/easyWave/-/raw/master/data/examples.tar.gz",
+            "https://gitlab.oca.eu/AstroGeoGPM/eazyWave/-/raw/master/data/examples.tar.gz",
             "examples.tar.gz",
             untar=True,
+            checksum="3b0cd0efde10122934ba6db8451b8c41f4f95a3370fc967fc5244039ef42aae7e931009af1586fa5ed2143ade8ed47b1",
         )
 
     def name(self):
         return "Velocity-Bench Easywave"
 
+    def description(self) -> str:
+        return (
+            "A tsunami wave simulator used for researching tsunami generation and wave propagation. "
+            "Measures the elapsed time in milliseconds to simulate a specified tsunami event "
+            "based on real-world data."
+        )
+
     def bin_args(self) -> list[str]:
         return [
             "-grid",
@@ -341,6 +381,13 @@ def download_deps(self):
     def name(self):
         return "Velocity-Bench CudaSift"
 
+    def description(self) -> str:
+        return (
+            "Implementation of the SIFT (Scale Invariant Feature Transform) algorithm "
+            "for detecting, describing, and matching local features in images. "
+            "Measures average processing time in milliseconds."
+        )
+
     def parse_output(self, stdout: str) -> float:
         match = re.search(r"Avg workload time = (\d+\.\d+) ms", stdout)
         if match:
@@ -364,6 +411,7 @@ def download_deps(self):
             "cifar-10-binary.tar.gz",
             untar=True,
             skip_data_dir=True,
+            checksum="974b1bd62da0cb3b7a42506d42b1e030c9a0cb4a0f2c359063f9c0e65267c48f0329e4493c183a348f44ddc462eaf814",
         )
         return
 
@@ -382,6 +430,13 @@ def extra_cmake_args(self):
     def name(self):
         return "Velocity-Bench dl-cifar"
 
+    def description(self) -> str:
+        return (
+            "Deep learning image classification workload based on the CIFAR-10 dataset "
+            "of 60,000 32x32 color images in 10 classes. Uses neural networks to "
+            "classify input images and measures total calculation time."
+        )
+
     def parse_output(self, stdout: str) -> float:
         match = re.search(
             r"dl-cifar - total time for whole calculation: (\d+\.\d+) s", stdout
@@ -407,6 +462,7 @@ def download_deps(self):
             "train-images.idx3-ubyte.gz",
             unzip=True,
             skip_data_dir=True,
+            checksum="f40eb179f7c3d2637e789663bde56d444a23e4a0a14477a9e6ed88bc39c8ad6eaff68056c0cd9bb60daf0062b70dc8ee",
         )
         self.download(
             "datasets",
@@ -414,6 +470,7 @@ def download_deps(self):
             "train-labels.idx1-ubyte.gz",
             unzip=True,
             skip_data_dir=True,
+            checksum="ba9c11bf9a7f7c2c04127b8b3e568cf70dd3429d9029ca59b7650977a4ac32f8ff5041fe42bc872097487b06a6794e00",
         )
         self.download(
             "datasets",
@@ -421,6 +478,7 @@ def download_deps(self):
             "t10k-images.idx3-ubyte.gz",
             unzip=True,
             skip_data_dir=True,
+            checksum="1bf45877962fd391f7abb20534a30fd2203d0865309fec5f87d576dbdbefdcb16adb49220afc22a0f3478359d229449c",
         )
         self.download(
             "datasets",
@@ -428,6 +486,7 @@ def download_deps(self):
             "t10k-labels.idx1-ubyte.gz",
             unzip=True,
             skip_data_dir=True,
+            checksum="ccc1ee70f798a04e6bfeca56a4d0f0de8d8eeeca9f74641c1e1bfb00cf7cc4aa4d023f6ea1b40e79bb4707107845479d",
         )
 
     def extra_cmake_args(self):
@@ -445,6 +504,13 @@ def extra_cmake_args(self):
     def name(self):
         return "Velocity-Bench dl-mnist"
 
+    def description(self) -> str:
+        return (
+            "Digit recognition based on the MNIST database, one of the oldest and most popular "
+            "databases of handwritten digits. Uses neural networks to identify digits "
+            "and measures total calculation time."
+        )
+
     def bin_args(self):
         return ["-conv_algo", "ONEDNN_AUTO"]
 
@@ -488,6 +554,13 @@ def extra_cmake_args(self):
     def name(self):
         return "Velocity-Bench svm"
 
+    def description(self) -> str:
+        return (
+            "Implementation of Support Vector Machine, a popular classical machine learning technique. "
+            "Uses supervised learning models with associated algorithms to analyze data "
+            "for classification and regression analysis. Measures total elapsed time."
+        )
+
     def bin_args(self):
         return [
             f"{self.code_path}/a9a",
diff --git a/unified-runtime/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py
similarity index 84%
rename from unified-runtime/scripts/benchmarks/history.py
rename to devops/scripts/benchmarks/history.py
index 7902aa4f04c35..2bb0b9db8ea38 100644
--- a/unified-runtime/scripts/benchmarks/history.py
+++ b/devops/scripts/benchmarks/history.py
@@ -6,7 +6,7 @@
 import os
 import json
 from pathlib import Path
-from benches.result import Result, BenchmarkRun
+from utils.result import Result, BenchmarkRun
 from options import Compare, options
 from datetime import datetime, timezone
 from utils.utils import run
@@ -63,12 +63,29 @@ def create_run(self, name: str, results: list[Result]) -> BenchmarkRun:
         try:
             result = run("git rev-parse --short HEAD")
             git_hash = result.stdout.decode().strip()
+
+            # Get the GitHub repo URL from git remote
+            remote_result = run("git remote get-url origin")
+            remote_url = remote_result.stdout.decode().strip()
+
+            # Convert SSH or HTTPS URL to owner/repo format
+            if remote_url.startswith("git@github.com:"):
+                # SSH format: git@github.com:owner/repo.git
+                github_repo = remote_url.split("git@github.com:")[1].rstrip(".git")
+            elif remote_url.startswith("https://github.com/"):
+                # HTTPS format: https://github.com/owner/repo.git
+                github_repo = remote_url.split("https://github.com/")[1].rstrip(".git")
+            else:
+                github_repo = None
+
         except:
             git_hash = "unknown"
+            github_repo = None
 
         return BenchmarkRun(
             name=name,
             git_hash=git_hash,
+            github_repo=github_repo,
             date=datetime.now(tz=timezone.utc),
             results=results,
         )
diff --git a/devops/scripts/benchmarks/html/config.js b/devops/scripts/benchmarks/html/config.js
new file mode 100644
index 0000000000000..c1210b2b21da5
--- /dev/null
+++ b/devops/scripts/benchmarks/html/config.js
@@ -0,0 +1,5 @@
+const config = {
+    remoteDataUrl: ''
+};
+// defaultCompareNames = [];
+// suiteNames = [];
diff --git a/devops/scripts/benchmarks/html/data.js b/devops/scripts/benchmarks/html/data.js
new file mode 100644
index 0000000000000..bd2a4bb9c6f36
--- /dev/null
+++ b/devops/scripts/benchmarks/html/data.js
@@ -0,0 +1,16 @@
+benchmarkRuns = [
+{"results": [{"label": "Memory Bandwidth 1", "value": 2040.8882991390067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 34.457610431783294, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2529.3774380653363, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 135.81200692232412, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2719.8110231537125, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 162.32053564116694, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3227.632839523546, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.72010893383725, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3514.4167999909496, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.05909225714902, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4012.1042760150494, "command": ["test", "--arg1", 
"foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 213.80137392913923, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 103.58153862508325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.155836817249414, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 125.92477357063481, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.26567067278589, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 133.83240260210536, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.763812811796768, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 156.26773548103202, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.861842969825087, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 167.3255955272463, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.48929969639468, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": 
"This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 220.49290675578928, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.900958177754223, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1480.3642886335488, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 97.14840825777334, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1757.3646882744213, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 94.97795059309506, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2141.760057641498, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 32.20444501013399, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2465.113025920638, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.56485787432257, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2646.9736547641232, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Foo Group", "stddev": 165.21303041397977, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2797.023188351585, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 49.789332852672736, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3072.2144224296385, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 100.0435838937749, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3645.5868819428038, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 186.63713430054412, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4365.696214338321, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 70.80581668642078, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4712.424975602965, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 237.2219789185776, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5490.717140126425, "command": ["test", 
"--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 102.98496803461086, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5899.69529717778, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 365.8281107263356, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 249.0033673842501, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.641649890532847, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 307.2248975403931, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.106532892713558, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 364.94516101524755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.487184395370704, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 415.1825140704191, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 4.837117436872584, "name": "Cache Miss Rate 4", 
"lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 440.50926932373267, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.400527065008065, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 513.2345717731824, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.92653205921289, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "13462f5f6", "github_repo": "pbalcer/llvm", "date": "2025-03-07T14:04:12.881983+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 2061.891541779758, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 45.43418752146129, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2418.370570307403, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 23.41390025375235, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2759.548256219084, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.04750469338484, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3268.9851244693905, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 179.65245219605663, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3573.980571932074, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.27214661339116, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3913.178724155857, "command": ["test", "--arg1", "foo"], 
"env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 187.41955301323392, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.66099349103821, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 9.949437203365676, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 116.94033117978861, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.670085238288802, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 141.8516673102208, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.49397378099331, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 154.47973126513787, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.7581068444608, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 194.47100906915202, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.603348605481727, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test 
benchmark for Latency 5."}, {"label": "Latency 6", "value": 189.26766261792042, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.80270435298115, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1548.0366148601304, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 22.556620202365167, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1804.0612981627564, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 130.9251933818919, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2117.020524938414, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 124.18576268885376, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2340.6226309817375, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 45.23157229205414, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2657.435335624127, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": 
"Foo Group", "stddev": 178.93395582367347, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3100.1660243239976, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 59.26661177659249, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2973.0427624231074, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.47659228805884, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3499.50915562217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 202.92584935080856, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 3906.063346066898, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 58.67588644266499, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4776.315860317371, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 337.294287649651, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5294.515316259128, "command": ["test", "--arg1", "foo"], 
"env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 310.6460231086305, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5883.364679907042, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 433.9862905464425, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.81458542543336, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.259893742055365, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.324345463754, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.537217356717523, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 350.317230088579, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.694135619195492, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 404.94767826325585, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.03967001195265, "name": "Cache Miss Rate 4", "lower_is_better": 
true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 448.68781789313334, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 37.68940635002855, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 479.7145913704619, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.819332357308436, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "52dba2a69", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:48:42.727410+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 1944.712475358489, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.3517754822544, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2494.968647183357, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 144.62096222735542, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2827.96959627778, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 161.09215987917975, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3246.4235207906368, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 194.8841813593721, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3415.497030173447, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 207.51586434688852, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3947.173405699456, "command": ["test", "--arg1", "foo"], 
"env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.35155081978226, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.27501062264594, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.62997659996243, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 129.58001802257706, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.223861407928204, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 152.60658050771121, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.644344734962786, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.8365309090243, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 1.9279203474927489, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 179.69325992783263, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.567971182588, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test 
benchmark for Latency 5."}, {"label": "Latency 6", "value": 190.29777300705297, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.545022416801082, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1520.7774888153917, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.44363449416652, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1841.9402998174073, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 36.99472050334539, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2063.573372718332, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 103.76799421011498, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2411.1299338593512, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.55096124823987, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2636.4186072468115, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": 
"Foo Group", "stddev": 136.15002376636508, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3012.5429889405455, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 220.10345804333795, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2912.3694681990496, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.24541212948046, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3634.840665141933, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 205.90393111568957, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4221.70291649172, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 245.0992536434908, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4563.9141528786395, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 148.15450755100105, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5449.735755715656, "command": ["test", "--arg1", "foo"], 
"env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 283.67446282594074, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6103.288896553245, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 497.0264510256128, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.1162346822855, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.349695364944424, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.0848370650819, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.091832690685845, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 368.2173261284879, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.911533458328602, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 400.932628864893, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.298171550718916, "name": "Cache Miss Rate 4", "lower_is_better": 
true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 465.45774333645085, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.008461742975705, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 494.19807030391513, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 31.290996975880688, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "a15019b41", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:42:53.963514+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 1971.9235866578244, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 107.4119769093561, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2381.359513168276, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.1820922785026, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2816.164331241929, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 152.82523354152792, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3207.788500404049, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.98152700892044, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3612.0807949868076, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 238.29524372895352, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4041.187128183399, "command": ["test", "--arg1", "foo"], 
"env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 244.78707963276804, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 110.17204676929632, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.7488792731298, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 110.04874446073308, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.111000761355566, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 139.80726599267632, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.761524761674202, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 167.65946901880108, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.961270297928603, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 175.07359940308456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.654053542209933, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a 
test benchmark for Latency 5."}, {"label": "Latency 6", "value": 188.92280945420617, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.32935674842163, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1498.3892879578825, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 72.76968286004643, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1802.449855059067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 117.35877323708975, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2141.6873668536814, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 109.1211656598374, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2481.234320462784, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.29288921121633, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2592.315439130817, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Foo Group", "stddev": 171.50618527958042, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2986.630322110839, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 134.14155338256344, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3023.0069882524413, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.0861804957972, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3491.2685416445424, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.82885721897767, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4267.684357012167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 258.535523100285, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4833.943488351638, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 288.5816839229039, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5460.197706764911, "command": ["test", 
"--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 294.3526928188145, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6211.479518188777, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 448.53753098503586, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 248.60974821168077, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.966964309950376, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 299.08129766722294, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.458275817843905, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 345.13218478336375, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.88260705972654, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 368.43448345001804, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.0293359056239115, "name": "Cache Miss Rate 
4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 462.81719243303485, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.16929631101137, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 498.84520836251704, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.943372517547482, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "461343280", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:37:14.849756+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 2013.395440288061, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 119.82142134259605, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2432.2596423503755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 118.39327416892019, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2674.0160578165187, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 194.41545828080007, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3063.9534832147688, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 205.67379884852215, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3584.672342581568, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 181.67353531675607, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4125.180591214061, "command": ["test", "--arg1", "foo"], 
"env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 273.2758074594961, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 106.37633318466106, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.247008579218756, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 111.99312616915259, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.168574067720925, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 148.4561344088857, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.59295361046173, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 162.0852714518944, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.380760230770385, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 187.04637816265117, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.658051327117878, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a 
test benchmark for Latency 5."}, {"label": "Latency 6", "value": 200.16012739025047, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.6645406941134, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1505.183607875215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 93.57793481885791, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1786.864494698917, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 122.1347513455775, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2104.854088217566, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 128.42311038597916, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2373.3921231994896, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.26128420435194, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2680.62360254391, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": 
"Foo Group", "stddev": 184.49504836547473, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2957.0424468763595, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.13611056356788, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3024.0197501043167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 155.3618836169113, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3658.757514096598, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 149.8130576669698, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4336.791327103415, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 267.10403249537495, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4594.550884548686, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 339.1255595981214, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5619.202557626439, "command": ["test", "--arg1", "foo"], 
"env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 324.7429329550701, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6145.450470023206, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 397.2604324517752, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 242.7598020860891, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 4.503364581661284, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 295.888600531132, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.878793912236713, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 333.6634181341022, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.945944118430873, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 386.559044229885, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.909652211845977, "name": "Cache Miss Rate 4", "lower_is_better": 
true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 433.56985826314695, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.16786402230611, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 475.40739140041325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.532574731353257, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "461343280", "github_repo": "pbalcer/llvm", "date": "2025-03-07T12:55:23.831147+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 2036.879511822098, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 147.49123010982262, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2358.605120547564, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 148.31108709325747, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2782.758869742085, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.07850443580668, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3211.303768537726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.64603088602735, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3726.2788114170226, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.68455828387613, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4034.451298605878, "command": ["test", "--arg1", "foo"], 
"env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 214.04589132488434, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 97.81132147931729, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.4388910648024, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 123.47877514885052, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.850644538343035, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 138.3636972712076, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.453475343660529, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 159.0926504710019, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.406923335827646, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 177.58148765355367, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.719641698346496, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a 
test benchmark for Latency 5."}, {"label": "Latency 6", "value": 213.78191902260386, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.56513730925096, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1508.4347909839335, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.90540186941426, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1765.9068352126365, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 83.00665769599348, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2079.3459975121978, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 129.25159465427944, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2370.0084472113276, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 110.2565848005119, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2598.252204318904, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Foo Group", "stddev": 170.98495052891545, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2969.9956302642463, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.29990951898574, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2929.264699223759, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.51544383864362, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3605.747338045167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.72266927612378, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4169.092383202888, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 221.65028734739832, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4342.400927657371, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 10.226688336643164, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5335.841345368252, "command": ["test", 
"--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 322.69883423073804, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5891.394678938614, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 442.78667173376004, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 253.57797655240805, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.797128115716593, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 300.17543480746747, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.95344804548685, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 353.0001179231053, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.30650858255822, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 393.61574583773006, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.460697740276498, "name": "Cache Miss Rate 
4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 411.7013399749935, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.8389196983489504, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 493.65540609194693, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 32.30948655635452, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "59d88dae7", "github_repo": "pbalcer/llvm", "date": "2025-03-07T12:49:15.115091+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 2195.552651542308, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 40.940741416639945, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2207.459054225258, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 31.681573504875555, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2791.852261483982, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 145.62649882463464, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3134.2219672329984, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 168.02514783326134, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3767.7635130447607, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.24591155046014, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3942.521187753682, "command": ["test", "--arg1", "foo"], 
"env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 228.82977417585033, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 100.809622959215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.473952358992248, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 123.83059821116996, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.60938099214386, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 140.93982647796008, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.29049957344098, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.82319101117525, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.247880470121356, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 177.31431566581708, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.811044444821867, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a 
test benchmark for Latency 5."}, {"label": "Latency 6", "value": 217.37228664795157, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.08328831134193, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1549.1191711106521, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 100.63323493526255, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1748.2566655197188, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 125.49717792070385, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2038.1492661325733, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 101.90033883093976, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2435.624131184369, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.4633804704484, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2625.115911806016, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Foo Group", "stddev": 142.00862169479268, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3041.342229934156, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 168.4496950355338, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2937.258997841614, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 155.30016809201283, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3538.971007263721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 226.88178732022945, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4063.7149977059134, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 317.4858199901966, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4911.07807577187, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 250.7864115701977, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5377.1846970238585, "command": ["test", 
"--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 306.0068346396366, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6245.575950509069, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 298.97595013407596, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.84781710540977, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.78683687151215, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 295.5304009113721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.652016327478979, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 357.4112170450192, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.461446948742276, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 395.8114457367419, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.580352011562915, "name": "Cache Miss Rate 4", 
"lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 449.871031326954, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 30.053959147816688, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 504.6580132142422, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.41875628689506, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "PR1234", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:58:34.927820+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 1958.784118312001, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 126.57484819538932, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2440.601149884664, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.0533346583976, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2721.428822801097, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 249.6308268113163, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3177.0055972660625, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 146.92056751044575, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3549.5230383598678, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 234.94466209634086, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3978.0960993946674, "command": ["test", "--arg1", "foo"], 
"env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 188.9037213571779, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 103.09498391363023, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.02579026210347, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 109.08496102147217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.749411126280116, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 161.69893522471634, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.4430257786783773, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 162.34529521039352, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.7714067922127894, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 170.86523239479655, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.608020176521034, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a 
test benchmark for Latency 5."}, {"label": "Latency 6", "value": 181.05706010508592, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.277369339946695, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1463.0649649228315, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 86.83848693136936, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1864.683141120113, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 86.4841206172361, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2130.758830413485, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.54699391922728, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2381.8935399566794, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 144.76036506870986, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2662.7577579295776, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Foo Group", "stddev": 132.5724441198216, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3078.79130536842, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 17.097525165274803, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2955.7832223272444, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 171.2189444201398, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3688.781307878483, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 65.65926515650821, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4183.4728233450305, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 101.81987978181542, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4939.824132342117, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 289.1390313704078, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5502.544756998508, "command": ["test", 
"--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 379.9176358151893, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5664.321185867887, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 103.74897438065652, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 246.62407640713522, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.589667669507943, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.08780541388853, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.339251126835014, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 349.13408375848826, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.707215404345545, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 420.6620028708826, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.922885386248023, "name": "Cache Miss Rate 
4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 470.0593095392814, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.595229921387679, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 495.115546467953, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.928558698066297, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline2", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:57:43.925526+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 2171.099861571096, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 19.23255817429395, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2429.228219203666, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 181.04518738452575, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2756.5078091010796, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 126.73272767497978, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3197.349485288246, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 154.47555387593712, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3607.973454642879, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 213.0597134090529, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3925.314914910963, "command": ["test", "--arg1", "foo"], 
"env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 293.48112660476045, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 104.57782310281735, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.873834118675967, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 129.5117553518436, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.407159402934873, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 142.08007511017124, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.930090749895689, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.0629031829932, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.918041427401283, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 188.6427038678885, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.828269431125875, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a 
test benchmark for Latency 5."}, {"label": "Latency 6", "value": 200.60322195597215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.338879356636095, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1491.980189873357, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 71.9836340794669, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1794.0628090299717, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 14.307364673980224, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2192.3591192326044, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 114.60420372385168, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2422.202702788314, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 119.26859163162072, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2770.8727103546726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Foo Group", "stddev": 195.12079821799085, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2951.282362921916, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 128.2254379990313, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3039.27661040724, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.6539091592498, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3578.211797262128, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 159.14128724739464, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4128.29686489867, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 223.4100922139098, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4848.219925955905, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 77.93231029690887, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5070.191606088231, "command": ["test", 
"--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.94019467972001, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5966.489310951252, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 336.7173682128105, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 254.57850713986198, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.385164783606097, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 304.8091397808394, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.103188082400504, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 350.1613069208256, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.345582528912242, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 411.1456865029576, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.86244360659498, "name": "Cache Miss Rate 4", 
"lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 426.04740645126986, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.597587190328635, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 545.743901896845, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 8.94286171044266, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:57:27.051913+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 1993.661134316776, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 123.85525126992296, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2301.0905948917325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.48673687735095, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2873.4628362191897, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 162.61249284171058, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3238.735403505523, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 56.51716037758475, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3728.4508889231124, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 118.24607483750995, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4034.9082581910916, "command": ["test", "--arg1", "foo"], 
"env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 74.76961240079906, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 100.88113187316719, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.905008641590433, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 121.61102013493655, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.792042693243397, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 140.99528044475127, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.222627363561376, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 163.077114107551, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.17919680914877, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 188.59968240327134, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.466938787214904, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a 
test benchmark for Latency 5."}, {"label": "Latency 6", "value": 198.73690996443867, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.07228063106639, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1456.8721146219054, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 97.05357208107213, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1760.0202375360182, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 113.83470167982718, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2033.3289371002388, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 131.96155202489578, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2408.2974437457224, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.38445697767614, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2693.2667748312374, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Foo Group", "stddev": 147.88552510962938, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2991.3045632907692, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 36.616739773559836, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3006.5513639744195, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.20153435546402, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3946.7240883975173, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 24.834845762711534, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4471.79595749108, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 222.54023025674027, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4746.352137751869, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 299.0771752770653, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5465.286069604949, "command": ["test", 
"--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 348.6918957133431, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5823.519621687581, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 294.3249644414966, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 249.32918263045667, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.03544118455393, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 288.1546272324227, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.7727205750953, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 363.3503259942238, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.098142551778466, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 392.91985489944227, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.846918288877376, "name": "Cache Miss Rate 4", 
"lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 456.7540443475017, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.728347618091988, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 499.13159330438293, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.2322764193576, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline2", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:48:34.707858+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 2038.9496500003788, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 117.27052133056621, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2294.3238192937456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.05216178962178, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2816.7462067242177, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 120.10657812200931, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3330.947955167447, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.07867992457224, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3427.804220062, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 62.398802753262366, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3931.7861541695424, "command": ["test", "--arg1", "foo"], 
"env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 259.7643410153898, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 101.89870179257153, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 9.924103694663449, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 124.9849961475332, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.073706451113821, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 150.17912140564707, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.831834198448414, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 165.06404530951897, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.098638603407267, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 189.4271367424946, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.049029334825786, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a 
test benchmark for Latency 5."}, {"label": "Latency 6", "value": 211.70091863399844, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.393712112471537, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1534.395057650628, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 81.6427334392383, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1778.474541262558, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 42.56143420705744, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2133.7461366070925, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 116.35913144113613, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2459.5790315346367, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.71322011411286, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2637.4334475618302, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Foo Group", "stddev": 183.30427116704686, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2944.098595726341, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 170.72289928237976, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2907.9632013559226, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.53757173689922, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3509.107421580347, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 236.8620853533764, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4200.093284524192, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 239.58028996799285, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4713.504209113087, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 227.25719976419228, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5049.944494674869, "command": ["test", 
"--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.03307008996549, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6191.498973826217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 317.5921715209765, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 248.80616580373456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.592467485447356, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.08520837227366, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.677266179208607, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 357.6038589068661, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 5.454584817104773, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 385.0134083066721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.301075636602707, "name": "Cache Miss Rate 4", 
"lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 444.0720671004903, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.366607976819555, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 544.9286314848067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 5.8252101632892845, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:43:24.047048+00:00"},
+{"results": [{"label": "Memory Bandwidth 1", "value": 2021.1035365873993, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.72840561483144, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2338.909416436906, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.64663652969023, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2858.077160911349, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 192.0675550591675, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3306.833623604521, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 56.99029424270755, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3627.5542312476477, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 124.9433053351406, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3950.086638208113, "command": ["test", "--arg1", "foo"], 
"env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 226.7800326425516, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.47479639005672, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.581115036930171, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 112.93833387666766, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.456175417231416, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 127.96521280400299, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.881167162370817, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 164.06646826051218, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.400563021933642, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 172.50207971758653, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.59514547087479, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a 
test benchmark for Latency 5."}, {"label": "Latency 6", "value": 206.57752612959177, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.6206498096027, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1450.762861653755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 62.85051722934544, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1744.8736145848297, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 28.4724370062761, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2137.935073637293, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.15696927062444, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2405.7909943176865, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 138.83795715557775, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2660.942840886126, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": 
"Foo Group", "stddev": 160.5879766560021, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3070.783714494726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 225.80178015382134, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3021.0961116313642, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 63.199028430669784, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3562.444757764406, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 233.25324926372082, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4147.683102448584, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 267.47351186248994, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4681.79862307404, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 201.00316493809274, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5257.332484362561, "command": ["test", "--arg1", "foo"], 
"env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 324.82272792943763, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5860.230588756176, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 370.86153080312647, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 245.42900602601247, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.361128649495964, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 300.16320013554315, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.935265770560466, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 345.53233993081176, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.5441134792233, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 397.50592062832635, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.267205299179718, "name": "Cache Miss Rate 4", 
"lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 426.56360681512984, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 28.587460065910978, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 493.39520093238633, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.049730400867045, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:40:45.136466+00:00"}
+];
+
+defaultCompareNames = []; // Implicit global (no var/let/const), empty in this fixture. NOTE(review): presumably the run names pre-selected for comparison by scripts.js — confirm.
diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html
new file mode 100644
index 0000000000000..c10844f15c707
--- /dev/null
+++ b/devops/scripts/benchmarks/html/index.html
@@ -0,0 +1,205 @@
+<!--
+  Copyright (C) 2024 Intel Corporation
+  Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+  See LICENSE.TXT
+  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+-->
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>Benchmark Results</title>
+    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
+    <script src="https://cdn.jsdelivr.net/npm/chartjs-adapter-date-fns"></script>
+    <script src="data.js"></script>
+    <script src="config.js"></script>
+    <script src="scripts.js"></script>
+    <style>
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
+            margin: 0;
+            padding: 16px;
+            background: #f8f9fa;
+        }
+        .container {
+            max-width: 1100px;
+            margin: 0 auto;
+        }
+        h1, h2 {
+            color: #212529;
+            text-align: center;
+            margin-bottom: 24px;
+            font-weight: 500;
+        }
+        .chart-container {
+            background: white;
+            border-radius: 8px;
+            padding: 24px;
+            margin-bottom: 24px;
+            box-shadow: 0 1px 3px rgba(0,0,0,0.1);
+        }
+        @media (max-width: 768px) {
+            body {
+                padding: 12px;
+            }
+            .chart-container {
+                padding: 16px;
+                border-radius: 6px;
+            }
+            h1 {
+                font-size: 24px;
+                margin-bottom: 16px;
+            }
+        }
+        .filter-container {
+            text-align: center;
+            margin-bottom: 24px;
+        }
+        .filter-container input {
+            padding: 8px;
+            font-size: 16px;
+            border: 1px solid #ccc;
+            border-radius: 4px;
+            width: 400px;
+            max-width: 100%;
+        }
+        .suite-filter-container {
+            text-align: center;
+            margin-bottom: 24px;
+            padding: 16px;
+            background: #e9ecef;
+            border-radius: 8px;
+        }
+        .suite-checkbox {
+            margin: 0 8px;
+        }
+        details {
+            margin-bottom: 24px;
+        }
+        summary {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            font-size: 16px;
+            font-weight: 500;
+            cursor: pointer;
+            padding: 8px;
+            background: #e9ecef;
+            border-radius: 8px;
+            user-select: none;
+        }
+        summary:hover {
+            background: #dee2e6;
+        }
+        .extra-info {
+            padding: 8px;
+            background: #f8f9fa;
+            border-radius: 8px;
+            margin-top: 8px;
+        }
+        .run-selector {
+            text-align: center;
+            margin-bottom: 24px;
+            padding: 16px;
+            background: #e9ecef;
+            border-radius: 8px;
+        }
+        .run-selector select {
+            width: 300px;
+            padding: 8px;
+            margin-right: 8px;
+        }
+        .run-selector button {
+            padding: 8px 16px;
+            background: #0068B5;
+            color: white;
+            border: none;
+            border-radius: 4px;
+            cursor: pointer;
+        }
+        .run-selector button:hover {
+            background: #00C7FD;
+        }
+        .selected-runs {
+            margin-top: 12px;
+        }
+        .selected-run {
+            display: inline-block;
+            padding: 4px 8px;
+            margin: 4px;
+            background: #e2e6ea;
+            border-radius: 4px;
+        }
+        .selected-run button {
+            margin-left: 8px;
+            padding: 0 4px;
+            background: none;
+            border: none;
+            color: #dc3545;
+            cursor: pointer;
+        }
+        .download-button {
+            background: none;
+            border: none;
+            color: #0068B5;
+            cursor: pointer;
+            font-size: 16px;
+            padding: 4px;
+            margin-left: 8px;
+        }
+        .download-button:hover {
+            color: #00C7FD;
+        }
+        .loading-indicator {
+            text-align: center;
+            font-size: 18px;
+            color: #0068B5;
+            margin-bottom: 20px;
+        }
+        .extra-info-entry {
+            border: 1px solid #ddd;
+            padding: 10px;
+            margin-bottom: 10px;
+            background-color: #f9f9f9;
+            border-radius: 5px;
+        }
+        .extra-info-entry strong {
+            display: block;
+            margin-bottom: 5px;
+        }
+        .extra-info-entry em {
+            color: #555;
+        }
+</style>
+</head>
+<body>
+    <div class="container">
+        <h1>Benchmark Results</h1>
+        <div id="loading-indicator" class="loading-indicator" style="display: none;">
+            Loading data, please wait...
+        </div>
+        <div class="filter-container">
+            <input type="text" id="bench-filter" placeholder="Regex...">
+        </div>
+        <div class="suite-filter-container" id="suite-filters">
+            <!-- Suite checkboxes will be generated by JavaScript -->
+        </div>
+        <div class="run-selector">
+            <select id="run-select">
+                <option value="">Select a run to compare...</option>
+            </select>
+            <button onclick="addSelectedRun()">Add</button>
+            <div id="selected-runs" class="selected-runs"></div>
+        </div>
+        <details class="timeseries" open>
+            <summary>Historical Results</summary>
+            <div class="charts"></div>
+        </details>
+        <details class="bar-charts" open>
+            <summary>Comparisons</summary>
+            <div class="charts"></div>
+        </details>
+    </div>
+</body>
+</html>
diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js
new file mode 100644
index 0000000000000..7ba00738e727a
--- /dev/null
+++ b/devops/scripts/benchmarks/html/scripts.js
@@ -0,0 +1,564 @@
+// Copyright (C) 2024 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Core state
+let activeRuns = new Set(defaultCompareNames); // names of the runs currently shown in the charts
+let chartInstances = new Map(); // canvas id -> Chart.js instance, used to destroy or download a chart
+let suiteNames = new Set(); // every suite name found in the loaded results (filled by setupSuiteFilters)
+let timeseriesData, barChartsData, allRunNames; // chart models and run names, assigned in initializeCharts
+
+// DOM Elements
+let runSelect, selectedRunsDiv, suiteFiltersContainer;
+
+// Run selector functions
+function updateSelectedRuns() {
+    // Rebuild the selected-run list from activeRuns (one element per run,
+    // built by createRunElement), then refresh the charts.
+    selectedRunsDiv.innerHTML = '';
+    for (const runName of activeRuns) selectedRunsDiv.appendChild(createRunElement(runName));
+    updateCharts();
+}
+
+function createRunElement(name) {
+    // DOM APIs instead of innerHTML: a run name with quotes or markup can no longer break or inject HTML.
+    const runElement = Object.assign(document.createElement('span'), { className: 'selected-run' });
+    runElement.append(`${name} `, Object.assign(document.createElement('button'), { textContent: 'X', onclick: () => removeRun(name) }));
+    return runElement;
+}
+
+function addSelectedRun() {
+    // Activate the run chosen in the dropdown; an empty value or an already active run is ignored.
+    const chosenRun = runSelect.value;
+    if (!chosenRun || activeRuns.has(chosenRun)) return;
+    activeRuns.add(chosenRun);
+    updateSelectedRuns();
+}
+
+function removeRun(name) {
+    activeRuns.delete(name); // no-op when the run is not currently selected
+    updateSelectedRuns(); // rebuilds the selected-run list and redraws the charts
+}
+
+// Chart creation and update
+function createChart(data, containerId, type) {
+    // Draw `data` on the canvas with id `containerId` and register the chart in chartInstances.
+    // type === 'time' builds a line chart from data.runs; any other value builds a bar chart.
+    if (chartInstances.has(containerId)) { // replace, rather than stack, a chart already registered for this id
+        chartInstances.get(containerId).destroy();
+    }
+    const ctx = document.getElementById(containerId).getContext('2d');
+    const options = {
+        responsive: true,
+        plugins: {
+            title: {
+                display: true,
+                text: data.label
+            },
+            subtitle: {
+                display: true,
+                text: data.lower_is_better ? "Lower is better" : "Higher is better"
+            },
+            tooltip: {
+                callbacks: {
+                    label: (context) => {
+                        if (type === 'time') {
+                            const point = context.raw; // point object as built by createTimeseriesDatasets
+                            return [
+                                `${data.label}:`,
+                                `Value: ${point.y.toFixed(2)} ${data.unit}`,
+                                `Stddev: ${point.stddev.toFixed(2)} ${data.unit}`,
+                                `Git Hash: ${point.gitHash}`,
+                            ];
+                        } else {
+                            return [`${context.dataset.label}:`,
+                                `Value: ${context.parsed.y.toFixed(2)} ${data.unit}`,
+                            ];
+                        }
+                    }
+                }
+            }
+        },
+        scales: {
+            y: {
+                title: {
+                    display: true,
+                    text: data.unit
+                }
+            }
+        }
+    };
+
+    if (type === 'time') { // timeseries only: nearest-point hover, click-through to the commit, time x axis
+        options.interaction = {
+            mode: 'nearest',
+            intersect: false
+        };
+        options.onClick = (event, elements) => {
+            if (elements.length > 0) {
+                const point = elements[0].element.$context.raw;
+                if (point.gitHash && point.gitRepo) { // both are needed to build the commit URL
+                    window.open(`https://github.com/${point.gitRepo}/commit/${point.gitHash}`, '_blank');
+                }
+            }
+        };
+        options.scales.x = {
+            type: 'time',
+            ticks: {
+                maxRotation: 45,
+                minRotation: 45,
+                autoSkip: true,
+                maxTicksLimit: 10
+            }
+        };
+    }
+
+    const chartConfig = {
+        type: type === 'time' ? 'line' : 'bar',
+        data: type === 'time' ? {
+            datasets: createTimeseriesDatasets(data)
+        } : {
+            labels: data.labels,
+            datasets: data.datasets
+        },
+        options: options
+    };
+    const chart = new Chart(ctx, chartConfig);
+    chartInstances.set(containerId, chart); // looked up again by downloadChart and destroyed by drawCharts
+    return chart;
+}
+
+function createTimeseriesDatasets(data) {
+    // One line dataset per run name; every point carries its git and stddev metadata.
+    const pointStyling = { borderWidth: 1, pointRadius: 3, pointStyle: 'circle', pointHoverRadius: 5 };
+    const toPoint = sample => ({
+        x: new Date(sample.date),
+        y: sample.value,
+        gitHash: sample.git_hash,
+        gitRepo: sample.github_repo,
+        stddev: sample.stddev
+    });
+    return Object.entries(data.runs).map(([runName, samples]) => ({
+        label: runName,
+        data: samples.map(toPoint),
+        ...pointStyling
+    }));
+}
+
+function updateCharts() {
+    // Restrict both chart models to the runs in activeRuns, then redraw everything.
+    const isActive = runName => activeRuns.has(runName);
+
+    // Timeseries charts keep their points per run name under `runs`.
+    const visibleTimeseries = timeseriesData.map(chart => {
+        const keptRuns = Object.entries(chart.runs).filter(([runName]) => isActive(runName));
+        return { ...chart, runs: Object.fromEntries(keptRuns) };
+    });
+
+    // Bar charts are positional: data[i] of every dataset belongs to labels[i].
+    const visibleBarCharts = barChartsData.map(chart => {
+        const keepByIndex = (_, index) => isActive(chart.labels[index]);
+        return {
+            ...chart,
+            labels: chart.labels.filter(label => isActive(label)),
+            datasets: chart.datasets.map(dataset => ({ ...dataset, data: dataset.data.filter(keepByIndex) }))
+        };
+    });
+    drawCharts(visibleTimeseries, visibleBarCharts);
+}
+
+function drawCharts(filteredTimeseriesData, filteredBarChartsData) {
+    // Throw away the previous canvases and their Chart.js instances.
+    for (const chartsDiv of document.querySelectorAll('.charts')) {
+        chartsDiv.innerHTML = '';
+    }
+    for (const chart of chartInstances.values()) {
+        chart.destroy();
+    }
+    chartInstances.clear();
+
+    // Render one chart per entry into the section matched by `selector`.
+    const renderSection = (entries, selector, idPrefix, type) => {
+        entries.forEach((entry, index) => {
+            const canvasId = `${idPrefix}-${index}`;
+            const container = createChartContainer(entry, canvasId);
+            document.querySelector(selector).appendChild(container);
+            createChart(entry, canvasId, type);
+        });
+    };
+    renderSection(filteredTimeseriesData, '.timeseries .charts', 'timeseries', 'time');
+    renderSection(filteredBarChartsData, '.bar-charts .charts', 'barchart', 'bar');
+
+    // Re-apply the regex and suite filters to the freshly created containers.
+    filterCharts();
+}
+
+function createChartContainer(data, canvasId) {
+    // Build the wrapper for one chart: a canvas with id `canvasId` plus a collapsible
+    // "Details" section holding a download button and the latest-run info for `data`.
+    const container = document.createElement('div');
+    container.className = 'chart-container';
+    container.setAttribute('data-label', data.label);
+    container.setAttribute('data-suite', data.suite);
+
+    const canvas = document.createElement('canvas');
+    canvas.id = canvasId;
+    container.appendChild(canvas);
+
+    // Create details section for extra info
+    const details = document.createElement('details');
+    const summary = document.createElement('summary');
+    summary.textContent = "Details";
+
+    // Add subtle download button to the summary
+    const downloadButton = document.createElement('button');
+    downloadButton.className = 'download-button';
+    downloadButton.textContent = 'Download';
+    downloadButton.onclick = (event) => {
+        event.stopPropagation(); // Prevent details toggle
+        downloadChart(canvasId, data.label);
+    };
+    summary.appendChild(downloadButton);
+    details.appendChild(summary);
+
+    // Declared locally: the undeclared assignment created an implicit global (a ReferenceError in strict mode).
+    const latestRunsLookup = createLatestRunsLookup(benchmarkRuns);
+    // Create and append extra info
+    const extraInfo = document.createElement('div');
+    extraInfo.className = 'extra-info';
+    extraInfo.innerHTML = generateExtraInfo(latestRunsLookup, data);
+    details.appendChild(extraInfo);
+    container.appendChild(details);
+    return container;
+}
+
+// Pre-compute a lookup for the latest run per label
+function createLatestRunsLookup(benchmarkRuns) {
+    // Maps each result label to { run, result } taken from the run with the newest date.
+    const latestRunsMap = new Map();
+    benchmarkRuns.forEach(run => {
+        // Yes, we need to convert the date every time. I checked.
+        const runDate = new Date(run.date);
+        run.results.forEach(result => {
+            const label = result.label;
+            // Entries are { run, result }: the stored date lives on .run (a bare .date is undefined and never compares older).
+            if (!latestRunsMap.has(label) || runDate > new Date(latestRunsMap.get(label).run.date)) {
+                latestRunsMap.set(label, {
+                    run,
+                    result
+                });
+            }
+        });
+    });
+    return latestRunsMap;
+}
+
+function generateExtraInfo(latestRunsLookup, data) {
+    const labels = data.datasets ? data.datasets.map(dataset => dataset.label) : [data.label];
+    // The returned HTML is assigned to innerHTML by createChartContainer, so data-derived text is escaped to stay literal.
+    const esc = text => String(text).replace(/[&<>"']/g, ch => `&#${ch.charCodeAt(0)};`);
+    return labels.map(label => {
+        const latestRun = latestRunsLookup.get(label);
+        if (latestRun) {
+            return `<div class="extra-info-entry">
+                        <strong>${esc(label)}:</strong> ${esc(formatCommand(latestRun.result))}<br>
+                        <em>Description:</em> ${esc(latestRun.result.description)}
+                    </div>`;
+        }
+        return `<div class="extra-info-entry">
+                        <strong>${esc(label)}:</strong> No data available
+                </div>`;
+    }).join('');
+}
+
+function formatCommand(run) {
+    // "KEY=value ..." pairs followed by the command words; a missing env or command contributes nothing.
+    const envAssignments = Object.entries(run.env || {}).map(([key, value]) => `${key}=${value}`);
+    const commandWords = run.command ? [...run.command] : [];
+    return [envAssignments.join(' '), commandWords.join(' ')].join(' ').trim();
+}
+
+function downloadChart(canvasId, label) {
+    // Save the chart registered under `canvasId` as "<label>.png"; does nothing when no chart is registered.
+    const chart = chartInstances.get(canvasId);
+    if (!chart) {
+        return;
+    }
+    const anchor = document.createElement('a');
+    Object.assign(anchor, { href: chart.toBase64Image('image/png', 1), download: `${label}.png` }).click();
+}
+
+// URL and filtering functions
+function getQueryParam(param) {
+    // Value of `param` in the page's current query string, as returned by URLSearchParams.get.
+    return new URLSearchParams(window.location.search).get(param);
+}
+
+function updateURL() {
+    // Mirror the current filter state into the page URL via history.replaceState.
+    // A parameter is written only when it differs from the default view and is
+    // removed otherwise.
+    const url = new URL(window.location);
+    const params = url.searchParams;
+    const setOrDelete = (key, keep, value) => {
+        if (keep) {
+            params.set(key, value);
+        } else {
+            params.delete(key);
+        }
+    };
+
+    // regex: kept while the filter box is non-empty
+    const regex = document.getElementById('bench-filter').value;
+    setOrDelete('regex', Boolean(regex), regex);
+
+    // suites: kept while some, but not all, suites are checked
+    const activeSuites = getActiveSuites();
+    const onlySomeSuites = activeSuites.length > 0 && activeSuites.length != suiteNames.size;
+    setOrDelete('suites', onlySomeSuites, activeSuites.join(','));
+
+    // runs: kept while the selection is non-empty and is not exactly the
+    // default comparison set
+    const activeRunsList = Array.from(activeRuns);
+    const defaultRuns = new Set(defaultCompareNames || []);
+    const matchesDefaults = activeRunsList.length === defaultRuns.size &&
+        activeRunsList.every(run => defaultRuns.has(run));
+    setOrDelete(
+        'runs',
+        activeRunsList.length > 0 && !matchesDefaults,
+        activeRunsList.join(',')
+    );
+
+    history.replaceState(null, '', url);
+}
+
+function filterCharts() {
+    // Show only containers whose label matches the filter and whose suite is checked; an invalid pattern (e.g. a half-typed "(") is matched literally.
+    const regexInput = document.getElementById('bench-filter').value;
+    let regex;
+    try { regex = new RegExp(regexInput, 'i'); } catch (e) { regex = new RegExp(regexInput.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'i'); }
+    const activeSuites = getActiveSuites();
+    document.querySelectorAll('.chart-container').forEach(container => {
+        const label = container.getAttribute('data-label');
+        const suite = container.getAttribute('data-suite');
+        container.style.display = (regex.test(label) && activeSuites.includes(suite)) ? '' : 'none';
+    });
+    updateURL();
+}
+
+function getActiveSuites() {
+    // data-suite attribute of every checked .suite-checkbox element, in document order.
+    return Array.from(document.querySelectorAll('.suite-checkbox:checked'), box => box.getAttribute('data-suite'));
+}
+
+// Data processing
+function processTimeseriesData(benchmarkRuns) {
+    // Group every result by label: one chart entry per label, holding the
+    // label's metadata (from the first result seen) and, per run name, the
+    // list of measured points in input order.
+    const chartsByLabel = {};
+    for (const run of benchmarkRuns) {
+        const runDate = run.date ? new Date(run.date) : null;
+        for (const result of run.results) {
+            if (!chartsByLabel[result.label]) {
+                chartsByLabel[result.label] = {
+                    label: result.label,
+                    suite: result.suite,
+                    unit: result.unit,
+                    lower_is_better: result.lower_is_better,
+                    runs: {}
+                };
+            }
+            const runsOfChart = chartsByLabel[result.label].runs;
+            if (!runsOfChart[run.name]) {
+                runsOfChart[run.name] = [];
+            }
+            runsOfChart[run.name].push({
+                date: runDate,
+                value: result.value,
+                stddev: result.stddev,
+                git_hash: run.git_hash,
+                github_repo: run.github_repo
+            });
+        }
+    }
+    return Object.values(chartsByLabel);
+}
+
+function processBarChartsData(benchmarkRuns) {
+    // One bar chart per explicit_group: `labels` lists run names and each dataset (one per result label)
+    // stores its values positionally, data[i] belonging to labels[i]. Results without a group are skipped.
+    const groupedResults = {};
+    benchmarkRuns.forEach(run => {
+        run.results.forEach(result => {
+            if (!result.explicit_group) return;
+
+            if (!groupedResults[result.explicit_group]) {
+                groupedResults[result.explicit_group] = {
+                    label: result.explicit_group,
+                    suite: result.suite,
+                    unit: result.unit,
+                    lower_is_better: result.lower_is_better,
+                    labels: [],
+                    datasets: []
+                };
+            }
+            const group = groupedResults[result.explicit_group];
+            if (!group.labels.includes(run.name)) {
+                group.labels.push(run.name);
+            }
+
+            let dataset = group.datasets.find(d => d.label === result.label);
+            if (!dataset) {
+                dataset = {
+                    label: result.label,
+                    data: new Array(group.labels.length).fill(null)
+                };
+                group.datasets.push(dataset);
+            }
+            // Back-fill runs that lacked this result with null. Assigning past the end would leave sparse
+            // holes, which Array.prototype.filter (used by updateCharts) skips, shifting values to the wrong run.
+            const runIndex = group.labels.indexOf(run.name);
+            while (dataset.data.length < runIndex) dataset.data.push(null);
+            dataset.data[runIndex] = result.value;
+        });
+    });
+    return Object.values(groupedResults);
+}
+
+// Setup functions
+function setupRunSelector() {
+    // Cache the selector elements, list every known run name in the
+    // dropdown, then render the initially selected runs.
+    runSelect = document.getElementById('run-select');
+    selectedRunsDiv = document.getElementById('selected-runs');
+
+    for (const runName of allRunNames) {
+        const option = Object.assign(document.createElement('option'), { value: runName, textContent: runName });
+        runSelect.appendChild(option);
+    }
+
+    updateSelectedRuns();
+}
+
+function setupSuiteFilters() {
+    // Collect every suite name found in benchmarkRuns and add one
+    // initially checked checkbox per suite to the #suite-filters element.
+    suiteFiltersContainer = document.getElementById('suite-filters');
+
+    for (const run of benchmarkRuns) {
+        for (const result of run.results) {
+            suiteNames.add(result.suite);
+        }
+    }
+
+    for (const suite of suiteNames) {
+        const checkbox = document.createElement('input');
+        checkbox.type = 'checkbox';
+        checkbox.className = 'suite-checkbox';
+        checkbox.dataset.suite = suite;
+        checkbox.checked = true;
+        const label = document.createElement('label');
+        label.append(checkbox, ' ' + suite);
+        suiteFiltersContainer.append(label, ' ');
+    }
+}
+
+function initializeCharts() {
+    // Read every URL parameter first: setupRunSelector() below already triggers a draw whose updateURL()
+    // call rewrites the query string from the still-empty filter controls, which would drop regex/suites.
+    const runsParam = getQueryParam('runs');
+    const regexParam = getQueryParam('regex');
+    const suitesParam = getQueryParam('suites');
+
+    // Process raw data
+    timeseriesData = processTimeseriesData(benchmarkRuns);
+    barChartsData = processBarChartsData(benchmarkRuns);
+    allRunNames = [...new Set(benchmarkRuns.map(run => run.name))];
+
+    // Set up active runs
+    if (runsParam) {
+        const runsFromUrl = runsParam.split(',');
+        // Start with an empty set
+        activeRuns = new Set();
+
+        // Process each run from URL
+        runsFromUrl.forEach(run => {
+            if (run === 'default') {
+                // Special case: include all default runs
+                (defaultCompareNames || []).forEach(defaultRun => {
+                    if (allRunNames.includes(defaultRun)) {
+                        activeRuns.add(defaultRun);
+                    }
+                });
+            } else if (allRunNames.includes(run)) {
+                // Add the specific run if it exists
+                activeRuns.add(run);
+            }
+        });
+    } else {
+        // No runs parameter, use defaults
+        activeRuns = new Set(defaultCompareNames || []);
+    }
+
+    // Setup UI components
+    setupRunSelector();
+    setupSuiteFilters();
+
+    // Apply the URL parameters captured above
+    if (regexParam) {
+        document.getElementById('bench-filter').value = regexParam;
+    }
+    if (suitesParam) {
+        const suites = suitesParam.split(',');
+        document.querySelectorAll('.suite-checkbox').forEach(checkbox => {
+            checkbox.checked = suites.includes(checkbox.getAttribute('data-suite'));
+        });
+    }
+
+    // Setup event listeners
+    document.querySelectorAll('.suite-checkbox').forEach(checkbox => {
+        checkbox.addEventListener('change', filterCharts);
+    });
+    document.getElementById('bench-filter').addEventListener('input', filterCharts);
+
+    // Draw initial charts
+    updateCharts();
+}
+
+// Make functions available globally for onclick handlers
+window.addSelectedRun = addSelectedRun;
+window.removeRun = removeRun;
+
+// Load data based on configuration
+function loadData() {
+    // Build the page from remoteDataUrl when it is defined and non-empty, otherwise from the data already loaded.
+    const loadingIndicator = document.getElementById('loading-indicator');
+    loadingIndicator.style.display = 'block'; // Show loading indicator
+
+    if (typeof remoteDataUrl !== 'undefined' && remoteDataUrl !== '') {
+        // Fetch data from remote URL; an HTTP error status is treated as a failure too
+        fetch(remoteDataUrl)
+            .then(response => response.ok ? response.json() : Promise.reject(new Error(`HTTP ${response.status}`)))
+            .then(data => {
+                benchmarkRuns = data;
+                initializeCharts();
+                // Hide only on success: hiding in finally() also hid the failure message set below.
+                loadingIndicator.style.display = 'none';
+            })
+            .catch(error => {
+                console.error('Error fetching remote data:', error);
+                loadingIndicator.textContent = 'Fetching remote data failed.';
+            });
+    } else {
+        // Use local data
+        initializeCharts();
+        loadingIndicator.style.display = 'none'; // Hide loading indicator
+    }
+}
+
+// Initialize when DOM is ready
+document.addEventListener('DOMContentLoaded', () => {
+    loadData(); // loadData and the setup functions look elements up by id, so wait until the document is parsed
+});
diff --git a/unified-runtime/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
similarity index 83%
rename from unified-runtime/scripts/benchmarks/main.py
rename to devops/scripts/benchmarks/main.py
index 4ad90b39b9001..91f84917f8698 100755
--- a/unified-runtime/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -17,6 +17,7 @@
 from history import BenchmarkHistory
 from utils.utils import prepare_workdir
 from utils.compute_runtime import *
+from presets import enabled_suites, presets
 
 import argparse
 import re
@@ -27,23 +28,27 @@
 
 
 def run_iterations(
-    benchmark: Benchmark, env_vars, iters: int, results: dict[str, list[Result]]
+    benchmark: Benchmark,
+    env_vars,
+    iters: int,
+    results: dict[str, list[Result]],
+    failures: dict[str, str],
 ):
     for iter in range(iters):
-        print(f"running {benchmark.name()}, iteration {iter}... ", end="", flush=True)
+        print(f"running {benchmark.name()}, iteration {iter}... ", flush=True)
         bench_results = benchmark.run(env_vars)
         if bench_results is None:
-            print(f"did not finish (OK for sycl-bench).")
+            failures[benchmark.name()] = "benchmark produced no results!"
             break
 
         for bench_result in bench_results:
-            # TODO: report failures in markdown/html ?
             if not bench_result.passed:
-                print(f"complete ({bench_result.label}: verification FAILED)")
+                failures[bench_result.label] = "verification failed"
+                print(f"complete ({bench_result.label}: verification failed).")
                 continue
 
             print(
-                f"complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})."
+                f"{benchmark.name()} complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})."
             )
 
             bench_result.name = bench_result.label
@@ -149,15 +154,19 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             SyclBench(directory),
             LlamaCppBench(directory),
             UMFSuite(directory),
-            # TestSuite()
+            TestSuite(),
         ]
         if not options.dry_run
         else []
     )
 
     benchmarks = []
+    failures = {}
 
     for s in suites:
+        if s.name() not in enabled_suites(options.preset):
+            continue
+
         suite_benchmarks = s.benchmarks()
         if filter:
             suite_benchmarks = [
@@ -170,25 +179,26 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             print(f"Setting up {type(s).__name__}")
             try:
                 s.setup()
-            except:
+            except Exception as e:
+                failures[s.name()] = f"Suite setup failure: {e}"
                 print(f"{type(s).__name__} setup failed. Benchmarks won't be added.")
             else:
                 print(f"{type(s).__name__} setup complete.")
                 benchmarks += suite_benchmarks
 
-    for b in benchmarks:
-        print(b.name())
-
     for benchmark in benchmarks:
         try:
-            print(f"Setting up {benchmark.name()}... ")
+            if options.verbose:
+                print(f"Setting up {benchmark.name()}... ")
             benchmark.setup()
-            print(f"{benchmark.name()} setup complete.")
+            if options.verbose:
+                print(f"{benchmark.name()} setup complete.")
 
         except Exception as e:
             if options.exit_on_failure:
                 raise e
             else:
+                failures[benchmark.name()] = f"Benchmark setup failure: {e}"
                 print(f"failed: {e}")
 
     results = []
@@ -199,7 +209,11 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             processed: list[Result] = []
             for _ in range(options.iterations_stddev):
                 run_iterations(
-                    benchmark, merged_env_vars, options.iterations, intermediate_results
+                    benchmark,
+                    merged_env_vars,
+                    options.iterations,
+                    intermediate_results,
+                    failures,
                 )
                 valid, processed = process_results(
                     intermediate_results, benchmark.stddev_threshold()
@@ -211,12 +225,16 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             if options.exit_on_failure:
                 raise e
             else:
+                failures[benchmark.name()] = f"Benchmark run failure: {e}"
                 print(f"failed: {e}")
 
     for benchmark in benchmarks:
-        print(f"tearing down {benchmark.name()}... ", end="", flush=True)
+        # this never has any useful information anyway, so hide it behind verbose
+        if options.verbose:
+            print(f"tearing down {benchmark.name()}... ", flush=True)
         benchmark.teardown()
-        print("complete.")
+        if options.verbose:
+            print(f"{benchmark.name()} teardown complete.")
 
     this_name = options.current_run_name
     chart_data = {}
@@ -224,7 +242,10 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
     if not options.dry_run:
         chart_data = {this_name: results}
 
-    history = BenchmarkHistory(directory)
+    results_dir = directory
+    if options.custom_results_dir:
+        results_dir = Path(options.custom_results_dir)
+    history = BenchmarkHistory(results_dir)
     # limit how many files we load.
     # should this be configurable?
     history.load(1000)
@@ -241,7 +262,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
 
     if options.output_markdown:
         markdown_content = generate_markdown(
-            this_name, chart_data, options.output_markdown
+            this_name, chart_data, failures, options.output_markdown
         )
 
         with open("benchmark_results.md", "w") as file:
@@ -262,14 +283,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             compare_names.append(saved_name)
 
     if options.output_html:
-        html_content = generate_html(history.runs, "intel/llvm", compare_names)
-
-        with open("benchmark_results.html", "w") as file:
-            file.write(html_content)
-
-        print(
-            f"HTML with benchmark results has been written to {os.getcwd()}/benchmark_results.html"
-        )
+        generate_html(history.runs, compare_names)
 
 
 def validate_and_parse_env_args(env_args):
@@ -305,6 +319,11 @@ def validate_and_parse_env_args(env_args):
         help="Do not rebuild the benchmarks from scratch.",
         action="store_true",
     )
+    parser.add_argument(
+        "--redownload",
+        help="Always download benchmark data dependencies, even if they already exist.",
+        action="store_true",
+    )
     parser.add_argument(
         "--env",
         type=str,
@@ -347,12 +366,6 @@ def validate_and_parse_env_args(env_args):
         help="Regex pattern to filter benchmarks by name.",
         default=None,
     )
-    parser.add_argument(
-        "--epsilon",
-        type=float,
-        help="Threshold to consider change of performance significant",
-        default=options.epsilon,
-    )
     parser.add_argument(
         "--verbose", help="Print output of all the commands.", action="store_true"
     )
@@ -379,7 +392,11 @@ def validate_and_parse_env_args(env_args):
         help="Specify whether markdown output should fit the content size limit for request validation",
     )
     parser.add_argument(
-        "--output-html", help="Create HTML output", action="store_true", default=False
+        "--output-html",
+        help="Create HTML output. Local output is for direct local viewing of the html file, remote is for server deployment.",
+        nargs="?",
+        const=options.output_html,
+        choices=["local", "remote"],
     )
     parser.add_argument(
         "--dry-run",
@@ -423,6 +440,19 @@ def validate_and_parse_env_args(env_args):
         help="Directory for cublas library",
         default=None,
     )
+    parser.add_argument(
+        "--preset",
+        type=str,
+        choices=[p for p in presets.keys()],
+        help="Benchmark preset to run.",
+        default=options.preset,
+    )
+    parser.add_argument(
+        "--results-dir",
+        type=str,
+        help="Specify a custom results directory",
+        default=options.custom_results_dir,
+    )
 
     args = parser.parse_args()
     additional_env_vars = validate_and_parse_env_args(args.env)
@@ -430,10 +460,10 @@ def validate_and_parse_env_args(env_args):
     options.workdir = args.benchmark_directory
     options.verbose = args.verbose
     options.rebuild = not args.no_rebuild
+    options.redownload = args.redownload
     options.sycl = args.sycl
     options.iterations = args.iterations
     options.timeout = args.timeout
-    options.epsilon = args.epsilon
     options.ur = args.ur
     options.ur_adapter = args.adapter
     options.exit_on_failure = args.exit_on_failure
@@ -448,6 +478,8 @@ def validate_and_parse_env_args(env_args):
     options.current_run_name = args.relative_perf
     options.cudnn_directory = args.cudnn_directory
     options.cublas_directory = args.cublas_directory
+    options.preset = args.preset
+    options.custom_results_dir = args.results_dir
 
     if args.build_igc and args.compute_runtime is None:
         parser.error("--build-igc requires --compute-runtime to be set")
diff --git a/unified-runtime/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
similarity index 83%
rename from unified-runtime/scripts/benchmarks/options.py
rename to devops/scripts/benchmarks/options.py
index 2e92675264544..7600942acd1e5 100644
--- a/unified-runtime/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -1,6 +1,7 @@
 from dataclasses import dataclass, field
 from enum import Enum
 
+from presets import presets
 
 class Compare(Enum):
     LATEST = "latest"
@@ -21,6 +22,7 @@ class Options:
     ur_adapter: str = None
     umf: str = None
     rebuild: bool = True
+    redownload: bool = False
     benchmark_cwd: str = "INVALID"
     timeout: float = 600
     iterations: int = 3
@@ -28,18 +30,18 @@ class Options:
     compare: Compare = Compare.LATEST
     compare_max: int = 10  # average/median over how many results
     output_markdown: MarkdownSize = MarkdownSize.SHORT
-    output_html: bool = False
+    output_html: str = "local"
     dry_run: bool = False
-    # these two should probably be merged into one setting
     stddev_threshold: float = 0.02
-    epsilon: float = 0.02
     iterations_stddev: int = 5
     build_compute_runtime: bool = False
     extra_ld_libraries: list[str] = field(default_factory=list)
     extra_env_vars: dict = field(default_factory=dict)
-    compute_runtime_tag: str = "25.05.32567.12"
+    compute_runtime_tag: str = "25.05.32567.18"
     build_igc: bool = False
     current_run_name: str = "This PR"
+    preset: str = "Full"
+    custom_results_dir: str = None
 
 
 options = Options()
diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py
new file mode 100644
index 0000000000000..53dd4b1e8f968
--- /dev/null
+++ b/devops/scripts/benchmarks/output_html.py
@@ -0,0 +1,48 @@
+# Copyright (C) 2024 Intel Corporation
+# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+# See LICENSE.TXT
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import json
+import os
+from options import options
+
+
+def generate_html(benchmark_runs: list, compare_names: list[str]):
+    """Write the benchmark history into the "html" folder next to this script.
+
+    In "local" mode the runs and default compare names go to data.js; otherwise
+    the runs alone go to data.json, to be uploaded to the remoteDataUrl location
+    named in config.js. Note that benchmark_runs is sorted in place, newest first.
+    """
+    html_path = os.path.join(os.path.dirname(__file__), "html")
+    benchmark_runs.sort(key=lambda run: run.date, reverse=True)
+    # Each run serializes itself; joining the strings (instead of dumping a
+    # list of them) keeps the JSON from being serialized twice.
+    runs_json = ",\n".join(run.to_json() for run in benchmark_runs)
+
+    if options.output_html == "local":
+        data_path = os.path.join(html_path, "data.js")
+        # We can't store this as a standalone json file because it needs to be
+        # inline in the html.
+        with open(data_path, "w") as f:
+            f.write("benchmarkRuns = [\n")
+            f.write(runs_json)
+            f.write("\n];\n\n")  # terminates benchmarkRuns
+
+            f.write("defaultCompareNames = ")
+            json.dump(compare_names, f)
+            f.write(";\n")  # terminates defaultCompareNames
+
+        # data.js was written beside this script, so point there, not at the CWD.
+        print(f"See {os.path.join(html_path, 'index.html')} for the results.")
+    else:
+        data_path = os.path.join(html_path, "data.json")
+        with open(data_path, "w") as f:
+            f.write("[\n")
+            f.write(runs_json)
+            f.write("\n]\n")
+
+        print(
+            f"Upload {data_path} to a location set in config.js remoteDataUrl argument."
+        )
diff --git a/unified-runtime/scripts/benchmarks/output_markdown.py b/devops/scripts/benchmarks/output_markdown.py
similarity index 93%
rename from unified-runtime/scripts/benchmarks/output_markdown.py
rename to devops/scripts/benchmarks/output_markdown.py
index dd6711cec6365..18b5779473a75 100644
--- a/unified-runtime/scripts/benchmarks/output_markdown.py
+++ b/devops/scripts/benchmarks/output_markdown.py
@@ -5,7 +5,7 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 import collections
-from benches.result import Result
+from utils.result import Result
 from options import options, MarkdownSize
 import ast
 
@@ -138,17 +138,6 @@ def generate_markdown_details(
         env_dict = res.env
         command = res.command
 
-        # If data is collected from already saved results,
-        # the content is parsed as strings
-        if isinstance(res.env, str):
-            # Since the scripts would be used solely on data prepared
-            # by our scripts, this should be safe
-            # However, maybe needs an additional blessing
-            # https://docs.python.org/3/library/ast.html#ast.literal_eval
-            env_dict = ast.literal_eval(res.env)
-        if isinstance(res.command, str):
-            command = ast.literal_eval(res.command)
-
         section = (
             "\n<details>\n"
             f"<summary>{res.label}</summary>\n\n"
@@ -179,7 +168,7 @@ def generate_markdown_details(
             return "\nBenchmark details contain too many chars to display\n"
 
 
-def generate_summary_table_and_chart(
+def generate_summary_table(
     chart_data: dict[str, list[Result]], baseline_name: str, markdown_size: MarkdownSize
 ):
     summary_table = get_chart_markdown_header(
@@ -374,10 +363,27 @@ def generate_summary_table_and_chart(
                 return "\n# Summary\n" "Benchmark output is too large to display\n\n"
 
 
+def generate_failures_section(failures: dict[str, str]) -> str:
+    """Render failures as a markdown table, or "" when there are none."""
+    if not failures:
+        return ""
+
+    def cell(text) -> str:
+        # A raw "|" or a line break inside a cell would split the table row.
+        return " ".join(str(text).replace("|", "\\|").split())
+
+    rows = [f"| {cell(n)} | {cell(msg)} |\n" for n, msg in failures.items()]
+    header = "\n# Failures\n| Name | Failure |\n|---|---|\n"
+    return header + "".join(rows)
+
+
 def generate_markdown(
-    name: str, chart_data: dict[str, list[Result]], markdown_size: MarkdownSize
+    name: str,
+    chart_data: dict[str, list[Result]],
+    failures: dict[str, str],
+    markdown_size: MarkdownSize,
 ):
-    (summary_line, summary_table) = generate_summary_table_and_chart(
+    (summary_line, summary_table) = generate_summary_table(
         chart_data, name, markdown_size
     )
 
@@ -396,4 +402,6 @@ def generate_markdown(
         )
         generated_markdown += "\n# Details\n" f"{markdown_details}\n"
 
-    return generated_markdown
+    failures_section = generate_failures_section(failures)
+
+    return failures_section + generated_markdown
diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py
new file mode 100644
index 0000000000000..7f5dc8d78460a
--- /dev/null
+++ b/devops/scripts/benchmarks/presets.py
@@ -0,0 +1,38 @@
+# Copyright (C) 2024 Intel Corporation
+# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+# See LICENSE.TXT
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+presets: dict[str, list[str]] = {
+    "Full": [
+        "Compute Benchmarks",
+        "llama.cpp bench",
+        "SYCL-Bench",
+        "Velocity Bench",
+        "UMF",
+    ],
+    "SYCL": [
+        "Compute Benchmarks",
+        "llama.cpp bench",
+        "SYCL-Bench",
+        "Velocity Bench",
+    ],
+    "Minimal": [
+        "Compute Benchmarks",
+    ],
+    "Normal": [
+        "Compute Benchmarks",
+        "llama.cpp bench",
+        "Velocity Bench",
+    ],
+    "Test": [
+        "Test Suite",
+    ],
+}
+
+def enabled_suites(preset: str) -> list[str]:
+    """Return the suite names of `preset`; raise ValueError if it is unknown."""
+    if preset not in presets:
+        raise ValueError(f"Preset '{preset}' not found.")
+    return presets[preset]
+
diff --git a/unified-runtime/scripts/benchmarks/requirements.txt b/devops/scripts/benchmarks/requirements.txt
similarity index 100%
rename from unified-runtime/scripts/benchmarks/requirements.txt
rename to devops/scripts/benchmarks/requirements.txt
diff --git a/unified-runtime/scripts/benchmarks/utils/compute_runtime.py b/devops/scripts/benchmarks/utils/compute_runtime.py
similarity index 100%
rename from unified-runtime/scripts/benchmarks/utils/compute_runtime.py
rename to devops/scripts/benchmarks/utils/compute_runtime.py
diff --git a/unified-runtime/scripts/benchmarks/benches/oneapi.py b/devops/scripts/benchmarks/utils/oneapi.py
similarity index 79%
rename from unified-runtime/scripts/benchmarks/benches/oneapi.py
rename to devops/scripts/benchmarks/utils/oneapi.py
index 0547f6646e39e..e1876b5ed37fb 100644
--- a/unified-runtime/scripts/benchmarks/benches/oneapi.py
+++ b/devops/scripts/benchmarks/utils/oneapi.py
@@ -7,29 +7,33 @@
 from utils.utils import download, run
 from options import options
 import os
+import hashlib
 
 
 class OneAPI:
-    # random unique number for benchmark oneAPI installation
-    ONEAPI_BENCHMARK_INSTANCE_ID = 987654
-
     def __init__(self):
         self.oneapi_dir = os.path.join(options.workdir, "oneapi")
         Path(self.oneapi_dir).mkdir(parents=True, exist_ok=True)
-        # delete if some option is set?
+        self.oneapi_instance_id = self.generate_unique_oneapi_id(self.oneapi_dir)
 
         # can we just hardcode these links?
         self.install_package(
             "dnnl",
             "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/87e117ab-039b-437d-9c80-dcd5c9e675d5/intel-onednn-2025.0.0.862_offline.sh",
+            "6866feb5b8dfefd6ff45d6bfabed44f01d7fba8fd452480ae1fd86b92e9481ae052c24842da14f112f672f5c4859945b",
         )
         self.install_package(
             "mkl",
             "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940_offline.sh",
+            "122bb84cf943ea27753cb399c81ab2ae218ebd51b789c74d273240157722925ab4d5a43cb0b5de41b854f2c5a59a4002",
         )
         return
 
-    def install_package(self, name, url):
+    def generate_unique_oneapi_id(self, path):
+        # md5 is only a stable fingerprint of the install path, not a security hash.
+        return hashlib.md5(path.encode(), usedforsecurity=False).hexdigest()
+
+    def install_package(self, name, url, checksum):
         package_path = os.path.join(self.oneapi_dir, name)
         if Path(package_path).exists():
             print(
@@ -37,11 +41,13 @@ def install_package(self, name, url):
             )
             return
 
-        package = download(self.oneapi_dir, url, f"package_{name}.sh")
+        package = download(
+            self.oneapi_dir, url, f"package_{name}.sh", checksum=checksum
+        )
         try:
             print(f"installing {name}")
             run(
-                f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance f{self.ONEAPI_BENCHMARK_INSTANCE_ID}"
+                f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance {self.oneapi_instance_id}"
             )
         except:
             print("oneAPI installation likely exists already")
diff --git a/unified-runtime/scripts/benchmarks/benches/result.py b/devops/scripts/benchmarks/utils/result.py
similarity index 69%
rename from unified-runtime/scripts/benchmarks/benches/result.py
rename to devops/scripts/benchmarks/utils/result.py
index 52a098d91c24a..4e65a3b8aa582 100644
--- a/unified-runtime/scripts/benchmarks/benches/result.py
+++ b/devops/scripts/benchmarks/utils/result.py
@@ -3,9 +3,9 @@
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Optional
-from dataclasses_json import dataclass_json
+from dataclasses_json import config, dataclass_json
 from datetime import datetime
 
 
@@ -14,8 +14,8 @@
 class Result:
     label: str
     value: float
-    command: str
-    env: str
+    command: list[str]
+    env: dict[str, str]
     stdout: str
     passed: bool = True
     unit: str = ""
@@ -26,9 +26,8 @@ class Result:
     # values below should not be set by the benchmark
     name: str = ""
     lower_is_better: bool = True
-    git_hash: str = ""
-    date: Optional[datetime] = None
     suite: str = "Unknown"
+    description: str = "No description provided."
 
 
 @dataclass_json
@@ -37,4 +36,8 @@ class BenchmarkRun:
     results: list[Result]
     name: str = "This PR"
     git_hash: str = ""
-    date: datetime = None
+    github_repo: str = None
+    date: datetime = field(
+        default=None,
+        metadata=config(encoder=datetime.isoformat, decoder=datetime.fromisoformat),
+    )
diff --git a/unified-runtime/scripts/benchmarks/utils/utils.py b/devops/scripts/benchmarks/utils/utils.py
similarity index 81%
rename from unified-runtime/scripts/benchmarks/utils/utils.py
rename to devops/scripts/benchmarks/utils/utils.py
index 3a516e8d724f7..2d5fad6cd8917 100644
--- a/unified-runtime/scripts/benchmarks/utils/utils.py
+++ b/devops/scripts/benchmarks/utils/utils.py
@@ -12,6 +12,7 @@
 import urllib  # nosec B404
 from options import options
 from pathlib import Path
+import hashlib
 
 
 def run(
@@ -45,6 +46,12 @@ def run(
 
         env.update(env_vars)
 
+        if options.verbose:
+            command_str = " ".join(command)
+            env_str = " ".join(f"{key}={value}" for key, value in env_vars.items())
+            full_command_str = f"{env_str} {command_str}".strip()
+            print(f"Running: {full_command_str}")
+
         result = subprocess.run(
             command,
             cwd=cwd,
@@ -107,7 +114,7 @@ def prepare_workdir(dir, version):
                 shutil.rmtree(dir)
         else:
             raise Exception(
-                f"The directory {dir} exists but is a benchmark work directory."
+                f"The directory {dir} exists but is not a benchmark work directory."
             )
 
     os.makedirs(dir)
@@ -128,11 +135,26 @@ def create_build_path(directory, name):
     return build_path
 
 
-def download(dir, url, file, untar=False, unzip=False):
+def calculate_checksum(file_path):
+    digest = hashlib.sha384()
+    with open(file_path, "rb") as stream:
+        while chunk := stream.read(4096):  # 4 KiB at a time; b"" ends the loop
+            digest.update(chunk)
+    return digest.hexdigest()  # hex-encoded SHA-384 of the whole file
+
+
+def download(dir, url, file, untar=False, unzip=False, checksum=""):
     data_file = os.path.join(dir, file)
     if not Path(data_file).exists():
         print(f"{data_file} does not exist, downloading")
         urllib.request.urlretrieve(url, data_file)
+        calculated_checksum = calculate_checksum(data_file)
+        if calculated_checksum != checksum:
+            print(
+                f"Checksum mismatch: expected {checksum}, got {calculated_checksum}. Refusing to continue."
+            )
+            exit(1)
+
         if untar:
             file = tarfile.open(data_file)
             file.extractall(dir)
diff --git a/unified-runtime/scripts/benchmarks/workflow.png b/devops/scripts/benchmarks/workflow.png
similarity index 100%
rename from unified-runtime/scripts/benchmarks/workflow.png
rename to devops/scripts/benchmarks/workflow.png
diff --git a/unified-runtime/.github/scripts/get_system_info.sh b/devops/scripts/get_system_info.sh
similarity index 100%
rename from unified-runtime/.github/scripts/get_system_info.sh
rename to devops/scripts/get_system_info.sh
diff --git a/unified-runtime/scripts/benchmarks/benchmark_results.html.template b/unified-runtime/scripts/benchmarks/benchmark_results.html.template
deleted file mode 100644
index 1deeedad66b00..0000000000000
--- a/unified-runtime/scripts/benchmarks/benchmark_results.html.template
+++ /dev/null
@@ -1,192 +0,0 @@
-<!DOCTYPE html>
-<html>
-<head>
-    <meta charset="utf-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1">
-    <title>Benchmark Results</title>
-    <style>
-        body {
-            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
-            margin: 0;
-            padding: 16px;
-            background: #f8f9fa;
-        }
-        .container {
-            max-width: 1100px;
-            margin: 0 auto;
-        }
-        h1, h2 {
-            color: #212529;
-            text-align: center;
-            margin-bottom: 24px;
-            font-weight: 500;
-        }
-        .chart {
-            background: white;
-            border-radius: 8px;
-            padding: 24px;
-            margin-bottom: 24px;
-            box-shadow: 0 1px 3px rgba(0,0,0,0.1);
-            overflow-x: auto;
-        }
-        .chart > div {
-            min-width: 600px;
-            margin: 0 auto;
-        }
-        @media (max-width: 768px) {
-            body {
-                padding: 12px;
-            }
-            .chart {
-                padding: 16px;
-                border-radius: 6px;
-            }
-            h1 {
-                font-size: 24px;
-                margin-bottom: 16px;
-            }
-        }
-        .filter-container {
-            text-align: center;
-            margin-bottom: 24px;
-        }
-        .filter-container input {
-            padding: 8px;
-            font-size: 16px;
-            border: 1px solid #ccc;
-            border-radius: 4px;
-            width: 400px;
-            max-width: 100%;
-        }
-        .suite-filter-container {
-            text-align: center;
-            margin-bottom: 24px;
-            padding: 16px;
-            background: #e9ecef;
-            border-radius: 8px;
-        }
-        .suite-checkbox {
-            margin: 0 8px;
-        }
-        details {
-            margin-bottom: 24px;
-        }
-        summary {
-            font-size: 18px;
-            font-weight: 500;
-            cursor: pointer;
-            padding: 12px;
-            background: #e9ecef;
-            border-radius: 8px;
-            user-select: none;
-        }
-        summary:hover {
-            background: #dee2e6;
-        }
-    </style>
-    <script>
-        function getQueryParam(param) {
-            const urlParams = new URLSearchParams(window.location.search);
-            return urlParams.get(param);
-        }
-
-        function filterCharts() {
-            const regexInput = document.getElementById('bench-filter').value;
-            const regex = new RegExp(regexInput, 'i');
-            const activeSuites = Array.from(document.querySelectorAll('.suite-checkbox:checked')).map(checkbox => checkbox.getAttribute('data-suite'));
-            const charts = document.querySelectorAll('.chart');
-
-            charts.forEach(chart => {
-                const label = chart.getAttribute('data-label');
-                const suite = chart.getAttribute('data-suite');
-                if (regex.test(label) && activeSuites.includes(suite)) {
-                    chart.style.display = '';
-                } else {
-                    chart.style.display = 'none';
-                }
-            });
-
-            updateURL();
-        }
-
-        function updateURL() {
-            const url = new URL(window.location);
-            const regex = document.getElementById('bench-filter').value;
-            const activeSuites = Array.from(document.querySelectorAll('.suite-checkbox:checked')).map(checkbox => checkbox.getAttribute('data-suite'));
-
-            if (regex) {
-                url.searchParams.set('regex', regex);
-            } else {
-                url.searchParams.delete('regex');
-            }
-
-            if (activeSuites.length > 0) {
-                url.searchParams.set('suites', activeSuites.join(','));
-            } else {
-                url.searchParams.delete('suites');
-            }
-
-            history.replaceState(null, '', url);
-        }
-
-        document.addEventListener('DOMContentLoaded', (event) => {
-            const regexParam = getQueryParam('regex');
-            const suitesParam = getQueryParam('suites');
-
-            if (regexParam) {
-                document.getElementById('bench-filter').value = regexParam;
-            }
-
-            const suiteCheckboxes = document.querySelectorAll('.suite-checkbox');
-            if (suitesParam) {
-                const suites = suitesParam.split(',');
-                suiteCheckboxes.forEach(checkbox => {
-                    if (suites.includes(checkbox.getAttribute('data-suite'))) {
-                        checkbox.checked = true;
-                    } else {
-                        checkbox.checked = false;
-                    }
-                });
-            } else {
-                suiteCheckboxes.forEach(checkbox => {
-                    checkbox.checked = true;
-                });
-            }
-            filterCharts();
-
-            suiteCheckboxes.forEach(checkbox => {
-                checkbox.addEventListener('change', () => {
-                    filterCharts();
-                });
-            });
-
-            document.getElementById('bench-filter').addEventListener('input', () => {
-                filterCharts();
-            });
-        });
-    </script>
-</head>
-<body>
-    <div class="container">
-        <h1>Benchmark Results</h1>
-        <div class="filter-container">
-            <input type="text" id="bench-filter" placeholder="Regex...">
-        </div>
-        <div class="suite-filter-container">
-            ${suite_checkboxes_html}
-        </div>
-        <details class="timeseries">
-            <summary>Historical Results</summary>
-            <div class="charts">
-                ${timeseries_charts_html}
-            </div>
-        </details>
-        <details class="bar-charts">
-            <summary>Comparisons</summary>
-            <div class="charts">
-                ${bar_charts_html}
-            </div>
-        </details>
-    </div>
-</body>
-</html>
diff --git a/unified-runtime/scripts/benchmarks/output_html.py b/unified-runtime/scripts/benchmarks/output_html.py
deleted file mode 100644
index 4ba395bc3aac6..0000000000000
--- a/unified-runtime/scripts/benchmarks/output_html.py
+++ /dev/null
@@ -1,340 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
-# See LICENSE.TXT
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-import re
-import os
-from pathlib import Path
-import matplotlib.pyplot as plt
-import mpld3
-from collections import defaultdict
-from dataclasses import dataclass
-import matplotlib.dates as mdates
-from benches.result import BenchmarkRun, Result
-import numpy as np
-from string import Template
-
-
-@dataclass
-class BenchmarkMetadata:
-    unit: str
-    suite: str
-    lower_is_better: bool
-
-
-@dataclass
-class BenchmarkSeries:
-    label: str
-    metadata: BenchmarkMetadata
-    runs: list[BenchmarkRun]
-
-
-@dataclass
-class BenchmarkChart:
-    label: str
-    suite: str
-    html: str
-
-
-def tooltip_css() -> str:
-    return ".mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}"
-
-
-def create_time_series_chart(
-    benchmarks: list[BenchmarkSeries], github_repo: str
-) -> list[BenchmarkChart]:
-    plt.close("all")
-
-    num_benchmarks = len(benchmarks)
-    if num_benchmarks == 0:
-        return []
-
-    html_charts = []
-
-    for _, benchmark in enumerate(benchmarks):
-        fig, ax = plt.subplots(figsize=(10, 4))
-
-        all_values = []
-        all_stddevs = []
-
-        for run in benchmark.runs:
-            sorted_points = sorted(run.results, key=lambda x: x.date)
-            dates = [point.date for point in sorted_points]
-            values = [point.value for point in sorted_points]
-            stddevs = [point.stddev for point in sorted_points]
-
-            all_values.extend(values)
-            all_stddevs.extend(stddevs)
-
-            ax.errorbar(dates, values, yerr=stddevs, fmt="-", label=run.name, alpha=0.5)
-            scatter = ax.scatter(dates, values, picker=True)
-
-            tooltip_labels = [
-                f"Date: {point.date.strftime('%Y-%m-%d %H:%M:%S')}\n"
-                f"Value: {point.value:.2f} {benchmark.metadata.unit}\n"
-                f"Stddev: {point.stddev:.2f} {benchmark.metadata.unit}\n"
-                f"Git Hash: {point.git_hash}"
-                for point in sorted_points
-            ]
-
-            targets = [
-                f"https://github.com/{github_repo}/commit/{point.git_hash}"
-                for point in sorted_points
-            ]
-
-            tooltip = mpld3.plugins.PointHTMLTooltip(
-                scatter, tooltip_labels, css=tooltip_css(), targets=targets
-            )
-            mpld3.plugins.connect(fig, tooltip)
-
-        ax.set_title(benchmark.label, pad=20)
-        performance_indicator = (
-            "lower is better"
-            if benchmark.metadata.lower_is_better
-            else "higher is better"
-        )
-        ax.text(
-            0.5,
-            1.05,
-            f"({performance_indicator})",
-            ha="center",
-            transform=ax.transAxes,
-            style="italic",
-            fontsize=7,
-            color="#666666",
-        )
-
-        ax.set_xlabel("")
-        unit = benchmark.metadata.unit
-        ax.set_ylabel(f"Value ({unit})" if unit else "Value")
-        ax.grid(True, alpha=0.2)
-        ax.legend(bbox_to_anchor=(1, 1), loc="upper left")
-        ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter("%Y-%m-%d %H:%M:%S"))
-
-        plt.tight_layout()
-        html_charts.append(
-            BenchmarkChart(
-                html=mpld3.fig_to_html(fig),
-                label=benchmark.label,
-                suite=benchmark.metadata.suite,
-            )
-        )
-        plt.close(fig)
-
-    return html_charts
-
-
-@dataclass
-class ExplicitGroup:
-    name: str
-    nnames: int
-    metadata: BenchmarkMetadata
-    runs: dict[str, dict[str, Result]]
-
-
-def create_explicit_groups(
-    benchmark_runs: list[BenchmarkRun], compare_names: list[str]
-) -> list[ExplicitGroup]:
-    groups = {}
-
-    for run in benchmark_runs:
-        if run.name in compare_names:
-            for res in run.results:
-                if res.explicit_group != "":
-                    if res.explicit_group not in groups:
-                        groups[res.explicit_group] = ExplicitGroup(
-                            name=res.explicit_group,
-                            nnames=len(compare_names),
-                            metadata=BenchmarkMetadata(
-                                unit=res.unit,
-                                lower_is_better=res.lower_is_better,
-                                suite=res.suite,
-                            ),
-                            runs={},
-                        )
-
-                    group = groups[res.explicit_group]
-                    if res.label not in group.runs:
-                        group.runs[res.label] = {name: None for name in compare_names}
-
-                    if group.runs[res.label][run.name] is None:
-                        group.runs[res.label][run.name] = res
-
-    return list(groups.values())
-
-
-def create_grouped_bar_charts(groups: list[ExplicitGroup]) -> list[BenchmarkChart]:
-    plt.close("all")
-
-    html_charts = []
-
-    for group in groups:
-        fig, ax = plt.subplots(figsize=(10, 6))
-
-        x = np.arange(group.nnames)
-        x_labels = []
-        width = 0.8 / len(group.runs)
-
-        max_height = 0
-
-        for i, (run_name, run_results) in enumerate(group.runs.items()):
-            offset = width * i
-
-            positions = x + offset
-            x_labels = run_results.keys()
-            valid_data = [r.value if r is not None else 0 for r in run_results.values()]
-            rects = ax.bar(positions, valid_data, width, label=run_name)
-            # This is a hack to disable all bar_label. Setting labels to empty doesn't work.
-            # We create our own labels below for each bar, this works better in mpld3.
-            ax.bar_label(rects, fmt="")
-
-            for rect, run, res in zip(rects, run_results.keys(), run_results.values()):
-                if res is None:
-                    continue
-
-                height = rect.get_height()
-                if height > max_height:
-                    max_height = height
-
-                ax.text(
-                    rect.get_x() + rect.get_width() / 2.0,
-                    height + 1,
-                    f"{res.value:.1f}",
-                    ha="center",
-                    va="bottom",
-                    fontsize=9,
-                )
-
-                tooltip_labels = [
-                    f"Date: {res.date.strftime('%Y-%m-%d %H:%M:%S')}\n"
-                    f"Run: {run}\n"
-                    f"Label: {res.label}\n"
-                    f"Value: {res.value:.2f} {res.unit}\n"
-                    f"Stddev: {res.stddev:.2f} {res.unit}\n"
-                ]
-                tooltip = mpld3.plugins.LineHTMLTooltip(
-                    rect, tooltip_labels, css=tooltip_css()
-                )
-                mpld3.plugins.connect(ax.figure, tooltip)
-
-        # normally we'd just set legend to be outside
-        # the chart, but this is not supported by mpld3.
-        # instead, we adjust the y axis to account for
-        # the height of the bars.
-        legend_height = len(group.runs) * 0.1
-        ax.set_ylim(0, max_height * (1 + legend_height))
-
-        ax.set_xticks([])
-        ax.grid(True, axis="y", alpha=0.2)
-        ax.set_ylabel(f"Value ({group.metadata.unit})")
-        ax.legend(loc="upper left")
-        ax.set_title(group.name, pad=20)
-        performance_indicator = (
-            "lower is better" if group.metadata.lower_is_better else "higher is better"
-        )
-        ax.text(
-            0.5,
-            1.03,
-            f"({performance_indicator})",
-            ha="center",
-            transform=ax.transAxes,
-            style="italic",
-            fontsize=7,
-            color="#666666",
-        )
-
-        for idx, label in enumerate(x_labels):
-            # this is a hack to get labels to show above the legend
-            # we normalize the idx to transAxes transform and offset it a little.
-            x_norm = (idx + 0.3 - ax.get_xlim()[0]) / (
-                ax.get_xlim()[1] - ax.get_xlim()[0]
-            )
-            ax.text(x_norm, 1.03, label, transform=ax.transAxes, color="#666666")
-
-        plt.tight_layout()
-        html_charts.append(
-            BenchmarkChart(
-                label=group.name,
-                html=mpld3.fig_to_html(fig),
-                suite=group.metadata.suite,
-            )
-        )
-        plt.close(fig)
-
-    return html_charts
-
-
-def process_benchmark_data(
-    benchmark_runs: list[BenchmarkRun], compare_names: list[str]
-) -> list[BenchmarkSeries]:
-    benchmark_metadata: dict[str, BenchmarkMetadata] = {}
-    run_map: dict[str, dict[str, list[Result]]] = defaultdict(lambda: defaultdict(list))
-
-    for run in benchmark_runs:
-        if run.name not in compare_names:
-            continue
-
-        for result in run.results:
-            if result.label not in benchmark_metadata:
-                benchmark_metadata[result.label] = BenchmarkMetadata(
-                    unit=result.unit,
-                    lower_is_better=result.lower_is_better,
-                    suite=result.suite,
-                )
-
-            result.date = run.date
-            result.git_hash = run.git_hash
-            run_map[result.label][run.name].append(result)
-
-    benchmark_series = []
-    for label, metadata in benchmark_metadata.items():
-        runs = [
-            BenchmarkRun(name=run_name, results=results)
-            for run_name, results in run_map[label].items()
-        ]
-        benchmark_series.append(
-            BenchmarkSeries(label=label, metadata=metadata, runs=runs)
-        )
-
-    return benchmark_series
-
-
-def generate_html(
-    benchmark_runs: list[BenchmarkRun], github_repo: str, compare_names: list[str]
-) -> str:
-    benchmarks = process_benchmark_data(benchmark_runs, compare_names)
-
-    timeseries = create_time_series_chart(benchmarks, github_repo)
-    timeseries_charts_html = "\n".join(
-        f'<div class="chart" data-label="{ts.label}" data-suite="{ts.suite}"><div>{ts.html}</div></div>'
-        for ts in timeseries
-    )
-
-    explicit_groups = create_explicit_groups(benchmark_runs, compare_names)
-
-    bar_charts = create_grouped_bar_charts(explicit_groups)
-    bar_charts_html = "\n".join(
-        f'<div class="chart" data-label="{bc.label}" data-suite="{bc.suite}"><div>{bc.html}</div></div>'
-        for bc in bar_charts
-    )
-
-    suite_names = {t.suite for t in timeseries}
-    suite_checkboxes_html = " ".join(
-        f'<label><input type="checkbox" class="suite-checkbox" data-suite="{suite}" checked> {suite}</label>'
-        for suite in suite_names
-    )
-
-    script_path = os.path.dirname(os.path.realpath(__file__))
-    results_template_path = Path(script_path, "benchmark_results.html.template")
-    with open(results_template_path, "r") as file:
-        html_template = file.read()
-
-    template = Template(html_template)
-    data = {
-        "suite_checkboxes_html": suite_checkboxes_html,
-        "timeseries_charts_html": timeseries_charts_html,
-        "bar_charts_html": bar_charts_html,
-    }
-
-    return template.substitute(data)