Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 19 additions & 10 deletions devops/scripts/benchmarks/benches/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ def __init__(self, directory, suite):
def name(self) -> str:
pass

def display_name(self) -> str:
    """Human-readable label used when rendering charts.

    Defaults to the canonical ``name()``; subclasses may override it to
    supply a prettier label.
    """
    label = self.name()
    return label

@abstractmethod
def setup(self):
    # Subclasses must provide their own setup logic (abstract hook).
    pass
Expand Down Expand Up @@ -145,18 +151,21 @@ def get_tags(self) -> list[str]:
def range(self) -> tuple[float, float]:
    # Default: no expected (min, max) value range for this benchmark.
    # NOTE(review): returns None despite the tuple annotation — callers
    # guard with `if range` before indexing, so the effective type is
    # Optional[tuple[float, float]]; consider updating the annotation.
    return None

def get_metadata(self) -> BenchmarkMetadata:
def get_metadata(self) -> dict[str, BenchmarkMetadata]:
range = self.range()

return BenchmarkMetadata(
type="benchmark",
description=self.description(),
notes=self.notes(),
unstable=self.unstable(),
tags=self.get_tags(),
range_min=range[0] if range else None,
range_max=range[1] if range else None,
)
return {
self.name(): BenchmarkMetadata(
type="benchmark",
description=self.description(),
notes=self.notes(),
unstable=self.unstable(),
tags=self.get_tags(),
range_min=range[0] if range else None,
range_max=range[1] if range else None,
display_name=self.display_name(),
)
}


class Suite(ABC):
Expand Down
83 changes: 81 additions & 2 deletions devops/scripts/benchmarks/benches/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import os
import csv
import io
import copy
from utils.utils import run, git_clone, create_build_path
from .base import Benchmark, Suite
from utils.result import BenchmarkMetadata, Result
Expand Down Expand Up @@ -317,6 +318,7 @@ def __init__(self, bench, runtime: RUNTIMES, ioq, MeasureCompletion=0, UseEvents
self.runtime = runtime
self.MeasureCompletion = MeasureCompletion
self.UseEvents = UseEvents
self.NumKernels = 10
super().__init__(
bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel"
)
Expand All @@ -334,6 +336,16 @@ def name(self):

return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}"

def display_name(self) -> str:
    """Chart-friendly label for this SubmitKernel configuration."""
    parts = [f"{self.runtime.value.upper()} SubmitKernel"]
    parts.append("in order" if self.ioq else "out of order")
    # Optional qualifiers, appended only when the corresponding flag is set.
    extras = [
        label
        for flag, label in (
            (self.MeasureCompletion, "with measure completion"),
            (self.UseEvents, "using events"),
        )
        if flag
    ]
    if extras:
        parts.append(" ".join(extras))
    return f"{' '.join(parts)}, NumKernels {self.NumKernels}"

def explicit_group(self):
order = "In Order" if self.ioq else "Out Of Order"
completion_str = " With Completion" if self.MeasureCompletion else ""
Expand All @@ -354,7 +366,7 @@ def description(self) -> str:

return (
f"Measures CPU time overhead of submitting {order} kernels through {runtime_name} API{completion_desc}. "
f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time."
f"Runs {self.NumKernels} simple kernels with minimal execution time to isolate API overhead from kernel execution time."
)

def range(self) -> tuple[float, float]:
Expand All @@ -366,11 +378,23 @@ def bin_args(self) -> list[str]:
f"--MeasureCompletion={self.MeasureCompletion}",
"--iterations=100000",
"--Profiling=0",
"--NumKernels=10",
f"--NumKernels={self.NumKernels}",
"--KernelExecTime=1",
f"--UseEvents={self.UseEvents}",
]

def get_metadata(self) -> dict[str, BenchmarkMetadata]:
    """Extend the base metadata with a "CPU count" variant of this benchmark."""
    metadata = super().get_metadata()
    base_key = self.name()
    # Deep-copy so tweaking the variant's display_name does not mutate the
    # primary entry.
    cpu_variant = copy.deepcopy(metadata[base_key])
    cpu_variant.display_name = f"{self.display_name()}, CPU count"
    metadata[f"{base_key} CPU count"] = cpu_variant
    return metadata


class ExecImmediateCopyQueue(ComputeBenchmark):
def __init__(self, bench, ioq, isCopyOnly, source, destination, size):
Expand All @@ -385,6 +409,10 @@ def name(self):
order = "in order" if self.ioq else "out of order"
return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}"

def display_name(self) -> str:
    """Chart-friendly label for this immediate copy-queue benchmark."""
    if self.ioq:
        queue_order = "in order"
    else:
        queue_order = "out of order"
    return (
        f"SYCL ExecImmediateCopyQueue {queue_order} "
        f"from {self.source} to {self.destination}, size {self.size}"
    )

def description(self) -> str:
order = "in-order" if self.ioq else "out-of-order"
operation = "copy-only" if self.isCopyOnly else "copy and command submission"
Expand Down Expand Up @@ -419,6 +447,9 @@ def __init__(self, bench, isCopyOnly, source, destination, size):
def name(self):
    # Stable identifier; also used as the metadata dictionary key.
    return "memory_benchmark_sycl QueueInOrderMemcpy from {} to {}, size {}".format(
        self.source, self.destination, self.size
    )

def display_name(self) -> str:
    """Chart-friendly label for this in-order memcpy benchmark."""
    return "SYCL QueueInOrderMemcpy from {} to {}, size {}".format(
        self.source, self.destination, self.size
    )

def description(self) -> str:
operation = "copy-only" if self.isCopyOnly else "copy and command submission"
return (
Expand Down Expand Up @@ -450,6 +481,9 @@ def __init__(self, bench, source, destination, size):
def name(self):
    # Stable identifier; also used as the metadata dictionary key.
    return "memory_benchmark_sycl QueueMemcpy from {} to {}, size {}".format(
        self.source, self.destination, self.size
    )

def display_name(self) -> str:
    """Chart-friendly label for this general memcpy benchmark."""
    return "SYCL QueueMemcpy from {} to {}, size {}".format(
        self.source, self.destination, self.size
    )

def description(self) -> str:
return (
f"Measures general SYCL queue memory copy performance from {self.source} to "
Expand Down Expand Up @@ -478,6 +512,9 @@ def __init__(self, bench, type, size, placement):
def name(self):
    # Stable identifier; also used as the metadata dictionary key.
    details = ", ".join(
        (
            f"placement {self.placement}",
            f"type {self.type}",
            f"size {self.size}",
        )
    )
    return f"memory_benchmark_sycl StreamMemory, {details}"

def display_name(self) -> str:
    """Chart-friendly label for this bandwidth benchmark."""
    details = ", ".join(
        (
            f"placement {self.placement}",
            f"type {self.type}",
            f"size {self.size}",
        )
    )
    return f"SYCL StreamMemory, {details}"

def description(self) -> str:
return (
f"Measures {self.placement} memory bandwidth using {self.type} pattern with "
Expand Down Expand Up @@ -511,6 +548,9 @@ def __init__(self, bench):
def name(self):
    """Stable identifier; also used as the metadata dictionary key."""
    # Plain string literal: the previous f-string had no placeholders
    # (ruff/flake8 F541).
    return "miscellaneous_benchmark_sycl VectorSum"

def display_name(self) -> str:
    """Chart-friendly label for the vector-sum benchmark."""
    # Plain string literal: the previous f-string had no placeholders
    # (ruff/flake8 F541).
    return "SYCL VectorSum"

def description(self) -> str:
return (
"Measures performance of vector addition across 3D grid (512x256x256 elements) "
Expand Down Expand Up @@ -565,6 +605,19 @@ def name(self):
+ (" without copy offload" if not self.useCopyOffload else "")
)

def display_name(self) -> str:
    """Chart-friendly label for this multithreaded memcpy benchmark."""
    # Qualifiers appear only when the corresponding feature is disabled.
    caveats = [
        text
        for enabled, text in (
            (self.useEvents, "without events"),
            (self.useCopyOffload, "without copy offload"),
        )
        if not enabled
    ]
    suffix = f", {' '.join(caveats)}" if caveats else ""
    base = (
        f"UR MemcpyExecute, opsPerThread {self.numOpsPerThread}, "
        f"numThreads {self.numThreads}, allocSize {self.allocSize}, "
        f"srcUSM {self.srcUSM}, dstUSM {self.dstUSM}"
    )
    return base + suffix

def explicit_group(self):
return (
"MemcpyExecute opsPerThread: "
Expand Down Expand Up @@ -624,6 +677,9 @@ def description(self) -> str:
def name(self):
    # Stable identifier; also used as the metadata dictionary key.
    rt = self.runtime.value
    return (
        f"graph_api_benchmark_{rt} SinKernelGraph "
        f"graphs:{self.withGraphs}, numKernels:{self.numKernels}"
    )

def display_name(self) -> str:
    """Chart-friendly label for this graph benchmark."""
    runtime_tag = self.runtime.value.upper()
    return (
        f"{runtime_tag} SinKernelGraph, "
        f"graphs {self.withGraphs}, numKernels {self.numKernels}"
    )

def unstable(self) -> str:
return "This benchmark combines both eager and graph execution, and may not be representative of real use cases."

Expand Down Expand Up @@ -672,6 +728,9 @@ def description(self) -> str:
def name(self):
    # Stable identifier; also used as the metadata dictionary key.
    rt = self.runtime.value
    return (
        f"graph_api_benchmark_{rt} SubmitGraph numKernels:{self.numKernels} "
        f"ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}"
    )

def display_name(self) -> str:
    """Chart-friendly label for this graph-submission benchmark."""
    runtime_tag = self.runtime.value.upper()
    return (
        f"{runtime_tag} SubmitGraph, numKernels {self.numKernels}, "
        f"ioq {self.inOrderQueue}, measureCompletion {self.measureCompletionTime}"
    )

def get_tags(self):
return [
"graph",
Expand Down Expand Up @@ -710,6 +769,11 @@ def description(self) -> str:
def name(self):
    # Stable identifier; also used as the metadata dictionary key.
    rt = self.runtime.value
    return f"ulls_benchmark_{rt} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}"

def display_name(self) -> str:
    """Chart-friendly label for this empty-kernel latency benchmark."""
    return "{} EmptyKernel, wgc {}, wgs {}".format(
        self.runtime.value.upper(), self.wgc, self.wgs
    )

def get_tags(self):
return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"]

Expand Down Expand Up @@ -751,6 +815,9 @@ def description(self) -> str:
def name(self):
    # Stable identifier; also used as the metadata dictionary key.
    rt = self.runtime.value
    return f"ulls_benchmark_{rt} KernelSwitch count {self.count} kernelTime {self.kernelTime}"

def display_name(self) -> str:
    """Chart-friendly label for this kernel-switch benchmark."""
    return "{} KernelSwitch, count {}, kernelTime {}".format(
        self.runtime.value.upper(), self.count, self.kernelTime
    )

def get_tags(self):
return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"]

Expand Down Expand Up @@ -787,6 +854,12 @@ def name(self):
f"usmMemoryPlacement:{self.usm_memory_placement} size:{self.size} measureMode:{self.measure_mode}"
)

def display_name(self) -> str:
    """Chart-friendly label for this USM allocation benchmark."""
    runtime_tag = self.runtime.value.upper()
    return (
        f"{runtime_tag} UsmMemoryAllocation, "
        f"usmMemoryPlacement {self.usm_memory_placement}, "
        f"size {self.size}, measureMode {self.measure_mode}"
    )

def explicit_group(self):
    """Group key tying all UsmMemoryAllocation variants together in charts."""
    # Plain string literal: the previous f-string had no placeholders
    # (ruff/flake8 F541).
    return "UsmMemoryAllocation"

Expand Down Expand Up @@ -839,6 +912,12 @@ def name(self):
f"usmMemoryPlacement:{self.usm_memory_placement} allocationCount:{self.allocation_count} size:{self.size} measureMode:{self.measure_mode}"
)

def display_name(self) -> str:
    """Chart-friendly label for this batched USM allocation benchmark."""
    runtime_tag = self.runtime.value.upper()
    return (
        f"{runtime_tag} UsmBatchMemoryAllocation, "
        f"usmMemoryPlacement {self.usm_memory_placement}, "
        f"allocationCount {self.allocation_count}, "
        f"size {self.size}, measureMode {self.measure_mode}"
    )

def explicit_group(self):
    """Group key tying all UsmBatchMemoryAllocation variants together in charts."""
    # Plain string literal: the previous f-string had no placeholders
    # (ruff/flake8 F541).
    return "UsmBatchMemoryAllocation"

Expand Down
Loading
Loading