135 changes: 51 additions & 84 deletions devops/scripts/benchmarks/benches/compute.py
@@ -61,8 +61,8 @@ def git_url(self) -> str:
return "https://github.com/intel/compute-benchmarks.git"

def git_hash(self) -> str:
# Sep 25, 2025
return "7ba2e629404e34c635a46f28550a0952717d120f"
# Oct 9, 2025
return "32805b4b6f8dafb4a97f21c4c85bb2f6963f8dbb"

def setup(self) -> None:
if options.sycl is None:
@@ -152,7 +152,7 @@ def benchmarks(self) -> list[Benchmark]:
kernel_exec_time,
)
)
if runtime == RUNTIMES.SYCL:
if runtime in (RUNTIMES.SYCL, RUNTIMES.SYCL_PREVIEW, RUNTIMES.UR):
# Create CPU count variant
benches.append(
SubmitKernel(
@@ -203,6 +203,11 @@ def benchmarks(self) -> list[Benchmark]:
measure_completion_time,
use_events,
) in submit_graph_params:
# Non-sycl runtimes have to be run with emulated graphs,
# see: https://github.com/intel/compute-benchmarks/commit/d81d5d602739482b9070c872a28c0b5ebb41de70
emulate_graphs = (
0 if runtime in (RUNTIMES.SYCL, RUNTIMES.SYCL_PREVIEW) else 1
)
benches.append(
GraphApiSubmitGraph(
self,
@@ -211,6 +216,7 @@ def benchmarks(self) -> list[Benchmark]:
num_kernels,
measure_completion_time,
use_events,
emulate_graphs,
useHostTasks=0,
)
)
@@ -224,6 +230,7 @@ def benchmarks(self) -> list[Benchmark]:
num_kernels,
measure_completion_time,
use_events,
emulate_graphs,
useHostTasks=0,
profiler_type=PROFILERS.CPU_COUNTER,
)
@@ -294,14 +301,6 @@ def benchmarks(self) -> list[Benchmark]:
return benches


def parse_unit_type(compute_unit):
if "[count]" in compute_unit:
return "instr"
elif "[us]" in compute_unit:
return "μs"
return compute_unit.replace("[", "").replace("]", "")


class ComputeBenchmark(Benchmark):
def __init__(
self,
@@ -330,6 +329,17 @@ def benchmark_bin(self) -> Path:
"""Returns the path to the benchmark binary"""
return self.bench.project.build_dir / "bin" / self.bench_name

def cpu_count_str(self, separator: str = "") -> str:
# Note: SYCL CI currently relies on this "CPU count" value.
# Please update /devops/scripts/benchmarks/compare.py if this value
# is changed. See compare.py usage (w.r.t. --regression-filter) in
# /devops/actions/run-tests/benchmarks/action.yml.
return (
f"{separator} CPU count"
if self.profiler_type == PROFILERS.CPU_COUNTER
else ""
)

def get_iters(self, run_trace: TracingType):
"""Returns the number of iterations to run for the given tracing type."""
return (
@@ -412,27 +422,23 @@ def run(
)
parsed_results = self.parse_output(result)
ret = []
for label, median, stddev, unit in parsed_results:
extra_label = " CPU count" if parse_unit_type(unit) == "instr" else ""
# Note: SYCL CI currently relies on this "CPU count" value.
# Please update /devops/scripts/benchmarks/compare.py if this value
# is changed. See compare.py usage (w.r.t. --regression-filter) in
# /devops/actions/run-tests/benchmarks/action.yml.
for median, stddev in parsed_results:
unit = "instr" if self.profiler_type == PROFILERS.CPU_COUNTER else "μs"
ret.append(
Result(
label=self.name() + extra_label,
label=self.name(),
value=median,
stddev=stddev,
command=command,
env=env_vars,
unit=parse_unit_type(unit),
unit=unit,
git_url=self.bench.git_url(),
git_hash=self.bench.git_hash(),
)
)
return ret

def parse_output(self, output):
def parse_output(self, output: str) -> list[tuple[float, float]]:
csv_file = io.StringIO(output)
reader = csv.reader(csv_file)
next(reader, None)
@@ -442,16 +448,14 @@ def parse_output(self, output):
if data_row is None:
break
try:
label = data_row[0]
mean = float(data_row[1])
median = float(data_row[2])
# compute benchmarks report stddev as %
stddev = mean * (float(data_row[3].strip("%")) / 100.0)
if not math.isfinite(stddev):
stddev = 0.0 # Default to 0.0 if stddev is invalid

unit = data_row[7]
results.append((label, median, stddev, unit))
results.append((median, stddev))
except (ValueError, IndexError) as e:
raise ValueError(f"Error parsing output: {e}")
if len(results) == 0:
@@ -532,7 +536,7 @@ def name(self):
f" KernelExecTime={self.KernelExecTime}" if self.KernelExecTime != 1 else ""
)

return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}"
return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}{self.cpu_count_str()}"

def display_name(self) -> str:
order = "in order" if self.ioq else "out of order"
@@ -544,7 +548,7 @@ def display_name(self) -> str:
if self.KernelExecTime != 1:
info.append(f"KernelExecTime={self.KernelExecTime}")
additional_info = f" {' '.join(info)}" if info else ""
return f"{self.runtime.value.upper()} SubmitKernel {order}{additional_info}, NumKernels {self.NumKernels}"
return f"{self.runtime.value.upper()} SubmitKernel {order}{additional_info}, NumKernels {self.NumKernels}{self.cpu_count_str(separator=',')}"

def explicit_group(self):
order = "in order" if self.ioq else "out of order"
@@ -553,7 +557,7 @@ def explicit_group(self):

kernel_exec_time_str = f" long kernel" if self.KernelExecTime != 1 else ""

return f"SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}"
return f"SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}{self.cpu_count_str(separator=',')}"

def description(self) -> str:
order = "in-order" if self.ioq else "out-of-order"
@@ -571,34 +575,16 @@ def range(self) -> tuple[float, float]:

def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
iters = self.get_iters(run_trace)
bin_args = [
return [
f"--iterations={iters}",
f"--Ioq={self.ioq}",
f"--MeasureCompletion={self.MeasureCompletion}",
"--Profiling=0",
f"--NumKernels={self.NumKernels}",
f"--KernelExecTime={self.KernelExecTime}",
f"--UseEvents={self.UseEvents}",
f"--profilerType={self.profiler_type.value}",
]
if self.runtime == RUNTIMES.SYCL:
bin_args.append(f"--profilerType={self.profiler_type.value}")
return bin_args

def get_metadata(self) -> dict[str, BenchmarkMetadata]:
metadata_dict = super().get_metadata()

# Create CPU count variant with modified display name and explicit_group
cpu_count_name = self.name() + " CPU count"
cpu_count_metadata = copy.deepcopy(metadata_dict[self.name()])
cpu_count_display_name = self.display_name() + ", CPU count"
cpu_count_explicit_group = (
self.explicit_group() + ", CPU count" if self.explicit_group() else ""
)
cpu_count_metadata.display_name = cpu_count_display_name
cpu_count_metadata.explicit_group = cpu_count_explicit_group
metadata_dict[cpu_count_name] = cpu_count_metadata

return metadata_dict


class ExecImmediateCopyQueue(ComputeBenchmark):
@@ -622,11 +608,11 @@ def __init__(

def name(self):
order = "in order" if self.ioq else "out of order"
return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}"
return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str()}"

def display_name(self) -> str:
order = "in order" if self.ioq else "out of order"
return f"SYCL ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}"
return f"SYCL ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str(separator=',')}"

def description(self) -> str:
order = "in-order" if self.ioq else "out-of-order"
@@ -671,10 +657,10 @@ def __init__(self, bench, isCopyOnly, source, destination, size, profiler_type):
)

def name(self):
return f"memory_benchmark_sycl QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}"
return f"memory_benchmark_sycl QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str()}"

def display_name(self) -> str:
return f"SYCL QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}"
return f"SYCL QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str(separator=',')}"

def description(self) -> str:
operation = "copy-only" if self.isCopyOnly else "copy and command submission"
@@ -713,10 +699,10 @@ def __init__(self, bench, source, destination, size, profiler_type):
)

def name(self):
return f"memory_benchmark_sycl QueueMemcpy from {self.source} to {self.destination}, size {self.size}"
return f"memory_benchmark_sycl QueueMemcpy from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str()}"

def display_name(self) -> str:
return f"SYCL QueueMemcpy from {self.source} to {self.destination}, size {self.size}"
return f"SYCL QueueMemcpy from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str(separator=',')}"

def description(self) -> str:
return (
@@ -974,6 +960,7 @@ def __init__(
numKernels,
measureCompletionTime,
useEvents,
emulate_graphs,
useHostTasks,
profiler_type=PROFILERS.TIMER,
):
@@ -982,6 +969,7 @@ def __init__(
self.measureCompletionTime = measureCompletionTime
self.useEvents = useEvents
self.useHostTasks = useHostTasks
self.emulateGraphs = emulate_graphs
self.ioq_str = "in order" if self.inOrderQueue else "out of order"
self.measure_str = (
" with measure completion" if self.measureCompletionTime else ""
@@ -1003,7 +991,7 @@ def supported_runtimes(self) -> list[RUNTIMES]:
return super().supported_runtimes() + [RUNTIMES.SYCL_PREVIEW]

def explicit_group(self):
return f"SubmitGraph {self.ioq_str}{self.measure_str}{self.use_events_str}{self.host_tasks_str}, {self.numKernels} kernels"
return f"SubmitGraph {self.ioq_str}{self.measure_str}{self.use_events_str}{self.host_tasks_str}, {self.numKernels} kernels{self.cpu_count_str(separator=',')}"

def description(self) -> str:
return (
@@ -1012,10 +1000,10 @@ def description(self) -> str:
)

def name(self):
return f"graph_api_benchmark_{self.runtime.value} SubmitGraph{self.use_events_str}{self.host_tasks_str} numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}"
return f"graph_api_benchmark_{self.runtime.value} SubmitGraph{self.use_events_str}{self.host_tasks_str} numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}{self.cpu_count_str()}"

def display_name(self) -> str:
return f"{self.runtime.value.upper()} SubmitGraph {self.ioq_str}{self.measure_str}{self.use_events_str}{self.host_tasks_str}, {self.numKernels} kernels"
return f"{self.runtime.value.upper()} SubmitGraph {self.ioq_str}{self.measure_str}{self.use_events_str}{self.host_tasks_str}, {self.numKernels} kernels{self.cpu_count_str(separator=',')}"

def get_tags(self):
return [
@@ -1028,7 +1016,7 @@ def get_tags(self):

def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
iters = self.get_iters(run_trace)
bin_args = [
return [
f"--iterations={iters}",
f"--NumKernels={self.numKernels}",
f"--MeasureCompletionTime={self.measureCompletionTime}",
@@ -1038,26 +1026,9 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
f"--UseEvents={self.useEvents}",
"--UseExplicit=0",
f"--UseHostTasks={self.useHostTasks}",
f"--profilerType={self.profiler_type.value}",
f"--EmulateGraphs={self.emulateGraphs}",
]
if self.runtime == RUNTIMES.SYCL:
bin_args.append(f"--profilerType={self.profiler_type.value}")
return bin_args

def get_metadata(self) -> dict[str, BenchmarkMetadata]:
metadata_dict = super().get_metadata()

# Create CPU count variant with modified display name and explicit_group
cpu_count_name = self.name() + " CPU count"
cpu_count_metadata = copy.deepcopy(metadata_dict[self.name()])
cpu_count_display_name = self.display_name() + ", CPU count"
cpu_count_explicit_group = (
self.explicit_group() + ", CPU count" if self.explicit_group() else ""
)
cpu_count_metadata.display_name = cpu_count_display_name
cpu_count_metadata.explicit_group = cpu_count_explicit_group
metadata_dict[cpu_count_name] = cpu_count_metadata

return metadata_dict


class UllsEmptyKernel(ComputeBenchmark):
@@ -1081,32 +1052,28 @@ def supported_runtimes(self) -> list[RUNTIMES]:
return [RUNTIMES.SYCL, RUNTIMES.LEVEL_ZERO]

def explicit_group(self):
return f"EmptyKernel, wgc: {self.wgc}, wgs: {self.wgs}"
return f"EmptyKernel, wgc: {self.wgc}, wgs: {self.wgs}{self.cpu_count_str(separator=',')}"

def description(self) -> str:
return ""

def name(self):
return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}"
return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}{self.cpu_count_str()}"

def display_name(self) -> str:
return (
f"{self.runtime.value.upper()} EmptyKernel, wgc {self.wgc}, wgs {self.wgs}"
)
return f"{self.runtime.value.upper()} EmptyKernel, wgc {self.wgc}, wgs {self.wgs}{self.cpu_count_str(separator=',')}"

def get_tags(self):
return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"]

def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
iters = self.get_iters(run_trace)
bin_args = [
return [
f"--iterations={iters}",
f"--wgs={self.wgs}",
f"--wgc={self.wgc}",
f"--profilerType={self.profiler_type.value}",
]
if self.runtime == RUNTIMES.SYCL:
bin_args.append(f"--profilerType={self.profiler_type.value}")
return bin_args


class UllsKernelSwitch(ComputeBenchmark):
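
For context on the naming changes above: the new cpu_count_str helper centralizes the " CPU count" suffix that name(), display_name(), and explicit_group() now append for CPU-counter runs, replacing the per-class get_metadata() duplication removed in this PR. Below is a minimal, self-contained sketch of that behavior; the PROFILERS values and the _Bench stand-in are illustrative assumptions, not code from this file.

# Minimal sketch of the cpu_count_str behavior introduced in this PR.
# The PROFILERS values and the _Bench class are illustrative stand-ins;
# only the helper's logic mirrors the diff above.
from enum import Enum


class PROFILERS(Enum):
    TIMER = "timer"
    CPU_COUNTER = "cpuCounter"


class _Bench:
    def __init__(self, profiler_type: PROFILERS):
        self.profiler_type = profiler_type

    def cpu_count_str(self, separator: str = "") -> str:
        # Append the suffix only for CPU-counter profiling runs.
        return (
            f"{separator} CPU count"
            if self.profiler_type == PROFILERS.CPU_COUNTER
            else ""
        )


timer_bench = _Bench(PROFILERS.TIMER)
counter_bench = _Bench(PROFILERS.CPU_COUNTER)
print(f"SubmitKernel in order{timer_bench.cpu_count_str()}")
# -> "SubmitKernel in order"
print(f"SubmitKernel in order{counter_bench.cpu_count_str()}")
# -> "SubmitKernel in order CPU count"
print(f"SYCL SubmitKernel in order{counter_bench.cpu_count_str(separator=',')}")
# -> "SYCL SubmitKernel in order, CPU count"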