diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index 3eab381e7cf16..6a2aea460ddb9 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -61,8 +61,8 @@ def git_url(self) -> str:
         return "https://github.com/intel/compute-benchmarks.git"
 
     def git_hash(self) -> str:
-        # Sep 25, 2025
-        return "7ba2e629404e34c635a46f28550a0952717d120f"
+        # Oct 9, 2025
+        return "32805b4b6f8dafb4a97f21c4c85bb2f6963f8dbb"
 
     def setup(self) -> None:
         if options.sycl is None:
@@ -152,7 +152,7 @@ def benchmarks(self) -> list[Benchmark]:
                         kernel_exec_time,
                     )
                 )
-                if runtime == RUNTIMES.SYCL:
+                if runtime in (RUNTIMES.SYCL, RUNTIMES.SYCL_PREVIEW, RUNTIMES.UR):
                     # Create CPU count variant
                     benches.append(
                         SubmitKernel(
@@ -203,6 +203,11 @@ def benchmarks(self) -> list[Benchmark]:
             measure_completion_time,
             use_events,
         ) in submit_graph_params:
+            # Non-sycl runtimes have to be run with emulated graphs,
+            # see: https://github.com/intel/compute-benchmarks/commit/d81d5d602739482b9070c872a28c0b5ebb41de70
+            emulate_graphs = (
+                0 if runtime in (RUNTIMES.SYCL, RUNTIMES.SYCL_PREVIEW) else 1
+            )
             benches.append(
                 GraphApiSubmitGraph(
                     self,
@@ -211,6 +216,7 @@ def benchmarks(self) -> list[Benchmark]:
                     num_kernels,
                     measure_completion_time,
                     use_events,
+                    emulate_graphs,
                     useHostTasks=0,
                 )
             )
@@ -224,6 +230,7 @@ def benchmarks(self) -> list[Benchmark]:
                         num_kernels,
                         measure_completion_time,
                         use_events,
+                        emulate_graphs,
                         useHostTasks=0,
                         profiler_type=PROFILERS.CPU_COUNTER,
                     )
@@ -294,14 +301,6 @@ def benchmarks(self) -> list[Benchmark]:
         return benches
 
 
-def parse_unit_type(compute_unit):
-    if "[count]" in compute_unit:
-        return "instr"
-    elif "[us]" in compute_unit:
-        return "μs"
-    return compute_unit.replace("[", "").replace("]", "")
-
-
 class ComputeBenchmark(Benchmark):
     def __init__(
         self,
@@ -330,6 +329,17 @@ def benchmark_bin(self) -> Path:
         """Returns the path to the benchmark binary"""
         return self.bench.project.build_dir / "bin" / self.bench_name
 
+    def cpu_count_str(self, separator: str = "") -> str:
+        # Note: SYCL CI currently relies on this "CPU count" value.
+        # Please update /devops/scripts/benchmarks/compare.py if this value
+        # is changed. See compare.py usage (w.r.t. --regression-filter) in
+        # /devops/actions/run-tests/benchmarks/action.yml.
+        return (
+            f"{separator} CPU count"
+            if self.profiler_type == PROFILERS.CPU_COUNTER
+            else ""
+        )
+
     def get_iters(self, run_trace: TracingType):
         """Returns the number of iterations to run for the given tracing type."""
         return (
@@ -412,27 +422,23 @@ def run(
         )
         parsed_results = self.parse_output(result)
         ret = []
-        for label, median, stddev, unit in parsed_results:
-            extra_label = " CPU count" if parse_unit_type(unit) == "instr" else ""
-            # Note: SYCL CI currently relies on this "CPU count" value.
-            # Please update /devops/scripts/benchmarks/compare.py if this value
-            # is changed. See compare.py usage (w.r.t. --regression-filter) in
-            # /devops/actions/run-tests/benchmarks/action.yml.
+        for median, stddev in parsed_results:
+            unit = "instr" if self.profiler_type == PROFILERS.CPU_COUNTER else "μs"
             ret.append(
                 Result(
-                    label=self.name() + extra_label,
+                    label=self.name(),
                     value=median,
                     stddev=stddev,
                     command=command,
                     env=env_vars,
-                    unit=parse_unit_type(unit),
+                    unit=unit,
                     git_url=self.bench.git_url(),
                     git_hash=self.bench.git_hash(),
                 )
             )
         return ret
 
-    def parse_output(self, output):
+    def parse_output(self, output: str) -> list[tuple[float, float]]:
         csv_file = io.StringIO(output)
         reader = csv.reader(csv_file)
         next(reader, None)
@@ -442,7 +448,6 @@ def parse_output(self, output):
             if data_row is None:
                 break
             try:
-                label = data_row[0]
                 mean = float(data_row[1])
                 median = float(data_row[2])
                 # compute benchmarks report stddev as %
@@ -450,8 +455,7 @@ def parse_output(self, output):
                 if not math.isfinite(stddev):
                     stddev = 0.0  # Default to 0.0 if stddev is invalid
-                unit = data_row[7]
-                results.append((label, median, stddev, unit))
+                results.append((median, stddev))
             except (ValueError, IndexError) as e:
                 raise ValueError(f"Error parsing output: {e}")
 
         if len(results) == 0:
@@ -532,7 +536,7 @@ def name(self):
             f" KernelExecTime={self.KernelExecTime}" if self.KernelExecTime != 1 else ""
         )
 
-        return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}"
+        return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}{self.cpu_count_str()}"
 
     def display_name(self) -> str:
         order = "in order" if self.ioq else "out of order"
@@ -544,7 +548,7 @@ def display_name(self) -> str:
         if self.KernelExecTime != 1:
             info.append(f"KernelExecTime={self.KernelExecTime}")
         additional_info = f" {' '.join(info)}" if info else ""
-        return f"{self.runtime.value.upper()} SubmitKernel {order}{additional_info}, NumKernels {self.NumKernels}"
+        return f"{self.runtime.value.upper()} SubmitKernel {order}{additional_info}, NumKernels {self.NumKernels}{self.cpu_count_str(separator=',')}"
 
     def explicit_group(self):
         order = "in order" if self.ioq else "out of order"
@@ -553,7 +557,7 @@ def explicit_group(self):
 
         kernel_exec_time_str = f" long kernel" if self.KernelExecTime != 1 else ""
 
-        return f"SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}"
+        return f"SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}{self.cpu_count_str(separator=',')}"
 
     def description(self) -> str:
         order = "in-order" if self.ioq else "out-of-order"
@@ -571,7 +575,7 @@ def range(self) -> tuple[float, float]:
 
     def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
         iters = self.get_iters(run_trace)
-        bin_args = [
+        return [
            f"--iterations={iters}",
             f"--Ioq={self.ioq}",
             f"--MeasureCompletion={self.MeasureCompletion}",
@@ -579,26 +583,8 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
             f"--NumKernels={self.NumKernels}",
             f"--KernelExecTime={self.KernelExecTime}",
             f"--UseEvents={self.UseEvents}",
+            f"--profilerType={self.profiler_type.value}",
         ]
-        if self.runtime == RUNTIMES.SYCL:
-            bin_args.append(f"--profilerType={self.profiler_type.value}")
-        return bin_args
-
-    def get_metadata(self) -> dict[str, BenchmarkMetadata]:
-        metadata_dict = super().get_metadata()
-
-        # Create CPU count variant with modified display name and explicit_group
-        cpu_count_name = self.name() + " CPU count"
-        cpu_count_metadata = copy.deepcopy(metadata_dict[self.name()])
-        cpu_count_display_name = self.display_name() + ", CPU count"
-        cpu_count_explicit_group = (
-            self.explicit_group() + ", CPU count" if self.explicit_group() else ""
-        )
-        cpu_count_metadata.display_name = cpu_count_display_name
-        cpu_count_metadata.explicit_group = cpu_count_explicit_group
-        metadata_dict[cpu_count_name] = cpu_count_metadata
-
-        return metadata_dict
 
 
 class ExecImmediateCopyQueue(ComputeBenchmark):
@@ -622,11 +608,11 @@ def __init__(
 
     def name(self):
         order = "in order" if self.ioq else "out of order"
-        return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}"
+        return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str()}"
 
     def display_name(self) -> str:
         order = "in order" if self.ioq else "out of order"
-        return f"SYCL ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}"
+        return f"SYCL ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str(separator=',')}"
 
     def description(self) -> str:
         order = "in-order" if self.ioq else "out-of-order"
@@ -671,10 +657,10 @@ def __init__(self, bench, isCopyOnly, source, destination, size, profiler_type):
         )
 
     def name(self):
-        return f"memory_benchmark_sycl QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}"
+        return f"memory_benchmark_sycl QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str()}"
 
     def display_name(self) -> str:
-        return f"SYCL QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}"
+        return f"SYCL QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str(separator=',')}"
 
     def description(self) -> str:
         operation = "copy-only" if self.isCopyOnly else "copy and command submission"
@@ -713,10 +699,10 @@ def __init__(self, bench, source, destination, size, profiler_type):
         )
 
     def name(self):
-        return f"memory_benchmark_sycl QueueMemcpy from {self.source} to {self.destination}, size {self.size}"
+        return f"memory_benchmark_sycl QueueMemcpy from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str()}"
 
     def display_name(self) -> str:
-        return f"SYCL QueueMemcpy from {self.source} to {self.destination}, size {self.size}"
+        return f"SYCL QueueMemcpy from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str(separator=',')}"
 
     def description(self) -> str:
         return (
@@ -974,6 +960,7 @@ def __init__(
         numKernels,
         measureCompletionTime,
         useEvents,
+        emulate_graphs,
         useHostTasks,
         profiler_type=PROFILERS.TIMER,
     ):
@@ -982,6 +969,7 @@ def __init__(
         self.measureCompletionTime = measureCompletionTime
         self.useEvents = useEvents
         self.useHostTasks = useHostTasks
+        self.emulateGraphs = emulate_graphs
        self.ioq_str = "in order" if self.inOrderQueue else "out of order"
         self.measure_str = (
             " with measure completion" if self.measureCompletionTime else ""
@@ -1003,7 +991,7 @@ def supported_runtimes(self) -> list[RUNTIMES]:
         return super().supported_runtimes() + [RUNTIMES.SYCL_PREVIEW]
 
     def explicit_group(self):
-        return f"SubmitGraph {self.ioq_str}{self.measure_str}{self.use_events_str}{self.host_tasks_str}, {self.numKernels} kernels"
+        return f"SubmitGraph {self.ioq_str}{self.measure_str}{self.use_events_str}{self.host_tasks_str}, {self.numKernels} kernels{self.cpu_count_str(separator=',')}"
 
     def description(self) -> str:
         return (
@@ -1012,10 +1000,10 @@ def description(self) -> str:
         )
 
     def name(self):
-        return f"graph_api_benchmark_{self.runtime.value} SubmitGraph{self.use_events_str}{self.host_tasks_str} numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}"
+        return f"graph_api_benchmark_{self.runtime.value} SubmitGraph{self.use_events_str}{self.host_tasks_str} numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}{self.cpu_count_str()}"
 
     def display_name(self) -> str:
-        return f"{self.runtime.value.upper()} SubmitGraph {self.ioq_str}{self.measure_str}{self.use_events_str}{self.host_tasks_str}, {self.numKernels} kernels"
+        return f"{self.runtime.value.upper()} SubmitGraph {self.ioq_str}{self.measure_str}{self.use_events_str}{self.host_tasks_str}, {self.numKernels} kernels{self.cpu_count_str(separator=',')}"
 
     def get_tags(self):
         return [
@@ -1028,7 +1016,7 @@ def get_tags(self):
 
     def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
         iters = self.get_iters(run_trace)
-        bin_args = [
+        return [
             f"--iterations={iters}",
             f"--NumKernels={self.numKernels}",
             f"--MeasureCompletionTime={self.measureCompletionTime}",
@@ -1038,26 +1026,9 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
             f"--UseEvents={self.useEvents}",
             "--UseExplicit=0",
             f"--UseHostTasks={self.useHostTasks}",
+            f"--profilerType={self.profiler_type.value}",
+            f"--EmulateGraphs={self.emulateGraphs}",
         ]
-        if self.runtime == RUNTIMES.SYCL:
-            bin_args.append(f"--profilerType={self.profiler_type.value}")
-        return bin_args
-
-    def get_metadata(self) -> dict[str, BenchmarkMetadata]:
-        metadata_dict = super().get_metadata()
-
-        # Create CPU count variant with modified display name and explicit_group
-        cpu_count_name = self.name() + " CPU count"
-        cpu_count_metadata = copy.deepcopy(metadata_dict[self.name()])
-        cpu_count_display_name = self.display_name() + ", CPU count"
-        cpu_count_explicit_group = (
-            self.explicit_group() + ", CPU count" if self.explicit_group() else ""
-        )
-        cpu_count_metadata.display_name = cpu_count_display_name
-        cpu_count_metadata.explicit_group = cpu_count_explicit_group
-        metadata_dict[cpu_count_name] = cpu_count_metadata
-
-        return metadata_dict
 
 
 class UllsEmptyKernel(ComputeBenchmark):
@@ -1081,32 +1052,28 @@ def supported_runtimes(self) -> list[RUNTIMES]:
         return [RUNTIMES.SYCL, RUNTIMES.LEVEL_ZERO]
 
     def explicit_group(self):
-        return f"EmptyKernel, wgc: {self.wgc}, wgs: {self.wgs}"
+        return f"EmptyKernel, wgc: {self.wgc}, wgs: {self.wgs}{self.cpu_count_str(separator=',')}"
 
     def description(self) -> str:
         return ""
 
     def name(self):
-        return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}"
+        return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}{self.cpu_count_str()}"
 
     def display_name(self) -> str:
-        return (
-            f"{self.runtime.value.upper()} EmptyKernel, wgc {self.wgc}, wgs {self.wgs}"
-        )
+        return f"{self.runtime.value.upper()} EmptyKernel, wgc {self.wgc}, wgs {self.wgs}{self.cpu_count_str(separator=',')}"
 
     def get_tags(self):
         return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"]
 
     def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
         iters = self.get_iters(run_trace)
-        bin_args = [
+        return [
             f"--iterations={iters}",
             f"--wgs={self.wgs}",
             f"--wgc={self.wgc}",
+            f"--profilerType={self.profiler_type.value}",
         ]
-        if self.runtime == RUNTIMES.SYCL:
-            bin_args.append(f"--profilerType={self.profiler_type.value}")
-        return bin_args
 
 
 class UllsKernelSwitch(ComputeBenchmark):