[Benchmarks] Use combo profiler in UR SubmitKernel scenarios #20295
Merged
Changes from 11 commits.

Commits (14, all by PatKamin):

af11dfa [Benchmarks] Use combo profiler in UR SubmitKernel scenarios
65d85c8 [Benchmarks] Measure cpu instructions optionally
ee9bd83 More explicit 'CPU count' separator usage
27da24e Apply review comments changes
d36416c Remove previous way of naming in Result
7255b63 Merge remote-tracking branch 'upstream/sycl' into cpu-count-benches
4ed0f63 Remove unused parse_unit_type() method
c7b1b2b Revert adding profiler-type option
74552d9 Add EmulateGraph parameter to SubmitGraph benches
7f416b5 Merge remote-tracking branch 'upstream/sycl' into cpu-count-benches
1aee5e0 Don't create new names
d5abaea Add comment on emulated graphs
08a5914 Add cpu counter runs for syclpreview variant of SubmitKernel benchmarks
832a7ec Merge remote-tracking branch 'upstream/sycl' into cpu-count-benches
Diff:

@@ -61,8 +61,8 @@ def git_url(self) -> str:
         return "https://github.com/intel/compute-benchmarks.git"

     def git_hash(self) -> str:
-        # Sep 25, 2025
-        return "7ba2e629404e34c635a46f28550a0952717d120f"
+        # Oct 9, 2025
+        return "32805b4b6f8dafb4a97f21c4c85bb2f6963f8dbb"

     def setup(self) -> None:
         if options.sycl is None:
@@ -152,7 +152,7 @@ def benchmarks(self) -> list[Benchmark]:
                         kernel_exec_time,
                     )
                 )
-                if runtime == RUNTIMES.SYCL:
+                if runtime in (RUNTIMES.SYCL, RUNTIMES.UR):
                     # Create CPU count variant
                     benches.append(
                         SubmitKernel(
@@ -203,6 +203,9 @@ def benchmarks(self) -> list[Benchmark]:
                 measure_completion_time,
                 use_events,
             ) in submit_graph_params:
+                emulate_graphs = (
+                    0 if runtime in (RUNTIMES.SYCL, RUNTIMES.SYCL_PREVIEW) else 1
+                )
                 benches.append(
                     GraphApiSubmitGraph(
                         self,
@@ -211,6 +214,7 @@ def benchmarks(self) -> list[Benchmark]:
                         num_kernels,
                         measure_completion_time,
                         use_events,
+                        emulate_graphs,
                         useHostTasks=0,
                     )
                 )
@@ -224,6 +228,7 @@ def benchmarks(self) -> list[Benchmark]:
                         num_kernels,
                         measure_completion_time,
                         use_events,
+                        emulate_graphs,
                         useHostTasks=0,
                         profiler_type=PROFILERS.CPU_COUNTER,
                     )
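For readers skimming the diff: a minimal illustration (not part of the change; the RUNTIMES enum values are assumed from this script) of how the new emulate_graphs flag falls out per runtime. Graph emulation stays off for the SYCL runtimes and is enabled for the rest.

    from enum import Enum

    class RUNTIMES(Enum):
        SYCL = "sycl"
        SYCL_PREVIEW = "syclpreview"
        LEVEL_ZERO = "l0"
        UR = "ur"

    for runtime in RUNTIMES:
        # Same ternary as in the diff above.
        emulate_graphs = 0 if runtime in (RUNTIMES.SYCL, RUNTIMES.SYCL_PREVIEW) else 1
        print(f"{runtime.value}: --EmulateGraphs={emulate_graphs}")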
@@ -294,14 +299,6 @@ def benchmarks(self) -> list[Benchmark]:
         return benches


-def parse_unit_type(compute_unit):
-    if "[count]" in compute_unit:
-        return "instr"
-    elif "[us]" in compute_unit:
-        return "μs"
-    return compute_unit.replace("[", "").replace("]", "")
-
-
 class ComputeBenchmark(Benchmark):
     def __init__(
         self,
@@ -330,6 +327,17 @@ def benchmark_bin(self) -> Path:
         """Returns the path to the benchmark binary"""
         return self.bench.project.build_dir / "bin" / self.bench_name

+    def cpu_count_str(self, separator: str = "") -> str:
+        # Note: SYCL CI currently parses for this "CPU count" value.
+        # Please update /devops/scripts/benchmarks/compare.py if this value
+        # is changed. See compare.py usage (w.r.t. --regression-filter) in
+        # /devops/actions/run-tests/benchmarks/action.yml.
+        return (
+            f"{separator} CPU count"
+            if self.profiler_type == PROFILERS.CPU_COUNTER
+            else ""
+        )
+
     def get_iters(self, run_trace: TracingType):
         """Returns the number of iterations to run for the given tracing type."""
         return (
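The suffix machinery in one self-contained sketch. The classes and PROFILERS member values here are illustrative stand-ins, not the real ones; only cpu_count_str() mirrors the method added above. This is what replaces the deleted per-class get_metadata() duplication later in the diff: the CPU-count variant is the same benchmark, whose names simply carry the suffix.

    from enum import Enum

    class PROFILERS(Enum):  # member values assumed for illustration
        TIMER = "timer"
        CPU_COUNTER = "cpuCounter"

    class FakeBench:
        def __init__(self, profiler_type: PROFILERS):
            self.profiler_type = profiler_type

        def cpu_count_str(self, separator: str = "") -> str:
            # Same logic as the method added in the hunk above.
            return (
                f"{separator} CPU count"
                if self.profiler_type == PROFILERS.CPU_COUNTER
                else ""
            )

        def name(self) -> str:
            return f"api_overhead_benchmark_ur SubmitKernel in order{self.cpu_count_str()}"

        def display_name(self) -> str:
            return f"UR SubmitKernel in order, NumKernels 10{self.cpu_count_str(separator=',')}"

    print(FakeBench(PROFILERS.TIMER).name())          # ... SubmitKernel in order
    print(FakeBench(PROFILERS.CPU_COUNTER).name())    # ... SubmitKernel in order CPU count
    print(FakeBench(PROFILERS.CPU_COUNTER).display_name())  # ..., NumKernels 10, CPU count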
@@ -412,27 +420,23 @@ def run(
         )
         parsed_results = self.parse_output(result)
         ret = []
-        for label, median, stddev, unit in parsed_results:
-            extra_label = " CPU count" if parse_unit_type(unit) == "instr" else ""
-            # Note: SYCL CI currently parses for on this "CPU count" value.
-            # Please update /devops/scripts/benchmarks/compare.py if this value
-            # is changed. See compare.py usage (w.r.t. --regression-filter) in
-            # /devops/actions/run-tests/benchmarks/action.yml.
+        for median, stddev in parsed_results:
+            unit = "instr" if self.profiler_type == PROFILERS.CPU_COUNTER else "μs"
             ret.append(
                 Result(
-                    label=self.name() + extra_label,
+                    label=self.name(),
                     value=median,
                     stddev=stddev,
                     command=command,
                     env=env_vars,
-                    unit=parse_unit_type(unit),
+                    unit=unit,
                     git_url=self.bench.git_url(),
                     git_hash=self.bench.git_hash(),
                 )
             )
         return ret

-    def parse_output(self, output):
+    def parse_output(self, output: str) -> list[tuple[float, float]]:
         csv_file = io.StringIO(output)
         reader = csv.reader(csv_file)
         next(reader, None)
@@ -442,16 +446,14 @@ def parse_output(self, output: str) -> list[tuple[float, float]]:
             if data_row is None:
                 break
             try:
-                label = data_row[0]
                 mean = float(data_row[1])
                 median = float(data_row[2])
                 # compute benchmarks report stddev as %
                 stddev = mean * (float(data_row[3].strip("%")) / 100.0)
                 if not math.isfinite(stddev):
                     stddev = 0.0  # Default to 0.0 if stddev is invalid

-                unit = data_row[7]
-                results.append((label, median, stddev, unit))
+                results.append((median, stddev))
             except (ValueError, IndexError) as e:
                 raise ValueError(f"Error parsing output: {e}")
         if len(results) == 0:
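A standalone illustration of that parsing path. The sample CSV row is made up; only the column positions the code actually reads are taken from the diff (mean at index 1, median at index 2, percent stddev at index 3), and the stddev arithmetic works out as shown in the comments.

    import csv
    import io
    import math

    # One fabricated compute-benchmarks-style row after a header line.
    output = "name,mean,median,stddev,min,max,type,unit\nSubmitKernel,105.0,100.0,10%,90.0,130.0,cpu,[us]\n"

    reader = csv.reader(io.StringIO(output))
    next(reader, None)  # skip the CSV header, as the code above does
    row = next(reader)
    mean, median = float(row[1]), float(row[2])
    stddev = mean * (float(row[3].strip("%")) / 100.0)  # 105.0 * 0.10 == 10.5
    if not math.isfinite(stddev):
        stddev = 0.0
    print((median, stddev))  # -> (100.0, 10.5)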
@@ -532,7 +534,7 @@ def name(self):
             f" KernelExecTime={self.KernelExecTime}" if self.KernelExecTime != 1 else ""
         )

-        return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}"
+        return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}{self.cpu_count_str()}"

     def display_name(self) -> str:
         order = "in order" if self.ioq else "out of order"
@@ -544,7 +546,7 @@ def display_name(self) -> str:
         if self.KernelExecTime != 1:
             info.append(f"KernelExecTime={self.KernelExecTime}")
         additional_info = f" {' '.join(info)}" if info else ""
-        return f"{self.runtime.value.upper()} SubmitKernel {order}{additional_info}, NumKernels {self.NumKernels}"
+        return f"{self.runtime.value.upper()} SubmitKernel {order}{additional_info}, NumKernels {self.NumKernels}{self.cpu_count_str(separator=',')}"

     def explicit_group(self):
         order = "in order" if self.ioq else "out of order"
@@ -553,7 +555,7 @@ def explicit_group(self):

         kernel_exec_time_str = f" long kernel" if self.KernelExecTime != 1 else ""

-        return f"SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}"
+        return f"SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}{self.cpu_count_str(separator=',')}"

     def description(self) -> str:
         order = "in-order" if self.ioq else "out-of-order"
@@ -571,34 +573,16 @@ def range(self) -> tuple[float, float]:

     def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
         iters = self.get_iters(run_trace)
-        bin_args = [
+        return [
             f"--iterations={iters}",
             f"--Ioq={self.ioq}",
             f"--MeasureCompletion={self.MeasureCompletion}",
             "--Profiling=0",
             f"--NumKernels={self.NumKernels}",
             f"--KernelExecTime={self.KernelExecTime}",
             f"--UseEvents={self.UseEvents}",
+            f"--profilerType={self.profiler_type.value}",
         ]
-        if self.runtime == RUNTIMES.SYCL:
-            bin_args.append(f"--profilerType={self.profiler_type.value}")
-        return bin_args
-
-    def get_metadata(self) -> dict[str, BenchmarkMetadata]:
-        metadata_dict = super().get_metadata()
-
-        # Create CPU count variant with modified display name and explicit_group
-        cpu_count_name = self.name() + " CPU count"
-        cpu_count_metadata = copy.deepcopy(metadata_dict[self.name()])
-        cpu_count_display_name = self.display_name() + ", CPU count"
-        cpu_count_explicit_group = (
-            self.explicit_group() + ", CPU count" if self.explicit_group() else ""
-        )
-        cpu_count_metadata.display_name = cpu_count_display_name
-        cpu_count_metadata.explicit_group = cpu_count_explicit_group
-        metadata_dict[cpu_count_name] = cpu_count_metadata
-
-        return metadata_dict


 class ExecImmediateCopyQueue(ComputeBenchmark):
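Net effect of the bin_args() change: --profilerType is now passed for every runtime, not only SYCL, and the per-class get_metadata() duplication for the CPU-count variant is gone. Roughly what the argument list looks like for a CPU-counter SubmitKernel run; all values below are illustrative assumptions, including the profilerType string (assumed PROFILERS.CPU_COUNTER.value).

    # Hypothetical flag values; only the flag names come from the diff above.
    bin_args = [
        "--iterations=100000",
        "--Ioq=1",
        "--MeasureCompletion=0",
        "--Profiling=0",
        "--NumKernels=10",
        "--KernelExecTime=1",
        "--UseEvents=0",
        "--profilerType=cpuCounter",
    ]
    print(" ".join(bin_args))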
@@ -622,11 +606,11 @@ def __init__(

     def name(self):
         order = "in order" if self.ioq else "out of order"
-        return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}"
+        return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str()}"

     def display_name(self) -> str:
         order = "in order" if self.ioq else "out of order"
-        return f"SYCL ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}"
+        return f"SYCL ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str(separator=',')}"

     def description(self) -> str:
         order = "in-order" if self.ioq else "out-of-order"
@@ -671,10 +655,10 @@ def __init__(self, bench, isCopyOnly, source, destination, size, profiler_type):
         )

     def name(self):
-        return f"memory_benchmark_sycl QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}"
+        return f"memory_benchmark_sycl QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str()}"

     def display_name(self) -> str:
-        return f"SYCL QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}"
+        return f"SYCL QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str(separator=',')}"

     def description(self) -> str:
         operation = "copy-only" if self.isCopyOnly else "copy and command submission"
@@ -713,10 +697,10 @@ def __init__(self, bench, source, destination, size, profiler_type):
         )

     def name(self):
-        return f"memory_benchmark_sycl QueueMemcpy from {self.source} to {self.destination}, size {self.size}"
+        return f"memory_benchmark_sycl QueueMemcpy from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str()}"

     def display_name(self) -> str:
-        return f"SYCL QueueMemcpy from {self.source} to {self.destination}, size {self.size}"
+        return f"SYCL QueueMemcpy from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str(separator=',')}"

     def description(self) -> str:
         return (
@@ -974,6 +958,7 @@ def __init__(
         numKernels,
         measureCompletionTime,
         useEvents,
+        emulate_graphs,
         useHostTasks,
         profiler_type=PROFILERS.TIMER,
     ):
@@ -982,6 +967,7 @@ def __init__(
         self.measureCompletionTime = measureCompletionTime
         self.useEvents = useEvents
         self.useHostTasks = useHostTasks
+        self.emulateGraphs = emulate_graphs
         self.ioq_str = "in order" if self.inOrderQueue else "out of order"
         self.measure_str = (
             " with measure completion" if self.measureCompletionTime else ""
@@ -1003,7 +989,7 @@ def supported_runtimes(self) -> list[RUNTIMES]:
         return super().supported_runtimes() + [RUNTIMES.SYCL_PREVIEW]

     def explicit_group(self):
-        return f"SubmitGraph {self.ioq_str}{self.measure_str}{self.use_events_str}{self.host_tasks_str}, {self.numKernels} kernels"
+        return f"SubmitGraph {self.ioq_str}{self.measure_str}{self.use_events_str}{self.host_tasks_str}, {self.numKernels} kernels{self.cpu_count_str(separator=',')}"

     def description(self) -> str:
         return (
@@ -1012,10 +998,10 @@ def description(self) -> str:
         )

     def name(self):
-        return f"graph_api_benchmark_{self.runtime.value} SubmitGraph{self.use_events_str}{self.host_tasks_str} numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}"
+        return f"graph_api_benchmark_{self.runtime.value} SubmitGraph{self.use_events_str}{self.host_tasks_str} numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}{self.cpu_count_str()}"

     def display_name(self) -> str:
-        return f"{self.runtime.value.upper()} SubmitGraph {self.ioq_str}{self.measure_str}{self.use_events_str}{self.host_tasks_str}, {self.numKernels} kernels"
+        return f"{self.runtime.value.upper()} SubmitGraph {self.ioq_str}{self.measure_str}{self.use_events_str}{self.host_tasks_str}, {self.numKernels} kernels{self.cpu_count_str(separator=',')}"

     def get_tags(self):
         return [
@@ -1028,7 +1014,7 @@ def get_tags(self):

     def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
         iters = self.get_iters(run_trace)
-        bin_args = [
+        return [
             f"--iterations={iters}",
             f"--NumKernels={self.numKernels}",
             f"--MeasureCompletionTime={self.measureCompletionTime}",
@@ -1038,26 +1024,9 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
             f"--UseEvents={self.useEvents}",
             "--UseExplicit=0",
             f"--UseHostTasks={self.useHostTasks}",
+            f"--profilerType={self.profiler_type.value}",
+            f"--EmulateGraphs={self.emulateGraphs}",
         ]
-        if self.runtime == RUNTIMES.SYCL:
-            bin_args.append(f"--profilerType={self.profiler_type.value}")
-        return bin_args
-
-    def get_metadata(self) -> dict[str, BenchmarkMetadata]:
-        metadata_dict = super().get_metadata()
-
-        # Create CPU count variant with modified display name and explicit_group
-        cpu_count_name = self.name() + " CPU count"
-        cpu_count_metadata = copy.deepcopy(metadata_dict[self.name()])
-        cpu_count_display_name = self.display_name() + ", CPU count"
-        cpu_count_explicit_group = (
-            self.explicit_group() + ", CPU count" if self.explicit_group() else ""
-        )
-        cpu_count_metadata.display_name = cpu_count_display_name
-        cpu_count_metadata.explicit_group = cpu_count_explicit_group
-        metadata_dict[cpu_count_name] = cpu_count_metadata
-
-        return metadata_dict


 class UllsEmptyKernel(ComputeBenchmark):
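Likewise for SubmitGraph: both --profilerType and the new --EmulateGraphs flag are now always emitted, and the duplicated get_metadata() CPU-count handling is deleted here too. A sketch of the tail of a UR argument list; the values are illustrative assumptions, only the flag names come from the diff above.

    submit_graph_args_tail = [
        "--UseEvents=1",
        "--UseExplicit=0",
        "--UseHostTasks=0",
        "--profilerType=timer",  # assumed PROFILERS.TIMER.value
        "--EmulateGraphs=1",     # 1 for UR, per the emulate_graphs selection earlier
    ]
    print(" ".join(submit_graph_args_tail))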
class UllsEmptyKernel(ComputeBenchmark): | ||
|
@@ -1081,32 +1050,28 @@ def supported_runtimes(self) -> list[RUNTIMES]: | |
return [RUNTIMES.SYCL, RUNTIMES.LEVEL_ZERO] | ||
|
||
def explicit_group(self): | ||
return f"EmptyKernel, wgc: {self.wgc}, wgs: {self.wgs}" | ||
return f"EmptyKernel, wgc: {self.wgc}, wgs: {self.wgs}{self.cpu_count_str(separator=',')}" | ||
|
||
def description(self) -> str: | ||
return "" | ||
|
||
def name(self): | ||
return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}" | ||
return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}{self.cpu_count_str()}" | ||
|
||
def display_name(self) -> str: | ||
return ( | ||
f"{self.runtime.value.upper()} EmptyKernel, wgc {self.wgc}, wgs {self.wgs}" | ||
) | ||
return f"{self.runtime.value.upper()} EmptyKernel, wgc {self.wgc}, wgs {self.wgs}{self.cpu_count_str(separator=',')}" | ||
|
||
def get_tags(self): | ||
return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"] | ||
|
||
def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: | ||
iters = self.get_iters(run_trace) | ||
bin_args = [ | ||
return [ | ||
f"--iterations={iters}", | ||
f"--wgs={self.wgs}", | ||
f"--wgc={self.wgc}", | ||
f"--profilerType={self.profiler_type.value}", | ||
] | ||
if self.runtime == RUNTIMES.SYCL: | ||
bin_args.append(f"--profilerType={self.profiler_type.value}") | ||
return bin_args | ||
|
||
|
||
class UllsKernelSwitch(ComputeBenchmark): | ||
|