Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 31 additions & 15 deletions devops/scripts/benchmarks/benches/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,20 @@ def benchmarks(self) -> list[Benchmark]:
for runtime in self.enabled_runtimes():
for in_order_queue in [0, 1]:
for measure_completion in [0, 1]:
benches.append(
SubmitKernel(self, runtime, in_order_queue, measure_completion)
)
for enqueue_functions in [0, 1]:
# only SYCL backend supports enqueue functions
if enqueue_functions == 1 and runtime != RUNTIMES.SYCL:
continue

benches.append(
SubmitKernel(
self,
runtime,
in_order_queue,
measure_completion,
enqueue_functions,
)
)

# Add SinKernelGraph benchmarks
for runtime in self.enabled_runtimes():
Expand Down Expand Up @@ -278,10 +289,13 @@ def teardown(self):


class SubmitKernel(ComputeBenchmark):
def __init__(
    self, bench, runtime: RUNTIMES, ioq, MeasureCompletion=0, EnqueueFunctions=0
):
    """Configure one SubmitKernel benchmark variant.

    Args:
        bench: Object forwarded unchanged to the base-class constructor.
        runtime: Runtime under test; ``runtime.value`` selects the
            ``api_overhead_benchmark_<value>`` binary name.
        ioq: Truthy for an in-order queue, falsy for out-of-order;
            passed to the binary as ``--Ioq``.
        MeasureCompletion: Passed to the binary as ``--MeasureCompletion``.
        EnqueueFunctions: Passed to the binary as ``--EnqueueFunctions``.
    """
    # The attribute names mirror the command-line flags they are passed as.
    self.ioq = ioq
    self.runtime = runtime
    self.MeasureCompletion = MeasureCompletion
    self.EnqueueFunctions = EnqueueFunctions
    super().__init__(
        bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel"
    )
Expand All @@ -291,12 +305,17 @@ def get_tags(self):

def name(self):
    """Return the display name of this benchmark variant.

    The name is the binary name and test name, followed by the queue
    ordering and optional suffixes for completion measurement and for
    eventless SYCL enqueue.
    """
    order = "in order" if self.ioq else "out of order"
    completion_str = " with measure completion" if self.MeasureCompletion else ""
    enqueue_str = " using eventless SYCL enqueue" if self.EnqueueFunctions else ""
    return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{enqueue_str}"

def explicit_group(self):
    """Return the name of the group this benchmark is reported under.

    Eventless-enqueue variants all share one dedicated group; every other
    variant is grouped by queue ordering and completion measurement.
    """
    # make eventless enqueue its own group, since only SYCL supports this mode
    if self.EnqueueFunctions:
        return "Submit Kernel using eventless SYCL enqueue"

    order = "In Order" if self.ioq else "Out Of Order"
    completion_str = " With Completion" if self.MeasureCompletion else ""
    return f"SubmitKernel {order}{completion_str}"

def description(self) -> str:
Expand All @@ -305,15 +324,11 @@ def description(self) -> str:

completion_desc = ""
if self.runtime == RUNTIMES.UR:
completion_desc = f", {'including' if self.measure_completion else 'excluding'} kernel completion time"

l0_specific = ""
if self.runtime == RUNTIMES.LEVEL_ZERO:
l0_specific = " Uses immediate command lists"
completion_desc = f", {'including' if self.MeasureCompletion else 'excluding'} kernel completion time"

return (
f"Measures CPU time overhead of submitting {order} kernels through {runtime_name} API{completion_desc}. "
f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time. {l0_specific}"
f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time."
)

def range(self) -> tuple[float, float]:
Expand All @@ -323,11 +338,12 @@ def bin_args(self) -> list[str]:
return [
f"--Ioq={self.ioq}",
"--DiscardEvents=0",
f"--MeasureCompletion={self.measure_completion}",
f"--MeasureCompletion={self.MeasureCompletion}",
"--iterations=100000",
"--Profiling=0",
"--NumKernels=10",
"--KernelExecTime=1",
f"--EnqueueFunctions={self.EnqueueFunctions}",
]


Expand Down