diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index a7884c0bf64dc..1862d042fda84 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -127,9 +127,20 @@ def benchmarks(self) -> list[Benchmark]: for runtime in self.enabled_runtimes(): for in_order_queue in [0, 1]: for measure_completion in [0, 1]: - benches.append( - SubmitKernel(self, runtime, in_order_queue, measure_completion) - ) + for enqueue_functions in [0, 1]: + # only SYCL backend supports enqueue functions + if enqueue_functions == 1 and runtime != RUNTIMES.SYCL: + continue + + benches.append( + SubmitKernel( + self, + runtime, + in_order_queue, + measure_completion, + enqueue_functions, + ) + ) # Add SinKernelGraph benchmarks for runtime in self.enabled_runtimes(): @@ -278,10 +289,13 @@ def teardown(self): class SubmitKernel(ComputeBenchmark): - def __init__(self, bench, runtime: RUNTIMES, ioq, measure_completion=0): + def __init__( + self, bench, runtime: RUNTIMES, ioq, MeasureCompletion=0, EnqueueFunctions=0 + ): self.ioq = ioq self.runtime = runtime - self.measure_completion = measure_completion + self.MeasureCompletion = MeasureCompletion + self.EnqueueFunctions = EnqueueFunctions super().__init__( bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel" ) @@ -291,12 +305,17 @@ def get_tags(self): def name(self): order = "in order" if self.ioq else "out of order" - completion_str = " with measure completion" if self.measure_completion else "" - return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}" + completion_str = " with measure completion" if self.MeasureCompletion else "" + enqueue_str = " using eventless SYCL enqueue" if self.EnqueueFunctions else "" + return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{enqueue_str}" def explicit_group(self): + # make eventless enqueue its own group, since only SYCL supports this mode + if self.EnqueueFunctions: + return "Submit Kernel using eventless SYCL enqueue" + order = "In Order" if self.ioq else "Out Of Order" - completion_str = " With Completion" if self.measure_completion else "" + completion_str = " With Completion" if self.MeasureCompletion else "" return f"SubmitKernel {order}{completion_str}" def description(self) -> str: @@ -305,15 +324,11 @@ def description(self) -> str: completion_desc = "" if self.runtime == RUNTIMES.UR: - completion_desc = f", {'including' if self.measure_completion else 'excluding'} kernel completion time" - - l0_specific = "" - if self.runtime == RUNTIMES.LEVEL_ZERO: - l0_specific = " Uses immediate command lists" + completion_desc = f", {'including' if self.MeasureCompletion else 'excluding'} kernel completion time" return ( f"Measures CPU time overhead of submitting {order} kernels through {runtime_name} API{completion_desc}. " - f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time. {l0_specific}" + f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time." ) def range(self) -> tuple[float, float]: @@ -323,11 +338,12 @@ def bin_args(self) -> list[str]: return [ f"--Ioq={self.ioq}", "--DiscardEvents=0", - f"--MeasureCompletion={self.measure_completion}", + f"--MeasureCompletion={self.MeasureCompletion}", "--iterations=100000", "--Profiling=0", "--NumKernels=10", "--KernelExecTime=1", + f"--EnqueueFunctions={self.EnqueueFunctions}", ]