diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 29f87024ddeaa..4a9623b62ca73 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -14,6 +14,7 @@ class RUNTIMES(Enum): + SYCL_PREVIEW = "syclpreview" SYCL = "sycl" LEVEL_ZERO = "l0" UR = "ur" @@ -21,6 +22,7 @@ class RUNTIMES(Enum): def runtime_to_name(runtime: RUNTIMES) -> str: return { + RUNTIMES.SYCL_PREVIEW: "SYCL Preview", RUNTIMES.SYCL: "SYCL", RUNTIMES.LEVEL_ZERO: "Level Zero", RUNTIMES.UR: "Unified Runtime", @@ -29,6 +31,7 @@ def runtime_to_name(runtime: RUNTIMES) -> str: def runtime_to_tag_name(runtime: RUNTIMES) -> str: return { + RUNTIMES.SYCL_PREVIEW: "SYCL", RUNTIMES.SYCL: "SYCL", RUNTIMES.LEVEL_ZERO: "L0", RUNTIMES.UR: "UR", @@ -46,7 +49,7 @@ def git_url(self) -> str: return "https://github.com/intel/compute-benchmarks.git" def git_hash(self) -> str: - return "420842fc3f0c01aac7b328bf192c25d3e7b9fd9e" + return "9c1ed6fd59a7a40f8829251df4b5c0d847591183" def setup(self): if options.sycl is None: @@ -107,10 +110,16 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: ), } - def enabled_runtimes(self, supported_runtimes=None): + def enabled_runtimes(self, supported_runtimes=None, extra_runtimes=None): # all runtimes in the RUNTIMES enum runtimes = supported_runtimes or list(RUNTIMES) + # filter out SYCL_PREVIEW which is not supported by default in all benchmarks + runtimes = [r for r in runtimes if r != RUNTIMES.SYCL_PREVIEW] + + if extra_runtimes is not None: + runtimes.extend(extra_runtimes) + # Filter out UR if not available if options.ur is None: runtimes = [r for r in runtimes if r != RUNTIMES.UR] @@ -131,21 +140,17 @@ def benchmarks(self) -> list[Benchmark]: benches = [] # Add SubmitKernel benchmarks using loops - for runtime in self.enabled_runtimes(): + for runtime in self.enabled_runtimes(extra_runtimes=[RUNTIMES.SYCL_PREVIEW]): for in_order_queue in [0, 1]: for 
measure_completion in [0, 1]: - for enqueue_functions in [0, 1]: - # only SYCL backend supports enqueue functions - if enqueue_functions == 1 and runtime != RUNTIMES.SYCL: - continue - + for use_events in [0, 1]: benches.append( SubmitKernel( self, runtime, in_order_queue, measure_completion, - enqueue_functions, + use_events, ) ) @@ -305,13 +310,11 @@ def teardown(self): class SubmitKernel(ComputeBenchmark): - def __init__( - self, bench, runtime: RUNTIMES, ioq, MeasureCompletion=0, EnqueueFunctions=0 - ): + def __init__(self, bench, runtime: RUNTIMES, ioq, MeasureCompletion=0, UseEvents=0): self.ioq = ioq self.runtime = runtime self.MeasureCompletion = MeasureCompletion - self.EnqueueFunctions = EnqueueFunctions + self.UseEvents = UseEvents super().__init__( bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel" ) @@ -322,25 +325,30 @@ def get_tags(self): def name(self): order = "in order" if self.ioq else "out of order" completion_str = " with measure completion" if self.MeasureCompletion else "" - enqueue_str = " using eventless SYCL enqueue" if self.EnqueueFunctions else "" - return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{enqueue_str}" - def explicit_group(self): - # make eventless enqueue its own group, since only SYCL supports this mode - if self.EnqueueFunctions: - return "Submit Kernel using eventless SYCL enqueue" + # this needs to be inverted (i.e., using events is empty string) + # to match the existing already stored results + events_str = " not using events" if not self.UseEvents else "" + + return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}" + def explicit_group(self): order = "In Order" if self.ioq else "Out Of Order" completion_str = " With Completion" if self.MeasureCompletion else "" - return f"SubmitKernel {order}{completion_str}" + + # this needs to be inverted (i.e., using events is empty string) + # to match the existing already stored
results + events_str = " not using events" if not self.UseEvents else "" + + return f"SubmitKernel {order}{completion_str}{events_str}" def description(self) -> str: order = "in-order" if self.ioq else "out-of-order" runtime_name = runtime_to_name(self.runtime) - completion_desc = "" - if self.runtime == RUNTIMES.UR: - completion_desc = f", {'including' if self.MeasureCompletion else 'excluding'} kernel completion time" + completion_desc = ( + f", {'including' if self.MeasureCompletion else 'excluding'} kernel completion time" + ) return ( f"Measures CPU time overhead of submitting {order} kernels through {runtime_name} API{completion_desc}. " @@ -353,13 +361,12 @@ def range(self) -> tuple[float, float]: def bin_args(self) -> list[str]: return [ f"--Ioq={self.ioq}", - "--DiscardEvents=0", f"--MeasureCompletion={self.MeasureCompletion}", "--iterations=100000", "--Profiling=0", "--NumKernels=10", "--KernelExecTime=1", - f"--EnqueueFunctions={self.EnqueueFunctions}", + f"--UseEvents={self.UseEvents}", ] @@ -620,6 +627,9 @@ def bin_args(self) -> list[str]: ] + +# TODO: once L0 SubmitGraph exists, this needs to be cleaned up: split benchmarks into more groups, +# set all the parameters (NoEvents 0/1, which should get inverted into UseEvents) and +# unify the benchmark naming scheme with SubmitKernel.
class GraphApiSubmitGraph(ComputeBenchmark): def __init__( self, bench, runtime: RUNTIMES, inOrderQueue, numKernels, measureCompletionTime @@ -659,6 +669,7 @@ def bin_args(self) -> list[str]: f"--InOrderQueue={self.inOrderQueue}", "--Profiling=0", "--KernelExecutionTime=1", + "--NoEvents=1", # not all implementations support NoEvents=0 ] diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index 30dc607aa54a6..ae8ac16c264bf 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -60,7 +60,6 @@ def extract_timestamp(file_path: Path) -> str: self.runs = benchmark_runs def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: - def git_info_from_path(path: Path) -> (str, str): """ Derives git repo, commit information from git repo located in path.