33# See LICENSE.TXT
44# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
55
6+ from itertools import product
67import os
78import csv
89import io
@@ -24,6 +25,11 @@ class RUNTIMES(Enum):
2425 UR = "ur"
2526
2627
28+ class PROFILERS (Enum ):
29+ TIMER = "timer"
30+ CPU_COUNTER = "cpuCounter"
31+
32+
2733def runtime_to_name (runtime : RUNTIMES ) -> str :
2834 return {
2935 RUNTIMES .SYCL_PREVIEW : "SYCL Preview" ,
@@ -171,69 +177,116 @@ def benchmarks(self) -> list[Benchmark]:
171177
172178 # hand-picked value so that total execution time of the benchmark is
173179 # similar on all architectures
174- long_lernel_exec_time_ioq = [20 ]
180+ long_kernel_exec_time_ioq = [20 ]
175181 # For BMG server, a new value 200 is used, but we have to create metadata
176182 # for both values to keep the dashboard consistent.
177183 # See SubmitKernel.enabled()
178184 long_kernel_exec_time_ooo = [20 , 200 ]
179185
180- # The Combo Profiler is available only for selected sycl benchmarks
181- profiler_types = [ "timer" , "cpuCounter" ]
182-
183- for runtime in list ( RUNTIMES ):
184- # Add SubmitKernel benchmarks using loops
185- for in_order_queue in [ 0 , 1 ]:
186- for measure_completion in [ 0 , 1 ]:
187- for use_events in [ 0 , 1 ]:
188- long_kernel_exec_time = (
189- long_lernel_exec_time_ioq
190- if in_order_queue
191- else long_kernel_exec_time_ooo
192- )
193- for kernel_exec_time in [ 1 , * long_kernel_exec_time ]:
194- for profiler_type in profiler_types :
195- benches . append (
196- SubmitKernel (
197- self ,
198- runtime ,
199- in_order_queue ,
200- measure_completion ,
201- use_events ,
202- kernel_exec_time ,
203- profiler_type ,
204- )
205- )
206-
207- # Add SinKernelGraph benchmarks
208- for with_graphs in [ 0 , 1 ] :
209- for num_kernels in [ 5 , 100 ]:
186+ submit_kernel_params = product (
187+ list ( RUNTIMES ),
188+ [ 0 , 1 ], # in_order_queue
189+ [ 0 , 1 ], # measure_completion
190+ [ 0 , 1 ], # use_events
191+ )
192+ for (
193+ runtime ,
194+ in_order_queue ,
195+ measure_completion ,
196+ use_events ,
197+ ) in submit_kernel_params :
198+ long_kernel_exec_time = (
199+ long_kernel_exec_time_ioq
200+ if in_order_queue
201+ else long_kernel_exec_time_ooo
202+ )
203+ for kernel_exec_time in [ 1 , * long_kernel_exec_time ]:
204+ benches . append (
205+ SubmitKernel (
206+ self ,
207+ runtime ,
208+ in_order_queue ,
209+ measure_completion ,
210+ use_events ,
211+ kernel_exec_time ,
212+ )
213+ )
214+ if runtime == RUNTIMES . SYCL :
215+ # Create CPU count variant
210216 benches .append (
211- GraphApiSinKernelGraph (self , runtime , with_graphs , num_kernels )
217+ SubmitKernel (
218+ self ,
219+ runtime ,
220+ in_order_queue ,
221+ measure_completion ,
222+ use_events ,
223+ kernel_exec_time ,
224+ profiler_type = PROFILERS .CPU_COUNTER ,
225+ )
212226 )
213227
228+ # Add SinKernelGraph benchmarks
229+ sin_kernel_graph_params = product (
230+ list (RUNTIMES ),
231+ [0 , 1 ], # with_graphs
232+ [5 , 100 ], # num_kernels
233+ )
234+ for runtime , with_graphs , num_kernels in sin_kernel_graph_params :
235+ benches .append (
236+ GraphApiSinKernelGraph (self , runtime , with_graphs , num_kernels )
237+ )
238+
214239 # Add ULLS benchmarks
215- for profiler_type in profiler_types :
216- benches .append (UllsEmptyKernel (self , runtime , 1000 , 256 , profiler_type ))
240+ for runtime in list (RUNTIMES ):
241+ if runtime == RUNTIMES .SYCL :
242+ benches .append (
243+ UllsEmptyKernel (
244+ self , runtime , 1000 , 256 , profiler_type = PROFILERS .CPU_COUNTER
245+ )
246+ )
247+ benches .append (UllsEmptyKernel (self , runtime , 1000 , 256 ))
217248 benches .append (UllsKernelSwitch (self , runtime , 8 , 200 , 0 , 0 , 1 , 1 ))
218249
219- # Add GraphApiSubmitGraph benchmarks
220- for in_order_queue in [0 , 1 ]:
221- for num_kernels in self .submit_graph_num_kernels :
222- for measure_completion_time in [0 , 1 ]:
223- for use_events in [0 , 1 ]:
224- for profiler_type in profiler_types :
225- benches .append (
226- GraphApiSubmitGraph (
227- self ,
228- runtime ,
229- in_order_queue ,
230- num_kernels ,
231- measure_completion_time ,
232- profiler_type ,
233- use_events ,
234- useHostTasks = 0 ,
235- )
236- )
250+ # Add GraphApiSubmitGraph benchmarks
251+ submit_graph_params = product (
252+ list (RUNTIMES ),
253+ [0 , 1 ], # in_order_queue
254+ self .submit_graph_num_kernels ,
255+ [0 , 1 ], # measure_completion_time
256+ [0 , 1 ], # use_events
257+ )
258+ for (
259+ runtime ,
260+ in_order_queue ,
261+ num_kernels ,
262+ measure_completion_time ,
263+ use_events ,
264+ ) in submit_graph_params :
265+ benches .append (
266+ GraphApiSubmitGraph (
267+ self ,
268+ runtime ,
269+ in_order_queue ,
270+ num_kernels ,
271+ measure_completion_time ,
272+ use_events ,
273+ useHostTasks = 0 ,
274+ )
275+ )
276+ if runtime == RUNTIMES .SYCL :
277+ # Create CPU count variant
278+ benches .append (
279+ GraphApiSubmitGraph (
280+ self ,
281+ runtime ,
282+ in_order_queue ,
283+ num_kernels ,
284+ measure_completion_time ,
285+ use_events ,
286+ useHostTasks = 0 ,
287+ profiler_type = PROFILERS .CPU_COUNTER ,
288+ )
289+ )
237290
238291 # Add other benchmarks
239292 benches += [
@@ -244,7 +297,7 @@ def benchmarks(self) -> list[Benchmark]:
244297 GraphApiFinalizeGraph (self , RUNTIMES .SYCL , 0 , "Llama" ),
245298 GraphApiFinalizeGraph (self , RUNTIMES .SYCL , 1 , "Llama" ),
246299 ]
247- for profiler_type in profiler_types :
300+ for profiler_type in list ( PROFILERS ) :
248301 benches .append (
249302 QueueInOrderMemcpy (self , 0 , "Device" , "Device" , 1024 , profiler_type )
250303 )
@@ -310,7 +363,12 @@ def parse_unit_type(compute_unit):
310363
311364class ComputeBenchmark (Benchmark ):
312365 def __init__ (
313- self , bench , name , test , runtime : RUNTIMES = None , profiler_type : str = ""
366+ self ,
367+ bench ,
368+ name ,
369+ test ,
370+ runtime : RUNTIMES = None ,
371+ profiler_type : PROFILERS = PROFILERS .TIMER ,
314372 ):
315373 super ().__init__ (bench .directory , bench )
316374 self .bench = bench
@@ -478,7 +536,7 @@ def __init__(
478536 MeasureCompletion = 0 ,
479537 UseEvents = 0 ,
480538 KernelExecTime = 1 ,
481- profiler_type = "" ,
539+ profiler_type = PROFILERS . TIMER ,
482540 ):
483541 self .ioq = ioq
484542 self .MeasureCompletion = MeasureCompletion
@@ -578,7 +636,7 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
578636 f"--UseEvents={ self .UseEvents } " ,
579637 ]
580638 if self .runtime == RUNTIMES .SYCL :
581- bin_args .append (f"--profilerType={ self .profiler_type } " )
639+ bin_args .append (f"--profilerType={ self .profiler_type . value } " )
582640 return bin_args
583641
584642 def get_metadata (self ) -> dict [str , BenchmarkMetadata ]:
@@ -647,7 +705,7 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
647705 f"--dst={ self .destination } " ,
648706 f"--size={ self .size } " ,
649707 "--withCopyOffload=0" ,
650- f"--profilerType={ self .profiler_type } " ,
708+ f"--profilerType={ self .profiler_type . value } " ,
651709 ]
652710
653711
@@ -693,7 +751,7 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
693751 f"--size={ self .size } " ,
694752 "--count=100" ,
695753 "--withCopyOffload=0" ,
696- f"--profilerType={ self .profiler_type } " ,
754+ f"--profilerType={ self .profiler_type . value } " ,
697755 ]
698756
699757
@@ -731,7 +789,7 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
731789 f"--sourcePlacement={ self .source } " ,
732790 f"--destinationPlacement={ self .destination } " ,
733791 f"--size={ self .size } " ,
734- f"--profilerType={ self .profiler_type } " ,
792+ f"--profilerType={ self .profiler_type . value } " ,
735793 ]
736794
737795
@@ -970,9 +1028,9 @@ def __init__(
9701028 inOrderQueue ,
9711029 numKernels ,
9721030 measureCompletionTime ,
973- profiler_type ,
9741031 useEvents ,
9751032 useHostTasks ,
1033+ profiler_type = PROFILERS .TIMER ,
9761034 ):
9771035 self .inOrderQueue = inOrderQueue
9781036 self .numKernels = numKernels
@@ -1037,12 +1095,14 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
10371095 f"--UseHostTasks={ self .useHostTasks } " ,
10381096 ]
10391097 if self .runtime == RUNTIMES .SYCL :
1040- bin_args .append (f"--profilerType={ self .profiler_type } " )
1098+ bin_args .append (f"--profilerType={ self .profiler_type . value } " )
10411099 return bin_args
10421100
10431101
10441102class UllsEmptyKernel (ComputeBenchmark ):
1045- def __init__ (self , bench , runtime : RUNTIMES , wgc , wgs , profiler_type ):
1103+ def __init__ (
1104+ self , bench , runtime : RUNTIMES , wgc , wgs , profiler_type = PROFILERS .TIMER
1105+ ):
10461106 self .wgc = wgc
10471107 self .wgs = wgs
10481108 # iterations per bin_args: --iterations=10000
@@ -1084,7 +1144,7 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
10841144 f"--wgc={ self .wgc } " ,
10851145 ]
10861146 if self .runtime == RUNTIMES .SYCL :
1087- bin_args .append (f"--profilerType={ self .profiler_type } " )
1147+ bin_args .append (f"--profilerType={ self .profiler_type . value } " )
10881148 return bin_args
10891149
10901150
0 commit comments