88import io
99from utils .utils import run , git_clone , create_build_path
1010from .base import Benchmark , Suite
11- from .result import Result
11+ from utils .result import Result
1212from options import options
1313from enum import Enum
1414
15+
1516class ComputeBench (Suite ):
1617 def __init__ (self , directory ):
1718 self .directory = directory
@@ -47,9 +48,8 @@ def setup(self):
4748 f"-Dunified-runtime_DIR={ options .ur } /lib/cmake/unified-runtime" ,
4849 ]
4950
50- print (f"{ self .__class__ .__name__ } : Run { configure_command } " )
5151 run (configure_command , add_sycl = True )
52- print ( f" { self . __class__ . __name__ } : Run cmake --build { build_path } -j" )
52+
5353 run (f"cmake --build { build_path } -j" , add_sycl = True )
5454
5555 self .built = True
@@ -73,16 +73,6 @@ def benchmarks(self) -> list[Benchmark]:
7373 ExecImmediateCopyQueue (self , 0 , 1 , "Device" , "Device" , 1024 ),
7474 ExecImmediateCopyQueue (self , 1 , 1 , "Device" , "Host" , 1024 ),
7575 VectorSum (self ),
76- MemcpyExecute (self , 400 , 1 , 102400 , 10 , 1 , 1 , 1 ),
77- MemcpyExecute (self , 100 , 8 , 102400 , 10 , 1 , 1 , 1 ),
78- MemcpyExecute (self , 400 , 8 , 1024 , 1000 , 1 , 1 , 1 ),
79- MemcpyExecute (self , 10 , 16 , 1024 , 10000 , 1 , 1 , 1 ),
80- MemcpyExecute (self , 400 , 1 , 102400 , 10 , 0 , 1 , 1 ),
81- MemcpyExecute (self , 100 , 8 , 102400 , 10 , 0 , 1 , 1 ),
82- MemcpyExecute (self , 400 , 8 , 1024 , 1000 , 0 , 1 , 1 ),
83- MemcpyExecute (self , 10 , 16 , 1024 , 10000 , 0 , 1 , 1 ),
84- MemcpyExecute (self , 4096 , 1 , 1024 , 10 , 0 , 1 , 0 ),
85- MemcpyExecute (self , 4096 , 4 , 1024 , 10 , 0 , 1 , 0 ),
8676 GraphApiSinKernelGraph (self , RUNTIMES .SYCL , 0 , 5 ),
8777 GraphApiSinKernelGraph (self , RUNTIMES .SYCL , 1 , 5 ),
8878 GraphApiSinKernelGraph (self , RUNTIMES .SYCL , 0 , 100 ),
@@ -98,6 +88,16 @@ def benchmarks(self) -> list[Benchmark]:
9888 SubmitKernelUR (self , 0 , 0 ),
9989 SubmitKernelUR (self , 1 , 0 ),
10090 SubmitKernelUR (self , 1 , 1 ),
91+ MemcpyExecute (self , 400 , 1 , 102400 , 10 , 1 , 1 , 1 ),
92+ MemcpyExecute (self , 100 , 8 , 102400 , 10 , 1 , 1 , 1 ),
93+ MemcpyExecute (self , 400 , 8 , 1024 , 1000 , 1 , 1 , 1 ),
94+ MemcpyExecute (self , 10 , 16 , 1024 , 10000 , 1 , 1 , 1 ),
95+ MemcpyExecute (self , 400 , 1 , 102400 , 10 , 0 , 1 , 1 ),
96+ MemcpyExecute (self , 100 , 8 , 102400 , 10 , 0 , 1 , 1 ),
97+ MemcpyExecute (self , 400 , 8 , 1024 , 1000 , 0 , 1 , 1 ),
98+ MemcpyExecute (self , 10 , 16 , 1024 , 10000 , 0 , 1 , 1 ),
99+ MemcpyExecute (self , 4096 , 1 , 1024 , 10 , 0 , 1 , 0 ),
100+ MemcpyExecute (self , 4096 , 4 , 1024 , 10 , 0 , 1 , 0 ),
101101 GraphApiSinKernelGraph (self , RUNTIMES .UR , 0 , 5 ),
102102 GraphApiSinKernelGraph (self , RUNTIMES .UR , 1 , 5 ),
103103 GraphApiSinKernelGraph (self , RUNTIMES .UR , 0 , 100 ),
@@ -136,6 +136,9 @@ def setup(self):
def explicit_group(self):
    """Return the explicit group label for this benchmark.

    This implementation always returns the empty string.
    """
    no_group = ""
    return no_group
138138
def description(self) -> str:
    """Return a human-readable description of this benchmark.

    This implementation always returns the empty string.
    """
    no_description = ""
    return no_description
141+
139142 def run (self , env_vars ) -> list [Result ]:
140143 command = [
141144 f"{ self .benchmark_bin } " ,
@@ -167,6 +170,7 @@ def run(self, env_vars) -> list[Result]:
167170 env = env_vars ,
168171 stdout = result ,
169172 unit = parse_unit_type (unit ),
173+ description = self .description ()
170174 )
171175 )
172176 return ret
@@ -221,6 +225,13 @@ def bin_args(self) -> list[str]:
221225 "--KernelExecTime=1" ,
222226 ]
223227
def description(self) -> str:
    """Describe the SYCL kernel-submission overhead benchmark.

    Reads ``self.ioq``: a truthy value is reported as "in-order", a falsy
    value as "out-of-order".

    Returns:
        Two sentences separated by a single space.
    """
    order = "in-order" if self.ioq else "out-of-order"
    # Adjacent string literals are concatenated verbatim, so the first
    # sentence must carry its own trailing space.
    return (
        f"Measures CPU time overhead of submitting {order} kernels through SYCL API. "
        "Uses 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time."
    )
234+
224235
225236class SubmitKernelUR (ComputeBenchmark ):
226237 def __init__ (self , bench , ioq , measureCompletion ):
@@ -237,6 +248,15 @@ def name(self):
def explicit_group(self):
    """Return the explicit group label, which is always "SubmitKernel"."""
    group_label = "SubmitKernel"
    return group_label
239250
def description(self) -> str:
    """Describe the Unified Runtime kernel-submission overhead benchmark.

    Reads ``self.ioq`` (truthy -> "in-order", otherwise "out-of-order") and
    ``self.measureCompletion`` (truthy -> "including", otherwise
    "excluding" kernel completion time).
    """
    if self.ioq:
        queue_mode = "in-order"
    else:
        queue_mode = "out-of-order"

    if self.measureCompletion:
        completion_scope = "including"
    else:
        completion_scope = "excluding"

    what = (
        f"Measures CPU time overhead of submitting {queue_mode} kernels "
        f"through Unified Runtime API, {completion_scope} kernel completion time."
    )
    how = "Uses 10 simple kernels with minimal execution time to isolate API overhead."
    return f"{what} {how}"
259+
240260 def bin_args (self ) -> list [str ]:
241261 return [
242262 f"--Ioq={ self .ioq } " ,
@@ -261,6 +281,14 @@ def name(self):
def explicit_group(self):
    """Return the explicit group label, which is always "SubmitKernel"."""
    group_label = "SubmitKernel"
    return group_label
263283
def description(self) -> str:
    """Describe the Level Zero kernel-submission overhead benchmark.

    Reads ``self.ioq``: truthy is reported as "in-order", falsy as
    "out-of-order".
    """
    if self.ioq:
        queue_mode = "in-order"
    else:
        queue_mode = "out-of-order"

    what = f"Measures CPU time overhead of submitting {queue_mode} kernels through Level Zero API."
    how = (
        "Uses immediate command lists with 10 minimal kernels to isolate "
        "submission overhead from execution time."
    )
    return f"{what} {how}"
291+
264292 def bin_args (self ) -> list [str ]:
265293 return [
266294 f"--Ioq={ self .ioq } " ,
@@ -286,6 +314,14 @@ def name(self):
286314 order = "in order" if self .ioq else "out of order"
287315 return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue { order } from { self .source } to { self .destination } , size { self .size } "
288316
def description(self) -> str:
    """Describe the immediate-execution copy-queue overhead benchmark.

    Reads ``self.ioq`` (queue ordering), ``self.isCopyOnly`` (truthy ->
    "copy-only", otherwise "copy and command submission"), and
    ``self.source``, ``self.destination`` and ``self.size``, which are
    interpolated as-is.
    """
    if self.ioq:
        queue_mode = "in-order"
    else:
        queue_mode = "out-of-order"

    if self.isCopyOnly:
        workload = "copy-only"
    else:
        workload = "copy and command submission"

    what = (
        f"Measures SYCL {queue_mode} queue overhead for {workload} "
        f"from {self.source} to {self.destination} memory with {self.size} bytes."
    )
    return f"{what} Tests immediate execution overheads."
324+
289325 def bin_args (self ) -> list [str ]:
290326 return [
291327 "--iterations=100000" ,
@@ -309,6 +345,13 @@ def __init__(self, bench, isCopyOnly, source, destination, size):
def name(self):
    """Return the benchmark name built from source, destination and size."""
    route = f"from {self.source} to {self.destination}"
    return f"memory_benchmark_sycl QueueInOrderMemcpy {route}, size {self.size}"
311347
def description(self) -> str:
    """Describe the in-order queue memcpy benchmark.

    Reads ``self.isCopyOnly`` (truthy -> "copy-only", otherwise "copy and
    command submission") and interpolates ``self.source``,
    ``self.destination`` and ``self.size`` as-is.
    """
    if self.isCopyOnly:
        workload = "copy-only"
    else:
        workload = "copy and command submission"

    route = f"from {self.source} to {self.destination}"
    return (
        f"Measures SYCL in-order queue memory copy performance for {workload} "
        f"{route} with {self.size} bytes, executed 100 times per iteration."
    )
354+
312355 def bin_args (self ) -> list [str ]:
313356 return [
314357 "--iterations=10000" ,
@@ -330,6 +373,12 @@ def __init__(self, bench, source, destination, size):
def name(self):
    """Return the benchmark name built from source, destination and size."""
    route = f"from {self.source} to {self.destination}"
    return f"memory_benchmark_sycl QueueMemcpy {route}, size {self.size}"
332375
def description(self) -> str:
    """Describe the general queue memcpy benchmark.

    Interpolates ``self.source``, ``self.destination`` and ``self.size``
    as-is into a single sentence.
    """
    route = f"from {self.source} to {self.destination}"
    return (
        "Measures general SYCL queue memory copy performance "
        f"{route} with {self.size} bytes per operation."
    )
381+
333382 def bin_args (self ) -> list [str ]:
334383 return [
335384 "--iterations=10000" ,
@@ -349,6 +398,12 @@ def __init__(self, bench, type, size, placement):
def name(self):
    """Return the benchmark name built from placement, type and size."""
    details = ", ".join(
        (
            f"placement {self.placement}",
            f"type {self.type}",
            f"size {self.size}",
        )
    )
    return f"memory_benchmark_sycl StreamMemory, {details}"
351400
def description(self) -> str:
    """Describe the stream-memory bandwidth benchmark.

    Interpolates ``self.placement``, ``self.type`` and ``self.size`` as-is
    and adds a note that higher GB/s values are better.
    """
    what = (
        f"Measures {self.placement} memory bandwidth using {self.type} "
        f"pattern with {self.size} bytes."
    )
    reading_hint = "Higher values (GB/s) indicate better performance."
    return f"{what} {reading_hint}"
406+
352407 # measurement is in GB/s
def lower_is_better(self):
    """Return False: for this benchmark a higher value is the better one."""
    prefers_lower = False
    return prefers_lower
@@ -362,6 +417,7 @@ def bin_args(self) -> list[str]:
362417 "--useEvents=0" ,
363418 "--contents=Zeros" ,
364419 "--multiplier=1" ,
420+ "--vectorSize=1" ,
365421 ]
366422
367423
@@ -372,6 +428,12 @@ def __init__(self, bench):
def name(self):
    """Return the fixed benchmark name for the vector-sum benchmark."""
    # Plain literal: the original f-string had no placeholders, so the
    # ``f`` prefix did nothing.
    return "miscellaneous_benchmark_sycl VectorSum"
374430
def description(self) -> str:
    """Return the fixed description of the vector-sum benchmark."""
    grid = "3D grid (512x256x256 elements)"
    return f"Measures performance of vector addition across {grid} using SYCL."
436+
375437 def bin_args (self ) -> list [str ]:
376438 return [
377439 "--iterations=1000" ,
@@ -408,6 +470,16 @@ def name(self):
408470 + (" without events" if not self .useEvents else "" )
409471 )
410472
def description(self) -> str:
    """Describe the multithreaded memcpy benchmark.

    ``self.srcUSM`` and ``self.dstUSM`` equal to 1 are reported as
    "device"; any other value is reported as "host". A truthy
    ``self.useEvents`` yields "with events", otherwise "without events".
    ``self.numThreads``, ``self.numOpsPerThread`` and ``self.allocSize``
    are interpolated as-is.
    """

    def memory_kind(usm_flag):
        # Only the exact value 1 is treated as device memory.
        return "device" if usm_flag == 1 else "host"

    origin = memory_kind(self.srcUSM)
    target = memory_kind(self.dstUSM)

    if self.useEvents:
        event_mode = "with"
    else:
        event_mode = "without"

    workload = (
        f"{self.numThreads} threads each performing "
        f"{self.numOpsPerThread} operations on {self.allocSize} bytes"
    )
    return (
        f"Measures multithreaded memory copy performance with {workload} "
        f"from {origin} to {target} memory {event_mode} events."
    )
482+
411483 def bin_args (self ) -> list [str ]:
412484 return [
413485 "--Ioq=1" ,
@@ -441,6 +513,13 @@ def __init__(self, bench, runtime: RUNTIMES, withGraphs, numKernels):
def explicit_group(self):
    """Return the group label, which embeds ``self.numKernels``."""
    kernel_count = self.numKernels
    return f"SinKernelGraph {kernel_count}"
443515
def description(self) -> str:
    """Describe the sin-kernel graph benchmark.

    Reads ``self.withGraphs`` (truthy -> "using graphs", otherwise
    "without graphs"), the upper-cased ``self.runtime.value``, and
    ``self.numKernels``.
    """
    if self.withGraphs:
        graph_mode = "using graphs"
    else:
        graph_mode = "without graphs"

    runtime_label = self.runtime.value.upper()
    what = (
        f"Measures {runtime_label} performance when executing "
        f"{self.numKernels} sin kernels {graph_mode}."
    )
    return f"{what} Tests overhead and benefits of graph-based execution."
522+
def name(self):
    """Return the benchmark name built from runtime, graph flag and kernel count."""
    binary = f"graph_api_benchmark_{self.runtime.value}"
    settings = f"graphs:{self.withGraphs}, numKernels:{self.numKernels}"
    return f"{binary} SinKernelGraph {settings}"
446525
@@ -452,28 +531,3 @@ def bin_args(self) -> list[str]:
452531 "--withCopyOffload=1" ,
453532 "--immediateAppendCmdList=0" ,
454533 ]
455-
456-
457- class GraphApiSubmitExecGraph (ComputeBenchmark ):
458- def __init__ (self , bench , ioq , submit , numKernels ):
459- self .ioq = ioq
460- self .submit = submit
461- self .numKernels = numKernels
462- super ().__init__ (bench , "graph_api_benchmark_sycl" , "SubmitExecGraph" )
463-
464- def name (self ):
465- return f"graph_api_benchmark_sycl SubmitExecGraph ioq:{ self .ioq } , submit:{ self .submit } , numKernels:{ self .numKernels } "
466-
467- def explicit_group (self ):
468- if self .submit :
469- return "SubmitGraph"
470- else :
471- return "ExecGraph"
472-
473- def bin_args (self ) -> list [str ]:
474- return [
475- "--iterations=100" ,
476- f"--measureSubmit={ self .submit } " ,
477- f"--ioq={ self .ioq } " ,
478- f"--numKernels={ self .numKernels } " ,
479- ]
0 commit comments