# Reviewer notes (leading text before the first "diff --git" header; patch
# tools skip it).
#
# What this patch changes in devops/scripts/benchmarks/benches/compute.py:
#   * git_hash() returns a new compute-benchmarks commit hash.
#   * MemcpyExecute.__init__ gains `runtime: RUNTIMES` (right after `bench`)
#     and `useBarrier` (last parameter); both are stored on the instance.
#   * The binary name passed to super().__init__ and the prefix of name()
#     are now built from `self.runtime.value` instead of the literal "ur".
#   * name() appends " with barrier" when useBarrier is truthy;
#     explicit_group() appends allocSize; description() mentions the barrier.
#   * get_tags() uses runtime_to_tag_name(self.runtime) instead of "UR".
#   * bin_args() passes --UseBarrier=<useBarrier>.
#   * benchmarks(): existing UR entries pass RUNTIMES.UR and a trailing 0,
#     one UR entry with a trailing 1 is added, and four SYCL_PREVIEW entries
#     are added.
#
# NOTE(review): the line breaks and leading indentation below were
# reconstructed from the hunk line counts and Python syntax; the original
# whitespace was lost. In particular, the new `benches += [` block is
# assumed to sit after (not inside) the `if options.ur is not None:` body,
# and the last hunk is assumed to end with two blank context lines.
# Verify against the upstream commit before applying.
#
diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index 3181c0e51eaf1..8ac1eeb341167 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -49,7 +49,7 @@ def git_url(self) -> str:
         return "https://github.com/intel/compute-benchmarks.git"
 
     def git_hash(self) -> str:
-        return "49a8c6314875c57fee9b59aea16e721572e3021d"
+        return "3283b5edb8bf771c519625af741b5db7a37b0111"
 
     def setup(self):
         if options.sycl is None:
@@ -196,11 +196,12 @@ def benchmarks(self) -> list[Benchmark]:
         # Add UR-specific benchmarks
         if options.ur is not None:
             benches += [
-                MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1, 1),
-                MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1, 1),
-                MemcpyExecute(self, 100, 4, 102400, 10, 1, 1, 0, 1),
-                MemcpyExecute(self, 100, 4, 102400, 10, 1, 1, 0, 0),
-                MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0, 1),
+                MemcpyExecute(self, RUNTIMES.UR, 400, 1, 102400, 10, 1, 1, 1, 1, 0),
+                MemcpyExecute(self, RUNTIMES.UR, 400, 1, 102400, 10, 0, 1, 1, 1, 0),
+                MemcpyExecute(self, RUNTIMES.UR, 100, 4, 102400, 10, 1, 1, 0, 1, 0),
+                MemcpyExecute(self, RUNTIMES.UR, 100, 4, 102400, 10, 1, 1, 0, 0, 0),
+                MemcpyExecute(self, RUNTIMES.UR, 4096, 4, 1024, 10, 0, 1, 0, 1, 0),
+                MemcpyExecute(self, RUNTIMES.UR, 4096, 4, 1024, 10, 0, 1, 0, 1, 1),
                 UsmMemoryAllocation(self, RUNTIMES.UR, "Device", 256, "Both"),
                 UsmMemoryAllocation(self, RUNTIMES.UR, "Device", 256 * 1024, "Both"),
                 UsmBatchMemoryAllocation(self, RUNTIMES.UR, "Device", 128, 256, "Both"),
@@ -211,6 +212,20 @@ def benchmarks(self) -> list[Benchmark]:
                     self, RUNTIMES.UR, "Device", 128, 128 * 1024, "Both"
                 ),
             ]
+        benches += [
+            MemcpyExecute(
+                self, RUNTIMES.SYCL_PREVIEW, 4096, 1, 1024, 40, 1, 1, 0, 1, 0
+            ),
+            MemcpyExecute(
+                self, RUNTIMES.SYCL_PREVIEW, 4096, 1, 1024, 40, 1, 1, 0, 1, 1
+            ),
+            MemcpyExecute(
+                self, RUNTIMES.SYCL_PREVIEW, 4096, 4, 1024, 10, 1, 1, 0, 1, 0
+            ),
+            MemcpyExecute(
+                self, RUNTIMES.SYCL_PREVIEW, 4096, 4, 1024, 10, 1, 1, 0, 1, 1
+            ),
+        ]
 
         return benches
 
@@ -533,6 +548,7 @@ class MemcpyExecute(ComputeBenchmark):
     def __init__(
         self,
         bench,
+        runtime: RUNTIMES,
         numOpsPerThread,
         numThreads,
         allocSize,
@@ -541,7 +557,9 @@ def __init__(
         dstUSM,
         useEvent,
         useCopyOffload,
+        useBarrier,
     ):
+        self.runtime = runtime
         self.numOpsPerThread = numOpsPerThread
         self.numThreads = numThreads
         self.allocSize = allocSize
@@ -550,7 +568,10 @@ def __init__(
         self.dstUSM = dstUSM
         self.useEvents = useEvent
         self.useCopyOffload = useCopyOffload
-        super().__init__(bench, "multithread_benchmark_ur", "MemcpyExecute")
+        self.useBarrier = useBarrier
+        super().__init__(
+            bench, f"multithread_benchmark_{self.runtime.value}", "MemcpyExecute"
+        )
 
     def extra_env_vars(self) -> dict:
         if not self.useCopyOffload:
@@ -560,9 +581,10 @@ def extra_env_vars(self) -> dict:
 
     def name(self):
         return (
-            f"multithread_benchmark_ur MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize} srcUSM:{self.srcUSM} dstUSM:{self.dstUSM}"
+            f"multithread_benchmark_{self.runtime.value} MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize} srcUSM:{self.srcUSM} dstUSM:{self.dstUSM}"
             + (" without events" if not self.useEvents else "")
             + (" without copy offload" if not self.useCopyOffload else "")
+            + (" with barrier" if self.useBarrier else "")
         )
 
     def explicit_group(self):
@@ -571,6 +593,8 @@ def explicit_group(self):
             + str(self.numOpsPerThread)
             + " numThreads: "
             + str(self.numThreads)
+            + " allocSize: "
+            + str(self.allocSize)
         )
 
     def description(self) -> str:
@@ -578,14 +602,16 @@ def description(self) -> str:
         dst_type = "device" if self.dstUSM == 1 else "host"
         events = "with" if self.useEvents else "without"
         copy_offload = "with" if self.useCopyOffload else "without"
+        with_barrier = "with" if self.useBarrier else "without"
         return (
             f"Measures multithreaded memory copy performance with {self.numThreads} threads "
             f"each performing {self.numOpsPerThread} operations on {self.allocSize} bytes "
-            f"from {src_type} to {dst_type} memory {events} events {copy_offload} driver copy offload."
+            f"from {src_type} to {dst_type} memory {events} events {copy_offload} driver copy offload "
+            f"{with_barrier} barrier. "
         )
 
     def get_tags(self):
-        return ["memory", "latency", "UR", "micro"]
+        return ["memory", "latency", runtime_to_tag_name(self.runtime), "micro"]
 
     def bin_args(self) -> list[str]:
         return [
@@ -599,6 +625,7 @@ def bin_args(self) -> list[str]:
             f"--iterations={self.iterations}",
             f"--SrcUSM={self.srcUSM}",
             f"--DstUSM={self.dstUSM}",
+            f"--UseBarrier={self.useBarrier}",
         ]
 
 