diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index d83a0d081af57..557f1da7aff42 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -175,6 +175,11 @@ def benchmarks(self) -> list[Benchmark]: MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1), MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1), MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0), + UsmMemoryAllocation(self, RUNTIMES.UR, "Device", 4 * 1024, "Both"), + UsmMemoryAllocation(self, RUNTIMES.UR, "Device", 4 * 1024 * 1024, "Both"), + UsmBatchMemoryAllocation(self, RUNTIMES.UR, "Device", 256, 4 * 1024, "Both"), + UsmBatchMemoryAllocation(self, RUNTIMES.UR, "Device", 32, 4 * 1024 * 1024, "Both"), + UsmRandomMemoryAllocation(self, RUNTIMES.UR, "Device", 256, 4 * 1024, 32 * 1024 * 1024, "LogUniform"), ] return benches @@ -698,3 +703,134 @@ def bin_args(self) -> list[str]: f"--ioq={self.ioq}", f"--ctrBasedEvents={self.ctrBasedEvents}", ] + + +class UsmMemoryAllocation(ComputeBenchmark): + def __init__(self, bench, runtime: RUNTIMES, usm_memory_placement, size, measure_mode): + self.runtime = runtime + self.usm_memory_placement = usm_memory_placement + self.size = size + self.measure_mode = measure_mode + super().__init__( + bench, f"api_overhead_benchmark_{runtime.value}", "UsmMemoryAllocation" + ) + + def get_tags(self): + return [runtime_to_tag_name(self.runtime), "micro", "latency", "memory"] + + def name(self): + return ( + f"api_overhead_benchmark_{self.runtime.value} UsmMemoryAllocation " + f"usmMemoryPlacement:{self.usm_memory_placement} size:{self.size} measureMode:{self.measure_mode}" + ) + + def explicit_group(self): + return f"UsmMemoryAllocation {self.usm_memory_placement} {self.size} {self.measure_mode}" + + def description(self) -> str: + what_is_measured = "Both memory allocation and memory free are timed" + if self.measure_mode == "Allocate": + what_is_measured = "Only memory allocation is timed" + elif self.measure_mode == "Free": + what_is_measured = "Only memory free is timed" + return ( + f"Measures memory allocation overhead by allocating {self.size} bytes of " + f"usm {self.usm_memory_placement} memory and free'ing it immediately. " + f"{what_is_measured}. " + ) + + def bin_args(self) -> list[str]: + return [ + f"--type={self.usm_memory_placement}", + f"--size={self.size}", + f"--measureMode={self.measure_mode}", + "--iterations=1000", + ] + + +class UsmBatchMemoryAllocation(ComputeBenchmark): + def __init__(self, bench, runtime: RUNTIMES, usm_memory_placement, allocation_count, size, measure_mode): + self.runtime = runtime + self.usm_memory_placement = usm_memory_placement + self.allocation_count = allocation_count + self.size = size + self.measure_mode = measure_mode + super().__init__( + bench, f"api_overhead_benchmark_{runtime.value}", "UsmBatchMemoryAllocation" + ) + + def get_tags(self): + return [runtime_to_tag_name(self.runtime), "micro", "latency", "memory"] + + def name(self): + return ( + f"api_overhead_benchmark_{self.runtime.value} UsmBatchMemoryAllocation " + f"usmMemoryPlacement:{self.usm_memory_placement} allocationCount:{self.allocation_count} size:{self.size} measureMode:{self.measure_mode}" + ) + + def explicit_group(self): + return f"UsmBatchMemoryAllocation {self.usm_memory_placement} {self.allocation_count} {self.size} {self.measure_mode}" + + def description(self) -> str: + what_is_measured = "Both memory allocation and memory free are timed" + if self.measure_mode == "Allocate": + what_is_measured = "Only memory allocation is timed" + elif self.measure_mode == "Free": + what_is_measured = "Only memory free is timed" + return ( + f"Measures memory allocation overhead by allocating {self.size} bytes of " + f"usm {self.usm_memory_placement} memory {self.allocation_count} times, then free'ing it all at once. " + f"{what_is_measured}. " + ) + + def bin_args(self) -> list[str]: + return [ + f"--type={self.usm_memory_placement}", + f"--allocationCount={self.allocation_count}", + f"--size={self.size}", + f"--measureMode={self.measure_mode}", + "--iterations=1000", + ] + + +class UsmRandomMemoryAllocation(ComputeBenchmark): + def __init__(self, bench, runtime: RUNTIMES, usm_memory_placement, operation_count, min_size, max_size, size_distribution): + self.runtime = runtime + self.usm_memory_placement = usm_memory_placement + self.operation_count = operation_count + self.min_size = min_size + self.max_size = max_size + self.size_distribution = size_distribution + super().__init__( + bench, f"api_overhead_benchmark_{runtime.value}", "UsmRandomMemoryAllocation" + ) + + def get_tags(self): + return [runtime_to_tag_name(self.runtime), "latency", "memory"] + + def name(self): + return ( + f"api_overhead_benchmark_{self.runtime.value} UsmRandomMemoryAllocation " + f"usmMemoryPlacement:{self.usm_memory_placement} operationCount:{self.operation_count} " + f"minSize:{self.min_size} maxSize:{self.max_size} sizeDistribution:{self.size_distribution}" + ) + + def explicit_group(self): + return f"UsmRandomMemoryAllocation {self.usm_memory_placement} {self.operation_count} {self.min_size} {self.max_size} {self.size_distribution}" + + def description(self) -> str: + return ( + f"Measures memory allocation overhead by performing {self.operation_count} randomized operations of memory allocation and memory free." + f"Usm {self.usm_memory_placement} memory is used. Allocation size is between {self.min_size} and {self.max_size}, " + f"with a {self.size_distribution} distribution. " + ) + + def bin_args(self) -> list[str]: + return [ + f"--type={self.usm_memory_placement}", + f"--operationCount={self.operation_count}", + f"--minSize={self.min_size}", + f"--maxSize={self.max_size}", + f"--sizeDistribution={self.size_distribution}", + "--iterations=1000", + ] diff --git a/unified-runtime/source/common/CMakeLists.txt b/unified-runtime/source/common/CMakeLists.txt index bc3c2aec68791..6c849ea889d88 100644 --- a/unified-runtime/source/common/CMakeLists.txt +++ b/unified-runtime/source/common/CMakeLists.txt @@ -36,7 +36,7 @@ target_include_directories(ur_common PUBLIC message(STATUS "Download Unified Memory Framework from github.com") if (NOT DEFINED UMF_REPO) - set(UMF_REPO "https://github.com/oneapi-src/unified-memory-framework.git") + set(UMF_REPO "https://github.com/bratpiorka/unified-memory-framework.git") endif() if (NOT DEFINED UMF_TAG) @@ -44,7 +44,7 @@ if (NOT DEFINED UMF_TAG) # Author: Ɓukasz Stolarczuk # Date: Tue Mar 18 15:41:37 2025 +0100 # 0.11.0-rc1 release - set(UMF_TAG v0.11.0-rc1) + set(UMF_TAG d1a61df8e689b88dda5c88dbaed5f982a66a7aa7) endif() message(STATUS "Will fetch Unified Memory Framework from ${UMF_REPO}")