From f6c48316c4811fce3b4c7b4de144b7714f12c211 Mon Sep 17 00:00:00 2001
From: Jerry Tian
Date: Thu, 23 Oct 2025 22:08:50 -0400
Subject: [PATCH] fix: adapt benchmark memory allocation to device limits

Replaced hardcoded 1GB memory allocation with dynamic calculation based
on device capabilities. The benchmark now uses the smaller of 1GB or the
device's maximum global buffer size, preventing allocation failures on
devices with limited memory.

Changes:
- Calculate target memory as min(1024MB, device.max_global_buffer)
- Derive N dynamically from available memory instead of fixed 4096*4096
- Reorder constant definitions for clarity
- Add missing newline at end of file

This makes the benchmark more robust and portable across different
hardware configurations.
---
 src/main.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/main.cpp b/src/main.cpp
index 0151dcb..8bc2ff1 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -17,8 +17,9 @@ string fraction(const float x) {
 }
 
 void benchmark_device(const Device_Info& device_info) {
-	const uint N = 4096u*4096u; // kernel range: N*M*sizeof(float) = 1GB memory allocation
 	const uint M = 16u; // coalescence size
+	const uint target_memory_mb = min(1024u, device_info.max_global_buffer); // use 1GB or device limit, whichever is smaller
+	const uint N = (target_memory_mb*1048576u)/(M*sizeof(float)); // calculate N based on available memory
 	const uint N_kernel = 256u; // iterations for kernel calls
 	const uint N_memory = 16u; // iterations for PCIe memory transfers
 
@@ -197,4 +198,4 @@ int main(int argc, char* argv[]) {
 	println("'-----------------------------------------------------------------------------'");
 #endif // Linux
 	return 0;
-}
\ No newline at end of file
+}