From 02fc26c9ccc13d85e533afa9dd8cbb0a6856a73c Mon Sep 17 00:00:00 2001 From: Julius Tischbein Date: Wed, 15 Oct 2025 08:15:50 +0200 Subject: [PATCH 1/5] CUDA set scheduling strategy to spinning for cc121 --- ggml/src/ggml-cuda/ggml-cuda.cu | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index da312992c8039..b48ab5ab4d47e 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -207,6 +207,7 @@ static ggml_cuda_device_info ggml_cuda_init() { #endif // GGML_CUDA_FORCE_CUBLAS GGML_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count); + bool is_cc121 = false; std::vector> turing_devices_without_mma; for (int id = 0; id < info.device_count; ++id) { int device_vmm = 0; @@ -273,6 +274,9 @@ static ggml_cuda_device_info ggml_cuda_init() { } else if (device_name.substr(0, 21) == "NVIDIA GeForce GTX 16") { turing_devices_without_mma.push_back({ id, device_name }); } + + is_cc121 |= info.devices[id].cc == 1210; + #endif // defined(GGML_USE_HIP) } @@ -293,6 +297,11 @@ static ggml_cuda_device_info ggml_cuda_init() { // configure logging to stdout // CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr)); + // Setting device scheduling strategy for iGPUs to "spinning" to avoid delays in cuda synchronize calls. + if (is_cc121) { + CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin)); + } + return info; } From a33e305e11059a23efdc3504ac30052a4fd1e923 Mon Sep 17 00:00:00 2001 From: Julius Tischbein Date: Wed, 15 Oct 2025 10:38:22 +0200 Subject: [PATCH 2/5] Using prop.major and prop.minor, include HIP and MUSA --- ggml/src/ggml-cuda/ggml-cuda.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index b48ab5ab4d47e..9e780d2ff78cc 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -230,6 +230,8 @@ static ggml_cuda_device_info ggml_cuda_init() { cudaDeviceProp prop; CUDA_CHECK(cudaGetDeviceProperties(&prop, id)); + is_cc121 |= prop.major == 12 && prop.minor == 1; + info.default_tensor_split[id] = total_vram; total_vram += prop.totalGlobalMem; info.devices[id].integrated = false; // Temporarily disabled due to issues with corrupted output (e.g. #15034) @@ -275,8 +277,6 @@ static ggml_cuda_device_info ggml_cuda_init() { turing_devices_without_mma.push_back({ id, device_name }); } - is_cc121 |= info.devices[id].cc == 1210; - #endif // defined(GGML_USE_HIP) } @@ -298,6 +298,7 @@ static ggml_cuda_device_info ggml_cuda_init() { // CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr)); // Setting device scheduling strategy for iGPUs to "spinning" to avoid delays in cuda synchronize calls. + // This fix is temporary, as the strategy will be the default in later drivers. if (is_cc121) { CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin)); } From 4a707ecae68bdc0c970e7fa72ab830fbb307b952 Mon Sep 17 00:00:00 2001 From: Julius Tischbein Date: Wed, 15 Oct 2025 11:46:46 +0200 Subject: [PATCH 3/5] Exclude HIP and MUSA --- ggml/src/ggml-cuda/ggml-cuda.cu | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 9e780d2ff78cc..5eddfe772ca8a 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -207,7 +207,6 @@ static ggml_cuda_device_info ggml_cuda_init() { #endif // GGML_CUDA_FORCE_CUBLAS GGML_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count); - bool is_cc121 = false; std::vector> turing_devices_without_mma; for (int id = 0; id < info.device_count; ++id) { int device_vmm = 0; @@ -230,8 +229,6 @@ static ggml_cuda_device_info ggml_cuda_init() { cudaDeviceProp prop; CUDA_CHECK(cudaGetDeviceProperties(&prop, id)); - is_cc121 |= prop.major == 12 && prop.minor == 1; - info.default_tensor_split[id] = total_vram; total_vram += prop.totalGlobalMem; info.devices[id].integrated = false; // Temporarily disabled due to issues with corrupted output (e.g. #15034) @@ -277,6 +274,15 @@ static ggml_cuda_device_info ggml_cuda_init() { turing_devices_without_mma.push_back({ id, device_name }); } + + // Temporary performance fix: + // Setting device scheduling strategy for iGPUs with cc121 to "spinning" to avoid delays in cuda synchronize calls. + // TODO: Check for future drivers the default scheduling strategy and + // remove this call again when cudaDeviceScheduleSpin is default. + if (prop.major == 12 && prop.minor == 1) { + CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin)); + } + #endif // defined(GGML_USE_HIP) } @@ -297,12 +303,6 @@ static ggml_cuda_device_info ggml_cuda_init() { // configure logging to stdout // CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr)); - // Setting device scheduling strategy for iGPUs to "spinning" to avoid delays in cuda synchronize calls. - // This fix is temporary, as the strategy will be the default in later drivers. - if (is_cc121) { - CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin)); - } - return info; } From f7ada07019bdc4b0ac3feaae08c0b7ce12f5324b Mon Sep 17 00:00:00 2001 From: Julius Tischbein Date: Wed, 15 Oct 2025 12:49:52 +0200 Subject: [PATCH 4/5] Remove trailing whitespace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Johannes Gäßler --- ggml/src/ggml-cuda/ggml-cuda.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 5eddfe772ca8a..20ce56cb02397 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -277,7 +277,7 @@ static ggml_cuda_device_info ggml_cuda_init() { // Temporary performance fix: // Setting device scheduling strategy for iGPUs with cc121 to "spinning" to avoid delays in cuda synchronize calls. - // TODO: Check for future drivers the default scheduling strategy and + // TODO: Check for future drivers the default scheduling strategy and // remove this call again when cudaDeviceScheduleSpin is default. if (prop.major == 12 && prop.minor == 1) { CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin)); From 14652afc91283d192b488cbd5014f1ed0380845b Mon Sep 17 00:00:00 2001 From: Julius Tischbein Date: Wed, 15 Oct 2025 12:50:37 +0200 Subject: [PATCH 5/5] Remove empty line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Johannes Gäßler --- ggml/src/ggml-cuda/ggml-cuda.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 20ce56cb02397..a5e77672f6e95 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -274,7 +274,6 @@ static ggml_cuda_device_info ggml_cuda_init() { turing_devices_without_mma.push_back({ id, device_name }); } - // Temporary performance fix: // Setting device scheduling strategy for iGPUs with cc121 to "spinning" to avoid delays in cuda synchronize calls. // TODO: Check for future drivers the default scheduling strategy and