10 changes: 10 additions & 0 deletions ggml/src/ggml-cuda/ggml-cuda.cu
@@ -207,6 +207,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
#endif // GGML_CUDA_FORCE_CUBLAS
GGML_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count);

bool is_cc121 = false;
Suggested change (Collaborator):
-    bool is_cc121 = false;
+    bool device_schedule_spin = false;

std::vector<std::pair<int, std::string>> turing_devices_without_mma;
for (int id = 0; id < info.device_count; ++id) {
int device_vmm = 0;
@@ -229,6 +230,8 @@ static ggml_cuda_device_info ggml_cuda_init() {
cudaDeviceProp prop;
CUDA_CHECK(cudaGetDeviceProperties(&prop, id));

is_cc121 |= prop.major == 12 && prop.minor == 1;
Suggested change (Collaborator):
-        is_cc121 |= prop.major == 12 && prop.minor == 1;
+        // Depending on the CUDA drivers the DGX Spark can run with a device schedule that prefers low power use.
+        // However, as it is plugged into a wall it should prefer maximum performance.
+        // TODO: add a check for a future driver version where this is fixed to avoid thrashing for > 20 CUDA contexts.
+        device_schedule_spin = prop.major == 12 && prop.minor == 1;
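
As an aside, the compute-capability check discussed above is plain CUDA runtime API usage. The following is a minimal standalone sketch, not part of the PR and not llama.cpp code, that queries every visible device with cudaGetDeviceProperties() and derives a device_schedule_spin flag the same way; the variable name follows the reviewer's suggestion, everything else is illustrative.

// Minimal sketch: detect whether any visible device reports compute capability 12.1,
// mirroring the check in this diff. Hypothetical standalone example, not llama.cpp code.
#include <cstdio>
#include <cuda_runtime.h>

int main() {
    int device_count = 0;
    if (cudaGetDeviceCount(&device_count) != cudaSuccess || device_count == 0) {
        fprintf(stderr, "no CUDA devices found\n");
        return 1;
    }

    bool device_schedule_spin = false;
    for (int id = 0; id < device_count; ++id) {
        cudaDeviceProp prop;
        if (cudaGetDeviceProperties(&prop, id) != cudaSuccess) {
            continue;
        }
        printf("device %d: %s, compute capability %d.%d\n", id, prop.name, prop.major, prop.minor);
        // Same condition as in the diff: compute capability 12.1 (DGX Spark).
        device_schedule_spin |= prop.major == 12 && prop.minor == 1;
    }
    printf("prefer spin device schedule: %s\n", device_schedule_spin ? "yes" : "no");
    return 0;
}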


info.default_tensor_split[id] = total_vram;
total_vram += prop.totalGlobalMem;
info.devices[id].integrated = false; // Temporarily disabled due to issues with corrupted output (e.g. #15034)
@@ -273,6 +276,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
} else if (device_name.substr(0, 21) == "NVIDIA GeForce GTX 16") {
turing_devices_without_mma.push_back({ id, device_name });
}

#endif // defined(GGML_USE_HIP)
}

@@ -293,6 +297,12 @@ static ggml_cuda_device_info ggml_cuda_init() {
// configure logging to stdout
// CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));

// Setting device scheduling strategy for iGPUs to "spinning" to avoid delays in cuda synchronize calls.
// This fix is temporary, as the strategy will be the default in later drivers.
Suggested change (Member):
-    // This fix is temporary, as the strategy will be the default in later drivers.
+    // This fix is temporary, as the strategy will be the default in later drivers.

if (is_cc121) {
CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin));
}
Suggested change (Collaborator):
-    // Setting device scheduling strategy for iGPUs to "spinning" to avoid delays in cuda synchronize calls.
-    // This fix is temporary, as the strategy will be the default in later drivers.
-    if (is_cc121) {
-        CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin));
-    }
+    if (device_schedule_spin) {
+        CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin));
+    }
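
For context, here is a minimal sketch of what the merged block does with cudaSetDeviceFlags(): it asks the CUDA runtime to spin in synchronization calls rather than yield or block. This is a hypothetical standalone example, not the llama.cpp code, and it assumes the flag is set on the host thread before the device is put to heavy use.

// Minimal sketch: request the spin device schedule so synchronization busy-waits,
// trading host CPU time for lower latency. Hypothetical example, not llama.cpp code.
#include <cstdio>
#include <cuda_runtime.h>

int main() {
    // Records the schedule flag for the current device; call it early so the
    // context created for this device picks it up.
    if (cudaSetDeviceFlags(cudaDeviceScheduleSpin) != cudaSuccess) {
        fprintf(stderr, "cudaSetDeviceFlags failed\n");
        return 1;
    }

    // With cudaDeviceScheduleSpin, this synchronization spins on the CPU while
    // waiting for outstanding GPU work instead of yielding or blocking.
    if (cudaDeviceSynchronize() != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize failed\n");
        return 1;
    }
    printf("spin schedule requested\n");
    return 0;
}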


return info;
}
