Skip to content

Commit 7bcfe40

Browse files
committed
fix(gpu): make GPU runtime detection deterministic and ROCm-safe
Signed-off-by: pnkcaht <samzoovsk19@gmail.com>
1 parent 68dd097 commit 7bcfe40

File tree

1 file changed

+29
-17
lines changed

1 file changed

+29
-17
lines changed

cmd/cli/pkg/gpu/gpu.go

Lines changed: 29 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -25,34 +25,46 @@ const (
2525

2626
// ProbeGPUSupport determines whether or not the Docker engine has GPU support.
2727
func ProbeGPUSupport(ctx context.Context, dockerClient client.SystemAPIClient) (GPUSupport, error) {
28-
// Check for ROCm runtime first
29-
if hasROCm, err := HasROCmRuntime(ctx, dockerClient); err == nil && hasROCm {
28+
// Query Docker Engine for its effective configuration.
29+
// Docker Info is the source of truth for which runtimes are actually usable.
30+
info, err := dockerClient.Info(ctx)
31+
if err != nil {
32+
return GPUSupportNone, err
33+
}
34+
35+
// 1. CUDA (NVIDIA)
36+
// NVIDIA remains the highest priority due to its wide adoption and
37+
// first-class support in Docker (>= 19.03).
38+
if _, ok := info.Runtimes["nvidia"]; ok {
39+
return GPUSupportCUDA, nil
40+
}
41+
42+
// 2. ROCm (AMD)
43+
// Explicit ROCm runtime configured in the Docker Engine.
44+
if _, ok := info.Runtimes["rocm"]; ok {
3045
return GPUSupportROCm, nil
3146
}
3247

33-
// Then check for MTHREADS runtime
34-
if hasMTHREADS, err := HasMTHREADSRuntime(ctx, dockerClient); err == nil && hasMTHREADS {
48+
// 3. MUSA (MThreads)
49+
// Used primarily on specific accelerator platforms.
50+
if _, ok := info.Runtimes["mthreads"]; ok {
3551
return GPUSupportMUSA, nil
3652
}
37-
// Check for CANN runtime first
38-
if hasCANN, err := HasCANNRuntime(ctx, dockerClient); err == nil && hasCANN {
53+
54+
// 4. Ascend CANN (Huawei)
55+
// Ascend NPU runtime registered in Docker.
56+
if _, ok := info.Runtimes["cann"]; ok {
3957
return GPUSupportCANN, nil
4058
}
41-
// Then search for nvidia-container-runtime on PATH
42-
if _, err := exec.LookPath("nvidia-container-runtime"); err == nil {
43-
return GPUSupportCUDA, nil
44-
}
4559

46-
// Next look for explicitly configured nvidia runtime. This is not required in Docker 19.03+ but
47-
// may be configured on some systems
48-
hasNvidia, err := HasNVIDIARuntime(ctx, dockerClient)
49-
if err != nil {
50-
return GPUSupportNone, err
51-
}
52-
if hasNvidia {
60+
// 5. Legacy fallback
61+
// Older Docker setups may not register the NVIDIA runtime explicitly,
62+
// but still have the legacy nvidia-container-runtime available on PATH.
63+
if _, err := exec.LookPath("nvidia-container-runtime"); err == nil {
5364
return GPUSupportCUDA, nil
5465
}
5566

67+
// No known GPU runtime detected.
5668
return GPUSupportNone, nil
5769
}
5870

0 commit comments

Comments
 (0)