Skip to content

Commit f7f6a00

Browse files
Author: Piotr Stankiewicz (committed)
inference: Fix nv-gpu-info path and wrap errors
Signed-off-by: Piotr Stankiewicz <[email protected]>
1 parent 9372ac3 commit f7f6a00

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

pkg/gpuinfo/memory_windows.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ func getVRAMSize(modelRuntimeInstallPath string) (uint64, error) {
2020
return 1, nil
2121
}
2222

23-
nvGPUInfoBin := filepath.Join(modelRuntimeInstallPath, "com.docker.nv-gpu-info.exe")
23+
nvGPUInfoBin := filepath.Join(modelRuntimeInstallPath, "bin", "com.docker.nv-gpu-info.exe")
2424

2525
ctx, _ := context.WithTimeout(context.Background(), 30*time.Second)
2626
cmd := exec.CommandContext(ctx, nvGPUInfoBin)

pkg/inference/backends/llamacpp/llamacpp.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -226,19 +226,19 @@ func (l *llamaCpp) GetDiskUsage() (int64, error) {
226226
func (l *llamaCpp) GetRequiredMemoryForModel(model string, config *inference.BackendConfiguration) (*inference.RequiredMemory, error) {
227227
mdl, err := l.modelManager.GetModel(model)
228228
if err != nil {
229-
return nil, err
229+
return nil, fmt.Errorf("getting model(%s): %w", model, err)
230230
}
231231
mdlPath, err := mdl.GGUFPath()
232232
if err != nil {
233-
return nil, err
233+
return nil, fmt.Errorf("getting gguf path for model(%s): %w", model, err)
234234
}
235235
mdlGguf, err := parser.ParseGGUFFile(mdlPath)
236236
if err != nil {
237-
return nil, err
237+
return nil, fmt.Errorf("parsing gguf(%s): %w", mdlPath, err)
238238
}
239239
mdlConfig, err := mdl.Config()
240240
if err != nil {
241-
return nil, err
241+
return nil, fmt.Errorf("accessing model(%s) config: %w", model, err)
242242
}
243243

244244
contextSize := GetContextSize(&mdlConfig, config)

0 commit comments

Comments (0)