Skip to content

Commit ea63d60

Browse files
author
Piotr Stankiewicz
committed
inference: Fix nv-gpu-info path and wrap errors
Signed-off-by: Piotr Stankiewicz <[email protected]>
1 parent 9372ac3 commit ea63d60

File tree

3 files changed

+10
-5
lines changed

3 files changed

+10
-5
lines changed

pkg/gpuinfo/memory_windows.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ func getVRAMSize(modelRuntimeInstallPath string) (uint64, error) {
2020
return 1, nil
2121
}
2222

23-
nvGPUInfoBin := filepath.Join(modelRuntimeInstallPath, "com.docker.nv-gpu-info.exe")
23+
nvGPUInfoBin := filepath.Join(modelRuntimeInstallPath, "bin", "com.docker.nv-gpu-info.exe")
2424

2525
ctx, _ := context.WithTimeout(context.Background(), 30*time.Second)
2626
cmd := exec.CommandContext(ctx, nvGPUInfoBin)

pkg/inference/backends/llamacpp/llamacpp.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -226,19 +226,19 @@ func (l *llamaCpp) GetDiskUsage() (int64, error) {
226226
func (l *llamaCpp) GetRequiredMemoryForModel(model string, config *inference.BackendConfiguration) (*inference.RequiredMemory, error) {
227227
mdl, err := l.modelManager.GetModel(model)
228228
if err != nil {
229-
return nil, err
229+
return nil, fmt.Errorf("getting model(%s): %w", model, err)
230230
}
231231
mdlPath, err := mdl.GGUFPath()
232232
if err != nil {
233-
return nil, err
233+
return nil, fmt.Errorf("getting gguf path for model(%s): %w", model, err)
234234
}
235235
mdlGguf, err := parser.ParseGGUFFile(mdlPath)
236236
if err != nil {
237-
return nil, err
237+
return nil, fmt.Errorf("parsing gguf(%s): %w", mdlPath, err)
238238
}
239239
mdlConfig, err := mdl.Config()
240240
if err != nil {
241-
return nil, err
241+
return nil, fmt.Errorf("accessing model(%s) config: %w", model, err)
242242
}
243243

244244
contextSize := GetContextSize(&mdlConfig, config)

pkg/inference/scheduling/loader.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,12 @@ func newLoader(
145145
}
146146

147147
// Compute the amount of available memory.
148+
// TODO(p1-0tr): improve error handling
148149
vramSize, err := gpuInfo.GetVRAMSize()
149150
if err != nil {
150151
log.Warnf("Could not read VRAM size: %s", err)
152+
} else {
153+
log.Infof("Running on system with %dMB VRAM", vramSize/1024.0/1024.0)
151154
}
152155
hostInfo, err := sysinfo.Host()
153156
if err != nil {
@@ -156,6 +159,8 @@ func newLoader(
156159
ramSize, err := hostInfo.Memory()
157160
if err != nil {
158161
log.Warnf("Could not read host RAM size: %s", err)
162+
} else {
163+
log.Infof("Running on system with %dMB RAM", ramSize.Total/1024.0/1024.0)
159164
}
160165

161166
totalMemory := memory{

0 commit comments

Comments
 (0)