Skip to content

Commit acef9b4

Browse files
committed
ggml: Use assigned layers when reporting loading stats
Reporting params.NumGPULayers can be misleading because it is the requested number of layers, not the number actually loaded. The two are usually the same, but they can differ, for example when the GPU backend is missing.
1 parent 9a43994 commit acef9b4

File tree

1 file changed

+11
-9
lines changed

1 file changed

+11
-9
lines changed

ml/backend/ggml/ggml.go

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -356,23 +356,25 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
356356
}
357357

358358
// Mimic llama runner logs summarizing layers and memory
359-
slog.Info(fmt.Sprintf("offloading %d repeating layers to GPU", max(0, params.NumGPULayers-1)))
360359
gpuLayers := 0
360+
for _, layer := range layers {
361+
if C.ggml_backend_dev_type(layer.d) == C.GGML_BACKEND_DEVICE_TYPE_GPU {
362+
gpuLayers++
363+
}
364+
}
365+
slog.Info(fmt.Sprintf("offloading %d repeating layers to GPU", gpuLayers))
366+
361367
switch C.ggml_backend_dev_type(output.d) {
362-
case 0: // CPU
368+
case C.GGML_BACKEND_DEVICE_TYPE_CPU:
363369
slog.Info("offloading output layer to CPU")
364-
case 1: // GPU
370+
case C.GGML_BACKEND_DEVICE_TYPE_GPU:
365371
slog.Info("offloading output layer to GPU")
366372
gpuLayers++
367-
case 2: // ACCEL
373+
case C.GGML_BACKEND_DEVICE_TYPE_ACCEL:
368374
slog.Info("offloading output layer to ACCEL")
369375
}
370-
for _, layer := range layers {
371-
if C.ggml_backend_dev_type(layer.d) == 1 {
372-
gpuLayers++
373-
}
374-
}
375376
slog.Info(fmt.Sprintf("offloaded %d/%d layers to GPU", gpuLayers, len(layers)+1))
377+
376378
for bs := range maps.Values(bbs) {
377379
slog.Info("model weights", "buffer", C.GoString(C.ggml_backend_buffer_name(bs)), "size", format.HumanBytes2(uint64(C.ggml_backend_buffer_get_size(bs))))
378380
}

0 commit comments

Comments
 (0)