Skip to content

Commit 8c27393

Browse files
author
Piotr Stankiewicz
committed
VRAM size getter for linux
Signed-off-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
1 parent 73bf98c commit 8c27393

File tree

5 files changed

+96
-2
lines changed

5 files changed

+96
-2
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ COPY --link . .
2727
# Build the Go binary (static build)
2828
RUN --mount=type=cache,target=/go/pkg/mod \
2929
--mount=type=cache,target=/root/.cache/go-build \
30-
CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o model-runner ./main.go
30+
CGO_ENABLED=1 GOOS=linux go build -ldflags="-s -w" -o model-runner ./main.go
3131

3232
# --- Get llama.cpp binary ---
3333
FROM docker/docker-model-backend-llamacpp:${LLAMA_SERVER_VERSION}-${LLAMA_SERVER_VARIANT} AS llama-server

pkg/inference/scheduling/loader.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ func newLoader(
126126
// Compute the amount of available memory.
127127
vramSize, err := getVRAMSize() // FIXME(p1-0tr): only implemented on macOS for now
128128
if err != nil {
129-
return nil // FIXME(p1-0tr): should forward the error
129+
log.Warnf("Could not read VRAM size: %s", err)
130130
}
131131
totalMemory := vramSize
132132

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package scheduling
2+
3+
/*
4+
#cgo LDFLAGS: -ldl
5+
#include "nvidia.h"
6+
*/
7+
import "C"
8+
import "errors"
9+
10+
// getVRAMSize returns total system GPU memory in bytes
11+
func getVRAMSize() (uint64, error) {
12+
vramSize := C.getVRAMSize()
13+
if vramSize == 0 {
14+
return 0, errors.New("could not get nvidia VRAM size")
15+
}
16+
return uint64(vramSize), nil
17+
}

pkg/inference/scheduling/nvidia.c

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// +build linux

#include "nvidia.h"

// Minimal NVML declarations, copied from the NVML API, so this file
// has no build-time dependency on the NVML SDK headers. The library
// itself is loaded at runtime with dlopen so the binary still runs on
// hosts without an NVIDIA driver installed.
typedef enum {
    NVML_SUCCESS = 0
} nvmlReturn_t;

typedef struct {
    unsigned long long total;
    unsigned long long free;
    unsigned long long used;
} nvmlMemory_t;

typedef void* nvmlDevice_t;

// getVRAMSize returns the total memory, in bytes, of the GPU at NVML
// device index 0, or 0 on any failure (library not found, symbols
// missing, NVML init error, or no device).
// NOTE(review): only device 0 is queried; on multi-GPU hosts this
// reports a single device's VRAM — confirm that is the intent.
size_t getVRAMSize() {
    void* handle;
    nvmlReturn_t (*nvmlInit)(void);
    nvmlReturn_t (*nvmlShutdown)(void);
    nvmlReturn_t (*nvmlDeviceGetHandleByIndex)(unsigned int index, nvmlDevice_t* device);
    nvmlReturn_t (*nvmlDeviceGetMemoryInfo)(nvmlDevice_t device, nvmlMemory_t* memory);

    nvmlReturn_t result;
    nvmlDevice_t device;
    nvmlMemory_t memory;

    // Try to load libnvidia-ml.so.1 first, then fallback to libnvidia-ml.so
    handle = dlopen("libnvidia-ml.so.1", RTLD_LAZY);
    if (!handle) {
        handle = dlopen("libnvidia-ml.so", RTLD_LAZY);
        if (!handle) {
            return 0;
        }
    }

    // Load required functions. The casts are required: dlsym returns
    // void*, and an implicit void* -> function-pointer conversion is
    // not valid ISO C (this is the POSIX-documented dlsym idiom).
    nvmlInit = (nvmlReturn_t (*)(void))dlsym(handle, "nvmlInit");
    nvmlShutdown = (nvmlReturn_t (*)(void))dlsym(handle, "nvmlShutdown");
    nvmlDeviceGetHandleByIndex = (nvmlReturn_t (*)(unsigned int, nvmlDevice_t*))dlsym(handle, "nvmlDeviceGetHandleByIndex");
    nvmlDeviceGetMemoryInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlMemory_t*))dlsym(handle, "nvmlDeviceGetMemoryInfo");

    if (!nvmlInit || !nvmlShutdown || !nvmlDeviceGetHandleByIndex || !nvmlDeviceGetMemoryInfo) {
        dlclose(handle);
        return 0;
    }

    result = nvmlInit();
    if (result != NVML_SUCCESS) {
        dlclose(handle);
        return 0;
    }

    result = nvmlDeviceGetHandleByIndex(0, &device);
    if (result != NVML_SUCCESS) {
        nvmlShutdown();
        dlclose(handle);
        return 0;
    }

    result = nvmlDeviceGetMemoryInfo(device, &memory);
    if (result != NVML_SUCCESS) {
        nvmlShutdown();
        dlclose(handle);
        return 0;
    }

    nvmlShutdown();
    dlclose(handle);
    return memory.total;
}

pkg/inference/scheduling/nvidia.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// +build linux

// Include guard added so repeated inclusion does not redeclare.
#ifndef NVIDIA_H
#define NVIDIA_H

#include <stddef.h>
#include <dlfcn.h>

// getVRAMSize returns the total memory, in bytes, of the GPU at NVML
// device index 0, or 0 on any failure. Implemented in nvidia.c, which
// loads libnvidia-ml with dlopen at runtime.
size_t getVRAMSize();

#endif // NVIDIA_H

0 commit comments

Comments
 (0)