Skip to content

Commit 8c27393

Browse files
author
Piotr Stankiewicz
committed
VRAM size getter for linux
Signed-off-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
1 parent 73bf98c commit 8c27393

File tree

5 files changed

+96
-2
lines changed

5 files changed

+96
-2
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ COPY --link . .
2727
# Build the Go binary (static build)
2828
RUN --mount=type=cache,target=/go/pkg/mod \
2929
--mount=type=cache,target=/root/.cache/go-build \
30-
CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o model-runner ./main.go
30+
CGO_ENABLED=1 GOOS=linux go build -ldflags="-s -w" -o model-runner ./main.go
3131

3232
# --- Get llama.cpp binary ---
3333
FROM docker/docker-model-backend-llamacpp:${LLAMA_SERVER_VERSION}-${LLAMA_SERVER_VARIANT} AS llama-server

pkg/inference/scheduling/loader.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ func newLoader(
126126
// Compute the amount of available memory.
127127
vramSize, err := getVRAMSize() // FIXME(p1-0tr): only implemented on macOS for now
128128
if err != nil {
129-
return nil // FIXME(p1-0tr): should forward the error
129+
log.Warnf("Could not read VRAM size: %s", err)
130130
}
131131
totalMemory := vramSize
132132

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package scheduling
2+
3+
/*
4+
#cgo LDFLAGS: -ldl
5+
#include "nvidia.h"
6+
*/
7+
import "C"
8+
import "errors"
9+
10+
// getVRAMSize returns total system GPU memory in bytes
11+
func getVRAMSize() (uint64, error) {
12+
vramSize := C.getVRAMSize()
13+
if vramSize == 0 {
14+
return 0, errors.New("could not get nvidia VRAM size")
15+
}
16+
return uint64(vramSize), nil
17+
}

pkg/inference/scheduling/nvidia.c

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// +build linux

#include "nvidia.h"

// Minimal NVML declarations, copied from the NVML API, so this file
// has no build-time dependency on the NVML SDK headers. The library
// itself is loaded at runtime with dlopen so the binary still runs on
// hosts without an NVIDIA driver installed.
typedef enum {
    NVML_SUCCESS = 0
} nvmlReturn_t;

typedef struct {
    unsigned long long total;
    unsigned long long free;
    unsigned long long used;
} nvmlMemory_t;

typedef void* nvmlDevice_t;

// getVRAMSize returns the total memory, in bytes, of the GPU at NVML
// device index 0, or 0 on any failure (library not found, symbols
// missing, NVML init error, or no device).
// NOTE(review): only device 0 is queried; on multi-GPU hosts this
// reports a single device's VRAM — confirm that is the intent.
size_t getVRAMSize() {
    void* handle;
    nvmlReturn_t (*nvmlInit)(void);
    nvmlReturn_t (*nvmlShutdown)(void);
    nvmlReturn_t (*nvmlDeviceGetHandleByIndex)(unsigned int index, nvmlDevice_t* device);
    nvmlReturn_t (*nvmlDeviceGetMemoryInfo)(nvmlDevice_t device, nvmlMemory_t* memory);

    nvmlReturn_t result;
    nvmlDevice_t device;
    nvmlMemory_t memory;

    // Try to load libnvidia-ml.so.1 first, then fallback to libnvidia-ml.so
    handle = dlopen("libnvidia-ml.so.1", RTLD_LAZY);
    if (!handle) {
        handle = dlopen("libnvidia-ml.so", RTLD_LAZY);
        if (!handle) {
            return 0;
        }
    }

    // Load required functions. The casts are required: dlsym returns
    // void*, and an implicit void* -> function-pointer conversion is
    // not valid ISO C (this is the POSIX-documented dlsym idiom).
    nvmlInit = (nvmlReturn_t (*)(void))dlsym(handle, "nvmlInit");
    nvmlShutdown = (nvmlReturn_t (*)(void))dlsym(handle, "nvmlShutdown");
    nvmlDeviceGetHandleByIndex = (nvmlReturn_t (*)(unsigned int, nvmlDevice_t*))dlsym(handle, "nvmlDeviceGetHandleByIndex");
    nvmlDeviceGetMemoryInfo = (nvmlReturn_t (*)(nvmlDevice_t, nvmlMemory_t*))dlsym(handle, "nvmlDeviceGetMemoryInfo");

    if (!nvmlInit || !nvmlShutdown || !nvmlDeviceGetHandleByIndex || !nvmlDeviceGetMemoryInfo) {
        dlclose(handle);
        return 0;
    }

    result = nvmlInit();
    if (result != NVML_SUCCESS) {
        dlclose(handle);
        return 0;
    }

    result = nvmlDeviceGetHandleByIndex(0, &device);
    if (result != NVML_SUCCESS) {
        nvmlShutdown();
        dlclose(handle);
        return 0;
    }

    result = nvmlDeviceGetMemoryInfo(device, &memory);
    if (result != NVML_SUCCESS) {
        nvmlShutdown();
        dlclose(handle);
        return 0;
    }

    nvmlShutdown();
    dlclose(handle);
    return memory.total;
}

pkg/inference/scheduling/nvidia.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// +build linux

// Include guard added so repeated inclusion does not redeclare.
#ifndef NVIDIA_H
#define NVIDIA_H

#include <stddef.h>
#include <dlfcn.h>

// getVRAMSize returns the total memory, in bytes, of the GPU at NVML
// device index 0, or 0 on any failure. Implemented in nvidia.c, which
// loads libnvidia-ml with dlopen at runtime.
size_t getVRAMSize();

#endif // NVIDIA_H

0 commit comments

Comments
 (0)