ggml: Report ordinal IDs for AMD GPUs on Windows

jessegross · jessegross · commit 35fda7b4af55 · 2025-07-09T10:35:31.000-07:00
We don't get valid UUIDs for AMD GPUs on Windows, so the best option
is to use the ordinal IDs. This brings us in line with what we currently
do on the Ollama server - the only exception is AMD GPUs on Linux, where
the server falls back to using ordinal IDs. The GGML implementation has
no such fallback, but the situation that would require it doesn't appear
to occur for any of the GPUs that we support.

It's also possible that there are collisions between ordinal IDs for
different libraries - however the only places where we use them are
AMD on Windows and Metal on Mac, which can never occur on the same
system.
diff --git a/llama/patches/0017-ggml-Export-GPU-UUIDs.patch b/llama/patches/0017-ggml-Export-GPU-UUIDs.patch
@@ -7,41 +7,41 @@ This enables matching up devices and information reported by the backend
 with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml).
 ---
  ggml/include/ggml-backend.h      |  1 +
- ggml/src/ggml-cuda/ggml-cuda.cu  | 33 ++++++++++++++++++++++++++++++++
+ ggml/src/ggml-cuda/ggml-cuda.cu  | 39 ++++++++++++++++++++++++++++++++
  ggml/src/ggml-metal/ggml-metal.m |  1 +
- 3 files changed, 35 insertions(+)
+ 3 files changed, 41 insertions(+)
 
 diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
-index 74e46716..a880df33 100644
+index 74e46716..48839339 100644
 --- a/ggml/include/ggml-backend.h
 +++ b/ggml/include/ggml-backend.h
 @@ -152,6 +152,7 @@ extern "C" {
      struct ggml_backend_dev_props {
          const char * name;
          const char * description;
-+        const char * uuid;
++        const char * id;
          size_t memory_free;
          size_t memory_total;
          enum ggml_backend_dev_type type;
 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index cb0d8528..4c829153 100644
+index cb0d8528..d6960174 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
 @@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context {
      int device;
      std::string name;
      std::string description;
-+    std::string uuid;
++    std::string id;
  };
  
  static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
 @@ -2896,6 +2897,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
      return ctx->description.c_str();
  }
  
-+static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) {
++static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
 +    ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
-+    return ctx->uuid.c_str();
++    return ctx->id.c_str();
 +}
 +
  static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
@@ -51,17 +51,17 @@ index cb0d8528..4c829153 100644
  static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
      props->name        = ggml_backend_cuda_device_get_name(dev);
      props->description = ggml_backend_cuda_device_get_description(dev);
-+    props->uuid        = ggml_backend_cuda_device_get_uuid(dev);
++    props->id          = ggml_backend_cuda_device_get_id(dev);
      props->type        = ggml_backend_cuda_device_get_type(dev);
      ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
  
-@@ -3458,6 +3465,32 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
+@@ -3458,6 +3465,38 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
                  CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
                  dev_ctx->description = prop.name;
  
 +                #if !defined(GGML_USE_HIP)
-+                char uuid[64];
-+                snprintf(uuid, sizeof(uuid),
++                char id[64];
++                snprintf(id, sizeof(id),
 +                    "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
 +                    (unsigned char)prop.uuid.bytes[0],
 +                    (unsigned char)prop.uuid.bytes[1],
@@ -80,23 +80,29 @@ index cb0d8528..4c829153 100644
 +                    (unsigned char)prop.uuid.bytes[14],
 +                    (unsigned char)prop.uuid.bytes[15]
 +                  );
-+                dev_ctx->uuid = uuid;
++                dev_ctx->id = id;
 +                #else
-+                dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16);
++                #ifdef _WIN32
++                char id[16];
++                snprintf(id, sizeof(id), "%d", i);
++                dev_ctx->id = id;
++                #else
++                dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
++                #endif
 +                #endif
 +
                  ggml_backend_dev_t dev = new ggml_backend_device {
                      /* .iface   = */ ggml_backend_cuda_device_interface,
                      /* .reg     = */ &reg,
 diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
-index 1b56f858..ee4f2dcb 100644
+index 1b56f858..a9eeebc6 100644
 --- a/ggml/src/ggml-metal/ggml-metal.m
 +++ b/ggml/src/ggml-metal/ggml-metal.m
 @@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
  static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
      props->name        = ggml_backend_metal_device_get_name(dev);
      props->description = ggml_backend_metal_device_get_description(dev);
-+    props->uuid        = "0";
++    props->id          = "0";
      props->type        = ggml_backend_metal_device_get_type(dev);
      ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
      props->caps = (struct ggml_backend_dev_caps) {
diff --git a/ml/backend.go b/ml/backend.go
@@ -124,9 +124,9 @@ type DeviceMemory struct {
 	// may not be persistent across instances of the runner.
 	Name string
 
-	// UUID is a unique persistent identifier for the device for matching
-	// with system management libraries
-	UUID string
+	// ID is an identifier for the device for matching with system
+	// management libraries.
+	ID string
 
 	// Weights is the per-layer memory needed for the model weights.
 	Weights []Memory
@@ -156,8 +156,8 @@ func (m DeviceMemory) LogValue() slog.Value {
 		attrs = append(attrs, slog.Any("Graph", m.Graph))
 	}
 
-	if len(attrs) > 0 && m.UUID != "" {
-		attrs = append([]slog.Attr{slog.String("UUID", m.UUID)}, attrs...)
+	if len(attrs) > 0 && m.ID != "" {
+		attrs = append([]slog.Attr{slog.String("ID", m.ID)}, attrs...)
 	}
 
 	return slog.GroupValue(attrs...)
diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go
@@ -138,7 +138,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
 	requiredMemory.CPU.Name = C.GoString(C.ggml_backend_dev_name(cpuDeviceBufferType.d))
 	var props C.struct_ggml_backend_dev_props
 	C.ggml_backend_dev_get_props(cpuDeviceBufferType.d, &props)
-	requiredMemory.CPU.UUID = C.GoString(props.uuid)
+	requiredMemory.CPU.ID = C.GoString(props.id)
 	requiredMemory.CPU.Weights = make([]ml.Memory, blocks+1)
 	requiredMemory.CPU.Cache = make([]ml.Memory, blocks+1)
 
@@ -155,7 +155,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
 		requiredMemory.GPUs[i].Name = C.GoString(C.ggml_backend_dev_name(d))
 		var props C.struct_ggml_backend_dev_props
 		C.ggml_backend_dev_get_props(d, &props)
-		requiredMemory.GPUs[i].UUID = C.GoString(props.uuid)
+		requiredMemory.GPUs[i].ID = C.GoString(props.id)
 		requiredMemory.GPUs[i].Weights = make([]ml.Memory, blocks+1)
 		requiredMemory.GPUs[i].Cache = make([]ml.Memory, blocks+1)
 	}
diff --git a/ml/backend/ggml/ggml/include/ggml-backend.h b/ml/backend/ggml/ggml/include/ggml-backend.h
@@ -152,7 +152,7 @@ extern "C" {
     struct ggml_backend_dev_props {
         const char * name;
         const char * description;
-        const char * uuid;
+        const char * id;
         size_t memory_free;
         size_t memory_total;
         enum ggml_backend_dev_type type;
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -2888,7 +2888,7 @@ struct ggml_backend_cuda_device_context {
     int device;
     std::string name;
     std::string description;
-    std::string uuid;
+    std::string id;
 };
 
 static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -2901,9 +2901,9 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
     return ctx->description.c_str();
 }
 
-static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) {
+static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
     ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
-    return ctx->uuid.c_str();
+    return ctx->id.c_str();
 }
 
 static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
@@ -2920,7 +2920,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
 static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
     props->name        = ggml_backend_cuda_device_get_name(dev);
     props->description = ggml_backend_cuda_device_get_description(dev);
-    props->uuid        = ggml_backend_cuda_device_get_uuid(dev);
+    props->id          = ggml_backend_cuda_device_get_id(dev);
     props->type        = ggml_backend_cuda_device_get_type(dev);
     ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
 
@@ -3471,8 +3471,8 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
                 dev_ctx->description = prop.name;
 
                 #if !defined(GGML_USE_HIP)
-                char uuid[64];
-                snprintf(uuid, sizeof(uuid),
+                char id[64];
+                snprintf(id, sizeof(id),
                     "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
                     (unsigned char)prop.uuid.bytes[0],
                     (unsigned char)prop.uuid.bytes[1],
@@ -3491,9 +3491,15 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
                     (unsigned char)prop.uuid.bytes[14],
                     (unsigned char)prop.uuid.bytes[15]
                   );
-                dev_ctx->uuid = uuid;
+                dev_ctx->id = id;
                 #else
-                dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16);
+                #ifdef _WIN32
+                char id[16];
+                snprintf(id, sizeof(id), "%d", i);
+                dev_ctx->id = id;
+                #else
+                dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
+                #endif
                 #endif
 
                 ggml_backend_dev_t dev = new ggml_backend_device {
diff --git a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
@@ -5726,7 +5726,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
 static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
     props->name        = ggml_backend_metal_device_get_name(dev);
     props->description = ggml_backend_metal_device_get_description(dev);
-    props->uuid        = "0";
+    props->id          = "0";
     props->type        = ggml_backend_metal_device_get_type(dev);
     ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
     props->caps = (struct ggml_backend_dev_caps) {