Skip to content

Commit 35fda7b

Browse files
committed
ggml: Report ordinal IDs for AMD GPUs on Windows
We don't get valid UUIDs for AMD GPUs on Windows, so the best option is to use the ordinal IDs. This brings us in line with what we currently do on the Ollama server; the only exception is AMD GPUs on Linux, where the server falls back to using ordinal IDs. The GGML implementation has no such fallback, but the case that would require it doesn't appear to occur for any of the GPUs that we support. It's also possible for ordinal IDs from different libraries to collide; however, the only places where we use them are AMD on Windows and Metal on Mac, which can never occur on the same system.
1 parent 66fb857 commit 35fda7b

File tree

6 files changed

+45
-33
lines changed

6 files changed

+45
-33
lines changed

llama/patches/0017-ggml-Export-GPU-UUIDs.patch

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,41 +7,41 @@ This enables matching up devices and information reported by the backend
77
with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml).
88
---
99
ggml/include/ggml-backend.h | 1 +
10-
ggml/src/ggml-cuda/ggml-cuda.cu | 33 ++++++++++++++++++++++++++++++++
10+
ggml/src/ggml-cuda/ggml-cuda.cu | 39 ++++++++++++++++++++++++++++++++
1111
ggml/src/ggml-metal/ggml-metal.m | 1 +
12-
3 files changed, 35 insertions(+)
12+
3 files changed, 41 insertions(+)
1313

1414
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
15-
index 74e46716..a880df33 100644
15+
index 74e46716..48839339 100644
1616
--- a/ggml/include/ggml-backend.h
1717
+++ b/ggml/include/ggml-backend.h
1818
@@ -152,6 +152,7 @@ extern "C" {
1919
struct ggml_backend_dev_props {
2020
const char * name;
2121
const char * description;
22-
+ const char * uuid;
22+
+ const char * id;
2323
size_t memory_free;
2424
size_t memory_total;
2525
enum ggml_backend_dev_type type;
2626
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
27-
index cb0d8528..4c829153 100644
27+
index cb0d8528..d6960174 100644
2828
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
2929
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
3030
@@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context {
3131
int device;
3232
std::string name;
3333
std::string description;
34-
+ std::string uuid;
34+
+ std::string id;
3535
};
3636

3737
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
3838
@@ -2896,6 +2897,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
3939
return ctx->description.c_str();
4040
}
4141

42-
+static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) {
42+
+static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
4343
+ ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
44-
+ return ctx->uuid.c_str();
44+
+ return ctx->id.c_str();
4545
+}
4646
+
4747
static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
@@ -51,17 +51,17 @@ index cb0d8528..4c829153 100644
5151
static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
5252
props->name = ggml_backend_cuda_device_get_name(dev);
5353
props->description = ggml_backend_cuda_device_get_description(dev);
54-
+ props->uuid = ggml_backend_cuda_device_get_uuid(dev);
54+
+ props->id = ggml_backend_cuda_device_get_id(dev);
5555
props->type = ggml_backend_cuda_device_get_type(dev);
5656
ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
5757

58-
@@ -3458,6 +3465,32 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
58+
@@ -3458,6 +3465,38 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
5959
CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
6060
dev_ctx->description = prop.name;
6161

6262
+ #if !defined(GGML_USE_HIP)
63-
+ char uuid[64];
64-
+ snprintf(uuid, sizeof(uuid),
63+
+ char id[64];
64+
+ snprintf(id, sizeof(id),
6565
+ "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
6666
+ (unsigned char)prop.uuid.bytes[0],
6767
+ (unsigned char)prop.uuid.bytes[1],
@@ -80,23 +80,29 @@ index cb0d8528..4c829153 100644
8080
+ (unsigned char)prop.uuid.bytes[14],
8181
+ (unsigned char)prop.uuid.bytes[15]
8282
+ );
83-
+ dev_ctx->uuid = uuid;
83+
+ dev_ctx->id = id;
8484
+ #else
85-
+ dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16);
85+
+ #ifdef _WIN32
86+
+ char id[16];
87+
+ snprintf(id, sizeof(id), "%d", i);
88+
+ dev_ctx->id = id;
89+
+ #else
90+
+ dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
91+
+ #endif
8692
+ #endif
8793
+
8894
ggml_backend_dev_t dev = new ggml_backend_device {
8995
/* .iface = */ ggml_backend_cuda_device_interface,
9096
/* .reg = */ &reg,
9197
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
92-
index 1b56f858..ee4f2dcb 100644
98+
index 1b56f858..a9eeebc6 100644
9399
--- a/ggml/src/ggml-metal/ggml-metal.m
94100
+++ b/ggml/src/ggml-metal/ggml-metal.m
95101
@@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
96102
static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
97103
props->name = ggml_backend_metal_device_get_name(dev);
98104
props->description = ggml_backend_metal_device_get_description(dev);
99-
+ props->uuid = "0";
105+
+ props->id = "0";
100106
props->type = ggml_backend_metal_device_get_type(dev);
101107
ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
102108
props->caps = (struct ggml_backend_dev_caps) {

ml/backend.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,9 @@ type DeviceMemory struct {
124124
// may not be persistent across instances of the runner.
125125
Name string
126126

127-
// UUID is a unique persistent identifier for the device for matching
128-
// with system management libraries
129-
UUID string
127+
// ID is an identifier for the device for matching with system
128+
// management libraries.
129+
ID string
130130

131131
// Weights is the per-layer memory needed for the model weights.
132132
Weights []Memory
@@ -156,8 +156,8 @@ func (m DeviceMemory) LogValue() slog.Value {
156156
attrs = append(attrs, slog.Any("Graph", m.Graph))
157157
}
158158

159-
if len(attrs) > 0 && m.UUID != "" {
160-
attrs = append([]slog.Attr{slog.String("UUID", m.UUID)}, attrs...)
159+
if len(attrs) > 0 && m.ID != "" {
160+
attrs = append([]slog.Attr{slog.String("ID", m.ID)}, attrs...)
161161
}
162162

163163
return slog.GroupValue(attrs...)

ml/backend/ggml/ggml.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
138138
requiredMemory.CPU.Name = C.GoString(C.ggml_backend_dev_name(cpuDeviceBufferType.d))
139139
var props C.struct_ggml_backend_dev_props
140140
C.ggml_backend_dev_get_props(cpuDeviceBufferType.d, &props)
141-
requiredMemory.CPU.UUID = C.GoString(props.uuid)
141+
requiredMemory.CPU.ID = C.GoString(props.id)
142142
requiredMemory.CPU.Weights = make([]ml.Memory, blocks+1)
143143
requiredMemory.CPU.Cache = make([]ml.Memory, blocks+1)
144144

@@ -155,7 +155,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
155155
requiredMemory.GPUs[i].Name = C.GoString(C.ggml_backend_dev_name(d))
156156
var props C.struct_ggml_backend_dev_props
157157
C.ggml_backend_dev_get_props(d, &props)
158-
requiredMemory.GPUs[i].UUID = C.GoString(props.uuid)
158+
requiredMemory.GPUs[i].ID = C.GoString(props.id)
159159
requiredMemory.GPUs[i].Weights = make([]ml.Memory, blocks+1)
160160
requiredMemory.GPUs[i].Cache = make([]ml.Memory, blocks+1)
161161
}

ml/backend/ggml/ggml/include/ggml-backend.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ extern "C" {
152152
struct ggml_backend_dev_props {
153153
const char * name;
154154
const char * description;
155-
const char * uuid;
155+
const char * id;
156156
size_t memory_free;
157157
size_t memory_total;
158158
enum ggml_backend_dev_type type;

ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2888,7 +2888,7 @@ struct ggml_backend_cuda_device_context {
28882888
int device;
28892889
std::string name;
28902890
std::string description;
2891-
std::string uuid;
2891+
std::string id;
28922892
};
28932893

28942894
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -2901,9 +2901,9 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
29012901
return ctx->description.c_str();
29022902
}
29032903

2904-
static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) {
2904+
static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
29052905
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
2906-
return ctx->uuid.c_str();
2906+
return ctx->id.c_str();
29072907
}
29082908

29092909
static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
@@ -2920,7 +2920,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
29202920
static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
29212921
props->name = ggml_backend_cuda_device_get_name(dev);
29222922
props->description = ggml_backend_cuda_device_get_description(dev);
2923-
props->uuid = ggml_backend_cuda_device_get_uuid(dev);
2923+
props->id = ggml_backend_cuda_device_get_id(dev);
29242924
props->type = ggml_backend_cuda_device_get_type(dev);
29252925
ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
29262926

@@ -3471,8 +3471,8 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
34713471
dev_ctx->description = prop.name;
34723472

34733473
#if !defined(GGML_USE_HIP)
3474-
char uuid[64];
3475-
snprintf(uuid, sizeof(uuid),
3474+
char id[64];
3475+
snprintf(id, sizeof(id),
34763476
"GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
34773477
(unsigned char)prop.uuid.bytes[0],
34783478
(unsigned char)prop.uuid.bytes[1],
@@ -3491,9 +3491,15 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
34913491
(unsigned char)prop.uuid.bytes[14],
34923492
(unsigned char)prop.uuid.bytes[15]
34933493
);
3494-
dev_ctx->uuid = uuid;
3494+
dev_ctx->id = id;
34953495
#else
3496-
dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16);
3496+
#ifdef _WIN32
3497+
char id[16];
3498+
snprintf(id, sizeof(id), "%d", i);
3499+
dev_ctx->id = id;
3500+
#else
3501+
dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
3502+
#endif
34973503
#endif
34983504

34993505
ggml_backend_dev_t dev = new ggml_backend_device {

ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5726,7 +5726,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
57265726
static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
57275727
props->name = ggml_backend_metal_device_get_name(dev);
57285728
props->description = ggml_backend_metal_device_get_description(dev);
5729-
props->uuid = "0";
5729+
props->id = "0";
57305730
props->type = ggml_backend_metal_device_get_type(dev);
57315731
ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
57325732
props->caps = (struct ggml_backend_dev_caps) {

0 commit comments

Comments
 (0)