@@ -6,20 +6,20 @@ Subject: [PATCH] GPU discovery enhancements
66Expose more information about the devices through backend props, and leverage
77management libraries for more accurate VRAM usage reporting if available.
88---
9- ggml/include/ggml-backend.h | 9 +
9+ ggml/include/ggml-backend.h | 11 +
1010 ggml/src/CMakeLists.txt | 2 +
11- ggml/src/ggml-cuda/ggml-cuda.cu | 72 +++++
11+ ggml/src/ggml-cuda/ggml-cuda.cu | 74 +++++
1212 ggml/src/ggml-cuda/vendors/hip.h | 3 +
1313 ggml/src/ggml-impl.h | 8 +
1414 ggml/src/ggml-metal/ggml-metal.cpp | 2 +
1515 ggml/src/mem_hip.cpp | 449 +++++++++++++++++++++++++++++
1616 ggml/src/mem_nvml.cpp | 209 ++++++++++++++
17- 8 files changed, 754 insertions(+)
17+ 8 files changed, 758 insertions(+)
1818 create mode 100644 ggml/src/mem_hip.cpp
1919 create mode 100644 ggml/src/mem_nvml.cpp
2020
2121diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
22- index ba181d09..09ff75f9 100644
22+ index ba181d09d..094fc3c82 100644
2323--- a/ggml/include/ggml-backend.h
2424+++ b/ggml/include/ggml-backend.h
2525@@ -169,6 +169,17 @@ extern "C" {
@@ -41,7 +41,7 @@ index ba181d09..09ff75f9 100644
4141
4242 GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device);
4343diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
44- index 0609c650..aefe43bd 100644
44+ index 0609c6503..aefe43bdd 100644
4545--- a/ggml/src/CMakeLists.txt
4646+++ b/ggml/src/CMakeLists.txt
4747@@ -209,6 +209,8 @@ add_library(ggml-base
@@ -54,7 +54,7 @@ index 0609c650..aefe43bd 100644
5454
5555 target_include_directories(ggml-base PRIVATE .)
5656diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
57- index 87c6c34a..6a278b5e 100644
57+ index 87c6c34a4..816597d2f 100644
5858--- a/ggml/src/ggml-cuda/ggml-cuda.cu
5959+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
6060@@ -261,6 +261,16 @@ static ggml_cuda_device_info ggml_cuda_init() {
@@ -161,21 +161,23 @@ index 87c6c34a..6a278b5e 100644
161161 bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr;
162162 #ifdef GGML_CUDA_NO_PEER_COPY
163163 bool events = false;
164- @@ -4087,6 +4149,8 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
164+ @@ -4087,6 +4149,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
165165 std::lock_guard<std::mutex> lock(mutex);
166166 if (!initialized) {
167167 ggml_backend_cuda_reg_context * ctx = new ggml_backend_cuda_reg_context;
168168+ int driverVersion = 0;
169- + CUDA_CHECK(cudaDriverGetVersion(&driverVersion));
170169
171170 for (int i = 0; i < ggml_cuda_info().device_count; i++) {
172171 ggml_backend_cuda_device_context * dev_ctx = new ggml_backend_cuda_device_context;
173- @@ -4102,6 +4166,14 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
172+ @@ -4102,6 +4165,17 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
174173 snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.0", prop.pciDomainID, prop.pciBusID, prop.pciDeviceID);
175174 dev_ctx->pci_bus_id = pci_bus_id;
176175
177176+ dev_ctx->major = prop.major;
178177+ dev_ctx->minor = prop.minor;
178+ + if (driverVersion == 0) {
179+ + CUDA_CHECK(cudaDriverGetVersion(&driverVersion));
180+ + }
179181+ dev_ctx->driver_major = driverVersion / 1000;
180182+ dev_ctx->driver_minor = (driverVersion - (dev_ctx->driver_major * 1000)) / 10;
181183+ dev_ctx->integrated = prop.integrated;
@@ -186,7 +188,7 @@ index 87c6c34a..6a278b5e 100644
186188 /* .iface = */ ggml_backend_cuda_device_interface,
187189 /* .reg = */ ®,
188190diff --git a/ggml/src/ggml-cuda/vendors/hip.h b/ggml/src/ggml-cuda/vendors/hip.h
189- index 1f06be80..2f9ef2dc 100644
191+ index 1f06be80e..2f9ef2dc0 100644
190192--- a/ggml/src/ggml-cuda/vendors/hip.h
191193+++ b/ggml/src/ggml-cuda/vendors/hip.h
192194@@ -5,6 +5,8 @@
@@ -207,7 +209,7 @@ index 1f06be80..2f9ef2dc 100644
207209 #define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
208210 #define cudaErrorPeerAccessNotEnabled hipErrorPeerAccessNotEnabled
209211diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
210- index d0fb3bcc..80597b6e 100644
212+ index d0fb3bcca..80597b6ea 100644
211213--- a/ggml/src/ggml-impl.h
212214+++ b/ggml/src/ggml-impl.h
213215@@ -638,6 +638,14 @@ static inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx
@@ -226,7 +228,7 @@ index d0fb3bcc..80597b6e 100644
226228 }
227229 #endif
228230diff --git a/ggml/src/ggml-metal/ggml-metal.cpp b/ggml/src/ggml-metal/ggml-metal.cpp
229- index f2ff9f32..f356e4a0 100644
231+ index f2ff9f322..f356e4a0a 100644
230232--- a/ggml/src/ggml-metal/ggml-metal.cpp
231233+++ b/ggml/src/ggml-metal/ggml-metal.cpp
232234@@ -535,6 +535,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
@@ -247,7 +249,7 @@ index f2ff9f32..f356e4a0 100644
247249 /* .host_buffer = */ false,
248250diff --git a/ggml/src/mem_hip.cpp b/ggml/src/mem_hip.cpp
249251new file mode 100644
250- index 00000000..8ef19b8c
252+ index 000000000..8ef19b8cf
251253--- /dev/null
252254+++ b/ggml/src/mem_hip.cpp
253255@@ -0,0 +1,449 @@
@@ -703,7 +705,7 @@ index 00000000..8ef19b8c
703705\ No newline at end of file
704706diff --git a/ggml/src/mem_nvml.cpp b/ggml/src/mem_nvml.cpp
705707new file mode 100644
706- index 00000000..c9073cef
708+ index 000000000..c9073cef0
707709--- /dev/null
708710+++ b/ggml/src/mem_nvml.cpp
709711@@ -0,0 +1,209 @@
0 commit comments