Skip to content

Commit 898acba

Browse files
authored
rpc : add support for multiple devices (#16276)
* rpc : add support for multiple devices

  Allow rpc-server to expose multiple devices from a single endpoint.
  Change RPC protocol to include device identifier where needed.

  closes: #15210

* fixes
* use ggml_backend_reg_t
* address review comments
* fix llama-bench backend report
* address review comments, change device naming
* fix cmd order
1 parent e29acf7 commit 898acba

File tree

7 files changed

+394
-236
lines changed

7 files changed

+394
-236
lines changed

common/arg.cpp

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1615,18 +1615,14 @@ static void add_rpc_devices(const std::string & servers) {
16151615
if (!rpc_reg) {
16161616
throw std::invalid_argument("failed to find RPC backend");
16171617
}
1618-
typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint);
1619-
ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device");
1620-
if (!ggml_backend_rpc_add_device_fn) {
1621-
throw std::invalid_argument("failed to find RPC device add function");
1618+
typedef ggml_backend_reg_t (*ggml_backend_rpc_add_server_t)(const char * endpoint);
1619+
ggml_backend_rpc_add_server_t ggml_backend_rpc_add_server_fn = (ggml_backend_rpc_add_server_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_server");
1620+
if (!ggml_backend_rpc_add_server_fn) {
1621+
throw std::invalid_argument("failed to find RPC add server function");
16221622
}
16231623
for (const auto & server : rpc_servers) {
1624-
ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str());
1625-
if (dev) {
1626-
ggml_backend_device_register(dev);
1627-
} else {
1628-
throw std::invalid_argument("failed to register RPC device");
1629-
}
1624+
auto reg = ggml_backend_rpc_add_server_fn(server.c_str());
1625+
ggml_backend_register(reg);
16301626
}
16311627
}
16321628

ggml/include/ggml-backend.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,8 @@ extern "C" {
215215
// Backend registry
216216
//
217217

218+
GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
219+
218220
GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
219221

220222
// Backend (reg) enumeration

ggml/include/ggml-rpc.h

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,26 +7,25 @@
77
extern "C" {
88
#endif
99

10-
#define RPC_PROTO_MAJOR_VERSION 2
10+
#define RPC_PROTO_MAJOR_VERSION 3
1111
#define RPC_PROTO_MINOR_VERSION 0
1212
#define RPC_PROTO_PATCH_VERSION 0
1313
#define GGML_RPC_MAX_SERVERS 16
1414

1515
// backend API
16-
GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint);
16+
GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint, uint32_t device);
1717
GGML_BACKEND_API bool ggml_backend_is_rpc(ggml_backend_t backend);
1818

19-
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint);
19+
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint, uint32_t device);
2020

21-
GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
21+
GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, uint32_t device, size_t * free, size_t * total);
2222

23-
GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint,
24-
const char * cache_dir,
25-
size_t free_mem, size_t total_mem);
23+
GGML_BACKEND_API void ggml_backend_rpc_start_server(const char * endpoint, const char * cache_dir,
24+
size_t n_threads, size_t n_devices,
25+
ggml_backend_dev_t * devices, size_t * free_mem, size_t * total_mem);
2626

2727
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void);
28-
29-
GGML_BACKEND_API ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint);
28+
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_add_server(const char * endpoint);
3029

3130
#ifdef __cplusplus
3231
}

ggml/src/ggml-backend-impl.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -209,9 +209,6 @@ extern "C" {
209209
void * context;
210210
};
211211

212-
// Internal backend registry API
213-
GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
214-
215212
// Add backend dynamic loading support to the backend
216213

217214
// Initialize the backend

0 commit comments

Comments
 (0)