Skip to content

Commit 3b5f524

Browse files
committed
rpc : send hash when tensor data is above some fixed threshold
ref #10095
1 parent 960e726 commit 3b5f524

File tree

3 files changed

+235
-16
lines changed

3 files changed

+235
-16
lines changed

examples/rpc/rpc-server.cpp

Lines changed: 48 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -24,20 +24,30 @@
2424
#endif
2525
#include <string>
2626
#include <stdio.h>
27+
#include <vector>
28+
#include <filesystem>
29+
30+
namespace fs = std::filesystem;
31+
32+
const int MAX_GGUF_PATH = 16;
2733

2834
struct rpc_server_params {
29-
std::string host = "127.0.0.1";
30-
int port = 50052;
31-
size_t backend_mem = 0;
35+
std::string host = "127.0.0.1";
36+
int port = 50052;
37+
size_t backend_mem = 0;
38+
std::string cache_dir = "";
39+
std::vector<std::string> gguf_path;
3240
};
3341

3442
static void print_usage(int /*argc*/, char ** argv, rpc_server_params params) {
3543
fprintf(stderr, "Usage: %s [options]\n\n", argv[0]);
3644
fprintf(stderr, "options:\n");
37-
fprintf(stderr, " -h, --help show this help message and exit\n");
38-
fprintf(stderr, " -H HOST, --host HOST host to bind to (default: %s)\n", params.host.c_str());
39-
fprintf(stderr, " -p PORT, --port PORT port to bind to (default: %d)\n", params.port);
40-
fprintf(stderr, " -m MEM, --mem MEM backend memory size (in MB)\n");
45+
fprintf(stderr, " -h, --help show this help message and exit\n");
46+
fprintf(stderr, " -H HOST, --host HOST host to bind to (default: %s)\n", params.host.c_str());
47+
fprintf(stderr, " -p PORT, --port PORT port to bind to (default: %d)\n", params.port);
48+
fprintf(stderr, " -f PATH, --gguf PATH path to GGUF file\n");
49+
fprintf(stderr, " -d DIR, --cache-dir DIR local cache dir\n");
50+
fprintf(stderr, " -m MEM, --mem MEM backend memory size (in MB)\n");
4151
fprintf(stderr, "\n");
4252
}
4353

@@ -58,6 +68,30 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
5868
if (params.port <= 0 || params.port > 65535) {
5969
return false;
6070
}
71+
} else if (arg == "-f" || arg == "--gguf") {
72+
if (++i >= argc) {
73+
return false;
74+
}
75+
if (params.gguf_path.size() >= MAX_GGUF_PATH) {
76+
fprintf(stderr, "error: too many GGUF files\n");
77+
return false;
78+
}
79+
fs::path gguf_path(argv[i]);
80+
if (!fs::is_regular_file(gguf_path)) {
81+
fprintf(stderr, "error: GGUF file does not exist: %s\n", gguf_path.c_str());
82+
return false;
83+
}
84+
params.gguf_path.push_back(argv[i]);
85+
} else if (arg == "-d" || arg == "--cache-dir") {
86+
if (++i >= argc) {
87+
return false;
88+
}
89+
fs::path cache_dir(argv[i]);
90+
if (!fs::is_directory(cache_dir)) {
91+
fprintf(stderr, "error: cache dir does not exist: %s\n", cache_dir.c_str());
92+
return false;
93+
}
94+
params.cache_dir = argv[i];
6195
} else if (arg == "-m" || arg == "--mem") {
6296
if (++i >= argc) {
6397
return false;
@@ -164,8 +198,14 @@ int main(int argc, char * argv[]) {
164198
} else {
165199
get_backend_memory(&free_mem, &total_mem);
166200
}
201+
const char * gguf_path[MAX_GGUF_PATH] = {0};
202+
int n_gguf_path = params.gguf_path.size();
203+
for (int i = 0; i < n_gguf_path; i++) {
204+
gguf_path[i] = params.gguf_path[i].c_str();
205+
}
206+
const char * cache_dir = params.cache_dir.empty() ? nullptr : params.cache_dir.c_str();
167207
printf("Starting RPC server on %s, backend memory: %zu MB\n", endpoint.c_str(), free_mem / (1024 * 1024));
168-
ggml_backend_rpc_start_server(backend, endpoint.c_str(), free_mem, total_mem);
208+
ggml_backend_rpc_start_server(backend, endpoint.c_str(), cache_dir, n_gguf_path, gguf_path, free_mem, total_mem);
169209
ggml_backend_free(backend);
170210
return 0;
171211
}

ggml/include/ggml-rpc.h

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,10 @@ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const c
1717

1818
GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
1919

20-
GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint, size_t free_mem, size_t total_mem);
20+
GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint,
21+
const char * cache_dir,
22+
int n_gguf_path, const char * gguf_path[],
23+
size_t free_mem, size_t total_mem);
2124

2225
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void);
2326

0 commit comments

Comments
 (0)