
Commit 1eabdb4

Author: Iwan Kawrakow (committed)
Revert "Rpc improvement (#480)"
This reverts commit 8a5f857.
1 parent 8a5f857 commit 1eabdb4

11 files changed (+484, -1234 lines)


CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -6,6 +6,7 @@ include(CheckIncludeFileCXX)
 set(CMAKE_WARN_UNUSED_CLI YES)
 
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED true)

common/common.cpp

Lines changed: 18 additions & 33 deletions
@@ -81,9 +81,7 @@
 #endif
 #define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
 #endif // LLAMA_USE_CURL
-#ifdef GGML_USE_RPC
-# include "ggml-rpc.h"
-#endif
+
 using json = nlohmann::ordered_json;
 
 //
@@ -1006,35 +1004,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
     if (arg == "--rpc") {
         CHECK_ARG
         params.rpc_servers = argv[i];
-        std::string servers(params.rpc_servers);
-        size_t pos = 0;
-        while ((pos = servers.find(",")) != std::string::npos) {
-            std::string server = servers.substr(0, pos);
-            ggml_backend_rpc_buffer_type(server.c_str());
-            servers.erase(0, pos + 1);
-        }
-        ggml_backend_rpc_buffer_type(servers.c_str());
-        return true;
-    }
-    if (arg == "--override-kv") {
-        CHECK_ARG
-        if (!string_parse_kv_override(argv[i], params.kv_overrides)) {
-            fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]);
-            invalid_param = true;
-            return true;
-        }
-        return true;
-    }
-    if (arg == "--override-tensor" || arg == "-ot") {
-        CHECK_ARG
-        /*for (auto endpoint : params.rpc_servers.split)
-        {
-
-        }*/
-        if (!parse_buft_overrides(std::string{ argv[i] }, params.tensor_buft_overrides)) {
-            fprintf(stderr, "error: Invalid tensor buffer type override: %s\n", argv[i]);
-            invalid_param = true;
-        }
         return true;
     }
     if (arg == "--no-mmap") {
@@ -1242,7 +1211,23 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         sparams.grammar = json_schema_to_grammar(json::parse(argv[i]));
         return true;
     }
-
+    if (arg == "--override-kv") {
+        CHECK_ARG
+        if (!string_parse_kv_override(argv[i], params.kv_overrides)) {
+            fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]);
+            invalid_param = true;
+            return true;
+        }
+        return true;
+    }
+    if (arg == "--override-tensor" || arg == "-ot") {
+        CHECK_ARG
+        if (!parse_buft_overrides(std::string{argv[i]}, params.tensor_buft_overrides)) {
+            fprintf(stderr, "error: Invalid tensor buffer type override: %s\n", argv[i]);
+            invalid_param = true;
+        }
+        return true;
+    }
     if (arg == "--offload-policy" || arg == "-op") {
         CHECK_ARG
         auto p = string_split_pairs<int,int>(argv[i], ',');
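
Note on the --rpc change above: the reverted handler walked the comma-separated server list and eagerly created an RPC buffer type per endpoint, while the restored code only stores the raw string in params.rpc_servers. A minimal, self-contained sketch of the same comma-splitting idea follows; the split_csv helper and the example endpoints are illustrative only, not code from this repository.

// Sketch: split a comma-separated endpoint list as the reverted --rpc handler did.
// split_csv is a hypothetical helper; the endpoints are made-up examples.
#include <cstdio>
#include <string>
#include <vector>

static std::vector<std::string> split_csv(const std::string & input) {
    std::vector<std::string> out;
    std::string rest = input;
    size_t pos = 0;
    while ((pos = rest.find(',')) != std::string::npos) {
        out.push_back(rest.substr(0, pos)); // one "host:port" endpoint
        rest.erase(0, pos + 1);
    }
    out.push_back(rest); // last (or only) endpoint
    return out;
}

int main() {
    for (const auto & endpoint : split_csv("192.168.1.10:50052,192.168.1.11:50052")) {
        printf("endpoint: %s\n", endpoint.c_str());
    }
    return 0;
}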

examples/rpc/CMakeLists.txt

Lines changed: 2 additions & 4 deletions
@@ -1,4 +1,2 @@
-set(TARGET rpc-server)
-add_executable(${TARGET} rpc-server.cpp)
-target_link_libraries(${TARGET} PRIVATE ggml)
-target_compile_features(${TARGET} PRIVATE cxx_std_17)
+add_executable(rpc-server rpc-server.cpp)
+target_link_libraries(rpc-server PRIVATE ggml llama)

examples/rpc/rpc-server.cpp

Lines changed: 15 additions & 199 deletions
@@ -5,166 +5,33 @@
 #ifdef GGML_USE_METAL
 #include "ggml-metal.h"
 #endif
-#ifdef GGML_USE_VULKAN
-#include "ggml-vulkan.h"
-#endif
-#ifdef GGML_USE_SYCL
-#include "ggml-sycl.h"
-#endif
 
 #include "ggml-rpc.h"
 #ifdef _WIN32
-# define DIRECTORY_SEPARATOR '\\'
-# define NOMINMAX
-# include <locale>
 # include <windows.h>
-# include <fcntl.h>
-# include <io.h>
 #else
-# define DIRECTORY_SEPARATOR '/'
 # include <unistd.h>
-# include <sys/stat.h>
 #endif
 #include <string>
 #include <stdio.h>
-#include <algorithm>
-#include <thread>
-#include <fstream>
-#include <filesystem>
-#include <codecvt>
-
-namespace fs = std::filesystem;
-
-// NOTE: this is copied from common.cpp to avoid linking with libcommon
-// returns true if successful, false otherwise
-static bool fs_create_directory_with_parents(const std::string& path) {
-#ifdef _WIN32
-    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
-    std::wstring wpath = converter.from_bytes(path);
-
-    // if the path already exists, check whether it's a directory
-    const DWORD attributes = GetFileAttributesW(wpath.c_str());
-    if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
-        return true;
-    }
-
-    size_t pos_slash = 0;
-
-    // process path from front to back, procedurally creating directories
-    while ((pos_slash = path.find('\\', pos_slash)) != std::string::npos) {
-        const std::wstring subpath = wpath.substr(0, pos_slash);
-        const wchar_t* test = subpath.c_str();
-
-        const bool success = CreateDirectoryW(test, NULL);
-        if (!success) {
-            const DWORD error = GetLastError();
-
-            // if the path already exists, ensure that it's a directory
-            if (error == ERROR_ALREADY_EXISTS) {
-                const DWORD attributes = GetFileAttributesW(subpath.c_str());
-                if (attributes == INVALID_FILE_ATTRIBUTES || !(attributes & FILE_ATTRIBUTE_DIRECTORY)) {
-                    return false;
-                }
-            }
-            else {
-                return false;
-            }
-        }
-
-        pos_slash += 1;
-    }
-
-    return true;
-#else
-    // if the path already exists, check whether it's a directory
-    struct stat info;
-    if (stat(path.c_str(), &info) == 0) {
-        return S_ISDIR(info.st_mode);
-    }
-
-    size_t pos_slash = 1; // skip leading slashes for directory creation
-
-    // process path from front to back, procedurally creating directories
-    while ((pos_slash = path.find('/', pos_slash)) != std::string::npos) {
-        const std::string subpath = path.substr(0, pos_slash);
-        struct stat info;
-
-        // if the path already exists, ensure that it's a directory
-        if (stat(subpath.c_str(), &info) == 0) {
-            if (!S_ISDIR(info.st_mode)) {
-                return false;
-            }
-        }
-        else {
-            // create parent directories
-            const int ret = mkdir(subpath.c_str(), 0755);
-            if (ret != 0) {
-                return false;
-            }
-        }
-
-        pos_slash += 1;
-    }
-
-    return true;
-#endif // _WIN32
-}
-
-// NOTE: this is copied from common.cpp to avoid linking with libcommon
-static std::string fs_get_cache_directory() {
-    std::string cache_directory = "";
-    auto ensure_trailing_slash = [](std::string p) {
-        // Make sure to add trailing slash
-        if (p.back() != DIRECTORY_SEPARATOR) {
-            p += DIRECTORY_SEPARATOR;
-        }
-        return p;
-    };
-    if (getenv("LLAMA_CACHE")) {
-        cache_directory = std::getenv("LLAMA_CACHE");
-    }
-    else {
-#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)
-        if (std::getenv("XDG_CACHE_HOME")) {
-            cache_directory = std::getenv("XDG_CACHE_HOME");
-        }
-        else {
-            cache_directory = std::getenv("HOME") + std::string("/.cache/");
-        }
-#elif defined(__APPLE__)
-        cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
-#elif defined(_WIN32)
-        cache_directory = std::getenv("LOCALAPPDATA");
-#else
-# error Unknown architecture
-#endif
-        cache_directory = ensure_trailing_slash(cache_directory);
-        cache_directory += "llama.cpp";
-    }
-    return ensure_trailing_slash(cache_directory);
-}
 
 struct rpc_server_params {
     std::string host = "127.0.0.1";
     int port = 50052;
     size_t backend_mem = 0;
-    bool use_cache = false;
-    int n_threads = std::max(1U, std::thread::hardware_concurrency() / 2);
 };
 
-static void print_usage(int /*argc*/, char** argv, rpc_server_params params) {
+static void print_usage(int /*argc*/, char ** argv, rpc_server_params params) {
     fprintf(stderr, "Usage: %s [options]\n\n", argv[0]);
     fprintf(stderr, "options:\n");
-    fprintf(stderr, " -h, --help show this help message and exit\n");
-    fprintf(stderr, " -t, --threads number of threads for the CPU backend (default: %d)\n", params.n_threads);
-    fprintf(stderr, " -H HOST, --host HOST host to bind to (default: %s)\n", params.host.c_str());
-    fprintf(stderr, " -p PORT, --port PORT port to bind to (default: %d)\n", params.port);
-    fprintf(stderr, " -m MEM, --mem MEM backend memory size (in MB)\n");
-    fprintf(stderr, " -c, --cache enable local file cache\n");
+    fprintf(stderr, " -h, --help show this help message and exit\n");
+    fprintf(stderr, " -H HOST, --host HOST host to bind to (default: %s)\n", params.host.c_str());
+    fprintf(stderr, " -p PORT, --port PORT port to bind to (default: %d)\n", params.port);
+    fprintf(stderr, " -m MEM, --mem MEM backend memory size (in MB)\n");
     fprintf(stderr, "\n");
 }
 
-static bool rpc_server_params_parse(int argc, char** argv, rpc_server_params& params) {
+static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params & params) {
     std::string arg;
     for (int i = 1; i < argc; i++) {
         arg = argv[i];
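The hunk above also drops the server's -t/--threads option, whose removed default picked half of the available hardware threads. A tiny standalone sketch of that default (illustrative only, not part of the commit):

// Sketch: choose a default thread count the way the removed n_threads field did.
#include <algorithm>
#include <cstdio>
#include <thread>

int main() {
    // hardware_concurrency() may return 0; clamp to at least one thread
    unsigned int n_threads = std::max(1U, std::thread::hardware_concurrency() / 2);
    printf("default CPU backend threads: %u\n", n_threads);
    return 0;
}
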
@@ -173,40 +40,23 @@ static bool rpc_server_params_parse(int argc, char** argv, rpc_server_params& pa
                 return false;
             }
             params.host = argv[i];
-        }
-        else if (arg == "-t" || arg == "--threads") {
-            if (++i >= argc) {
-                return false;
-            }
-            params.n_threads = std::stoi(argv[i]);
-            if (params.n_threads <= 0) {
-                fprintf(stderr, "error: invalid number of threads: %d\n", params.n_threads);
-                return false;
-            }
-        }
-        else if (arg == "-p" || arg == "--port") {
+        } else if (arg == "-p" || arg == "--port") {
             if (++i >= argc) {
                 return false;
             }
             params.port = std::stoi(argv[i]);
             if (params.port <= 0 || params.port > 65535) {
                 return false;
             }
-        }
-        else if (arg == "-c" || arg == "--cache") {
-            params.use_cache = true;
-        }
-        else if (arg == "-m" || arg == "--mem") {
+        } else if (arg == "-m" || arg == "--mem") {
            if (++i >= argc) {
                return false;
            }
            params.backend_mem = std::stoul(argv[i]) * 1024 * 1024;
-        }
-        else if (arg == "-h" || arg == "--help") {
+        } else if (arg == "-h" || arg == "--help") {
            print_usage(argc, argv, params);
            exit(0);
-        }
-        else {
+        } else {
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            print_usage(argc, argv, params);
            exit(0);
@@ -215,7 +65,7 @@ static bool rpc_server_params_parse(int argc, char** argv, rpc_server_params& pa
     return true;
 }
 
-static ggml_backend_t create_backend(const rpc_server_params& params) {
+static ggml_backend_t create_backend() {
     ggml_backend_t backend = NULL;
 #ifdef GGML_USE_CUDA
     fprintf(stderr, "%s: using CUDA backend\n", __func__);
@@ -229,36 +79,19 @@ static ggml_backend_t create_backend(const rpc_server_params& params) {
     if (!backend) {
         fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
     }
-#elif GGML_USE_VULKAN
-    fprintf(stderr, "%s: using Vulkan backend\n", __func__);
-    backend = ggml_backend_vk_init(0); // init device 0
-    if (!backend) {
-        fprintf(stderr, "%s: ggml_backend_vulkan_init() failed\n", __func__);
-    }
-#elif GGML_USE_SYCL
-    fprintf(stderr, "%s: using SYCL backend\n", __func__);
-    backend = ggml_backend_sycl_init(0); // init device 0
-    if (!backend) {
-        fprintf(stderr, "%s: ggml_backend_sycl_init() failed\n", __func__);
-    }
 #endif
 
     // if there aren't GPU Backends fallback to CPU backend
     if (!backend) {
         fprintf(stderr, "%s: using CPU backend\n", __func__);
         backend = ggml_backend_cpu_init();
-        ggml_backend_cpu_set_n_threads(backend, params.n_threads);
     }
     return backend;
 }
 
 static void get_backend_memory(size_t * free_mem, size_t * total_mem) {
 #ifdef GGML_USE_CUDA
     ggml_backend_cuda_get_device_memory(0, free_mem, total_mem);
-#elif GGML_USE_VULKAN
-    ggml_backend_vk_get_device_memory(0, free_mem, total_mem);
-#elif GGML_USE_SYCL
-    ggml_backend_sycl_get_device_memory(0, free_mem, total_mem);
 #else
 #ifdef _WIN32
     MEMORYSTATUSEX status;
@@ -292,7 +125,7 @@ int main(int argc, char * argv[]) {
         fprintf(stderr, "\n");
     }
 
-    ggml_backend_t backend = create_backend(params);
+    ggml_backend_t backend = create_backend();
     if (!backend) {
         fprintf(stderr, "Failed to create backend\n");
         return 1;
@@ -302,28 +135,11 @@ int main(int argc, char * argv[]) {
     if (params.backend_mem > 0) {
         free_mem = params.backend_mem;
         total_mem = params.backend_mem;
-    }
-    else {
+    } else {
         get_backend_memory(&free_mem, &total_mem);
     }
-    const char * cache_dir = nullptr;
-    std::string cache_dir_str;
-    if (params.use_cache) {
-        cache_dir_str = fs_get_cache_directory() + "rpc/";
-        if (!fs_create_directory_with_parents(cache_dir_str)) {
-            fprintf(stderr, "Failed to create cache directory: %s\n", cache_dir_str.c_str());
-            return 1;
-        }
-        cache_dir = cache_dir_str.c_str();
-    }
-    printf("Starting RPC server v%d.%d.%d\n",
-        RPC_PROTO_MAJOR_VERSION,
-        RPC_PROTO_MINOR_VERSION,
-        RPC_PROTO_PATCH_VERSION);
-    printf(" endpoint : %s\n", endpoint.c_str());
-    printf(" local cache : %s\n", cache_dir ? cache_dir : "n/a");
-    printf(" backend memory : %zu MB\n", free_mem / (1024 * 1024));
-    ggml_backend_rpc_start_server(backend, endpoint.c_str(), cache_dir, free_mem, total_mem);
+    printf("Starting RPC server on %s, backend memory: %zu MB\n", endpoint.c_str(), free_mem / (1024 * 1024));
+    start_rpc_server(backend, endpoint.c_str(), free_mem, total_mem);
     ggml_backend_free(backend);
     return 0;
 }
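
The local file cache removed by this revert relied on the hand-rolled fs_create_directory_with_parents() and fs_get_cache_directory() helpers deleted in the first hunk of this file. For comparison only, a minimal sketch of the directory-creation part using std::filesystem (C++17) follows; the fallback cache path chosen here is made up for the example and is not what the reverted code used.

// Sketch: recursive directory creation with std::filesystem instead of the
// removed hand-written helper. Illustrative only; not repository code.
#include <cstdio>
#include <cstdlib>
#include <filesystem>
#include <system_error>

int main() {
    const char * env = std::getenv("LLAMA_CACHE");        // env var the removed helper also honored
    std::filesystem::path cache = env ? env : "./cache";  // fallback path is hypothetical
    cache /= "rpc";
    std::error_code ec;
    std::filesystem::create_directories(cache, ec);       // creates all missing parent directories
    if (ec) {
        fprintf(stderr, "Failed to create cache directory: %s\n", cache.string().c_str());
        return 1;
    }
    printf("cache directory: %s\n", cache.string().c_str());
    return 0;
}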
