#ifdef GGML_USE_METAL
#include "ggml-metal.h"
#endif
- #ifdef GGML_USE_VULKAN
- #include "ggml-vulkan.h"
- #endif
- #ifdef GGML_USE_SYCL
- #include "ggml-sycl.h"
- #endif

#include "ggml-rpc.h"
#ifdef _WIN32
- #  define DIRECTORY_SEPARATOR '\\'
- #  define NOMINMAX
- #  include <locale>
#  include <windows.h>
- #  include <fcntl.h>
- #  include <io.h>
#else
- #  define DIRECTORY_SEPARATOR '/'
#  include <unistd.h>
- #  include <sys/stat.h>
#endif
#include <string>
#include <stdio.h>
- #include <algorithm>
- #include <thread>
- #include <fstream>
- #include <filesystem>
- #include <codecvt>
-
- namespace fs = std::filesystem;
-
- // NOTE: this is copied from common.cpp to avoid linking with libcommon
- // returns true if successful, false otherwise
- static bool fs_create_directory_with_parents(const std::string& path) {
- #ifdef _WIN32
-     std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
-     std::wstring wpath = converter.from_bytes(path);
-
-     // if the path already exists, check whether it's a directory
-     const DWORD attributes = GetFileAttributesW(wpath.c_str());
-     if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
-         return true;
-     }
-
-     size_t pos_slash = 0;
-
-     // process path from front to back, procedurally creating directories
-     while ((pos_slash = path.find('\\', pos_slash)) != std::string::npos) {
-         const std::wstring subpath = wpath.substr(0, pos_slash);
-         const wchar_t * test = subpath.c_str();
-
-         const bool success = CreateDirectoryW(test, NULL);
-         if (!success) {
-             const DWORD error = GetLastError();
-
-             // if the path already exists, ensure that it's a directory
-             if (error == ERROR_ALREADY_EXISTS) {
-                 const DWORD attributes = GetFileAttributesW(subpath.c_str());
-                 if (attributes == INVALID_FILE_ATTRIBUTES || !(attributes & FILE_ATTRIBUTE_DIRECTORY)) {
-                     return false;
-                 }
-             }
-             else {
-                 return false;
-             }
-         }
-
-         pos_slash += 1;
-     }
-
-     return true;
- #else
-     // if the path already exists, check whether it's a directory
-     struct stat info;
-     if (stat(path.c_str(), &info) == 0) {
-         return S_ISDIR(info.st_mode);
-     }
-
-     size_t pos_slash = 1; // skip leading slashes for directory creation
-
-     // process path from front to back, procedurally creating directories
-     while ((pos_slash = path.find('/', pos_slash)) != std::string::npos) {
-         const std::string subpath = path.substr(0, pos_slash);
-         struct stat info;
-
-         // if the path already exists, ensure that it's a directory
-         if (stat(subpath.c_str(), &info) == 0) {
-             if (!S_ISDIR(info.st_mode)) {
-                 return false;
-             }
-         }
-         else {
-             // create parent directories
-             const int ret = mkdir(subpath.c_str(), 0755);
-             if (ret != 0) {
-                 return false;
-             }
-         }
-
-         pos_slash += 1;
-     }
-
-     return true;
- #endif // _WIN32
- }
-
- // NOTE: this is copied from common.cpp to avoid linking with libcommon
- static std::string fs_get_cache_directory() {
-     std::string cache_directory = "";
-     auto ensure_trailing_slash = [](std::string p) {
-         // Make sure to add trailing slash
-         if (p.back() != DIRECTORY_SEPARATOR) {
-             p += DIRECTORY_SEPARATOR;
-         }
-         return p;
-     };
-     if (getenv("LLAMA_CACHE")) {
-         cache_directory = std::getenv("LLAMA_CACHE");
-     }
-     else {
- #if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)
-         if (std::getenv("XDG_CACHE_HOME")) {
-             cache_directory = std::getenv("XDG_CACHE_HOME");
-         }
-         else {
-             cache_directory = std::getenv("HOME") + std::string("/.cache/");
-         }
- #elif defined(__APPLE__)
-         cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
- #elif defined(_WIN32)
-         cache_directory = std::getenv("LOCALAPPDATA");
- #else
- #  error Unknown architecture
- #endif
-         cache_directory = ensure_trailing_slash(cache_directory);
-         cache_directory += "llama.cpp";
-     }
-     return ensure_trailing_slash(cache_directory);
- }

struct rpc_server_params {
    std::string host = "127.0.0.1";
    int port = 50052;
    size_t backend_mem = 0;
-     bool use_cache = false;
-     int n_threads = std::max(1U, std::thread::hardware_concurrency() / 2);
};

- static void print_usage(int /*argc*/, char ** argv, rpc_server_params params) {
+ static void print_usage(int /*argc*/, char ** argv, rpc_server_params params) {
    fprintf(stderr, "Usage: %s [options]\n\n", argv[0]);
    fprintf(stderr, "options:\n");
-     fprintf(stderr, "  -h, --help                show this help message and exit\n");
-     fprintf(stderr, "  -t, --threads             number of threads for the CPU backend (default: %d)\n", params.n_threads);
-     fprintf(stderr, "  -H HOST, --host HOST      host to bind to (default: %s)\n", params.host.c_str());
-     fprintf(stderr, "  -p PORT, --port PORT      port to bind to (default: %d)\n", params.port);
-     fprintf(stderr, "  -m MEM, --mem MEM         backend memory size (in MB)\n");
-     fprintf(stderr, "  -c, --cache               enable local file cache\n");
+     fprintf(stderr, "  -h, --help                show this help message and exit\n");
+     fprintf(stderr, "  -H HOST, --host HOST      host to bind to (default: %s)\n", params.host.c_str());
+     fprintf(stderr, "  -p PORT, --port PORT      port to bind to (default: %d)\n", params.port);
+     fprintf(stderr, "  -m MEM, --mem MEM         backend memory size (in MB)\n");
    fprintf(stderr, "\n");
}

- static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params& params) {
+ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params & params) {
    std::string arg;
    for (int i = 1; i < argc; i++) {
        arg = argv[i];
@@ -173,40 +40,23 @@ static bool rpc_server_params_parse(int argc, char** argv, rpc_server_params& pa
                return false;
            }
            params.host = argv[i];
-         }
-         else if (arg == "-t" || arg == "--threads") {
-             if (++i >= argc) {
-                 return false;
-             }
-             params.n_threads = std::stoi(argv[i]);
-             if (params.n_threads <= 0) {
-                 fprintf(stderr, "error: invalid number of threads: %d\n", params.n_threads);
-                 return false;
-             }
-         }
-         else if (arg == "-p" || arg == "--port") {
+         } else if (arg == "-p" || arg == "--port") {
            if (++i >= argc) {
                return false;
            }
            params.port = std::stoi(argv[i]);
            if (params.port <= 0 || params.port > 65535) {
                return false;
            }
-         }
-         else if (arg == "-c" || arg == "--cache") {
-             params.use_cache = true;
-         }
-         else if (arg == "-m" || arg == "--mem") {
+         } else if (arg == "-m" || arg == "--mem") {
            if (++i >= argc) {
                return false;
            }
            params.backend_mem = std::stoul(argv[i]) * 1024 * 1024;
-         }
-         else if (arg == "-h" || arg == "--help") {
+         } else if (arg == "-h" || arg == "--help") {
            print_usage(argc, argv, params);
            exit(0);
-         }
-         else {
+         } else {
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            print_usage(argc, argv, params);
            exit(0);
@@ -215,7 +65,7 @@ static bool rpc_server_params_parse(int argc, char** argv, rpc_server_params& pa
    return true;
}

- static ggml_backend_t create_backend(const rpc_server_params& params) {
+ static ggml_backend_t create_backend() {
    ggml_backend_t backend = NULL;
#ifdef GGML_USE_CUDA
    fprintf(stderr, "%s: using CUDA backend\n", __func__);
@@ -229,36 +79,19 @@ static ggml_backend_t create_backend(const rpc_server_params& params) {
    if (!backend) {
        fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
    }
- #elif GGML_USE_VULKAN
-     fprintf(stderr, "%s: using Vulkan backend\n", __func__);
-     backend = ggml_backend_vk_init(0); // init device 0
-     if (!backend) {
-         fprintf(stderr, "%s: ggml_backend_vulkan_init() failed\n", __func__);
-     }
- #elif GGML_USE_SYCL
-     fprintf(stderr, "%s: using SYCL backend\n", __func__);
-     backend = ggml_backend_sycl_init(0); // init device 0
-     if (!backend) {
-         fprintf(stderr, "%s: ggml_backend_sycl_init() failed\n", __func__);
-     }
#endif

    // if there aren't GPU Backends fallback to CPU backend
    if (!backend) {
        fprintf(stderr, "%s: using CPU backend\n", __func__);
        backend = ggml_backend_cpu_init();
-         ggml_backend_cpu_set_n_threads(backend, params.n_threads);
    }
    return backend;
}

static void get_backend_memory(size_t * free_mem, size_t * total_mem) {
#ifdef GGML_USE_CUDA
    ggml_backend_cuda_get_device_memory(0, free_mem, total_mem);
- #elif GGML_USE_VULKAN
-     ggml_backend_vk_get_device_memory(0, free_mem, total_mem);
- #elif GGML_USE_SYCL
-     ggml_backend_sycl_get_device_memory(0, free_mem, total_mem);
#else
    #ifdef _WIN32
        MEMORYSTATUSEX status;
@@ -292,7 +125,7 @@ int main(int argc, char * argv[]) {
        fprintf(stderr, "\n");
    }

-     ggml_backend_t backend = create_backend(params);
+     ggml_backend_t backend = create_backend();
    if (!backend) {
        fprintf(stderr, "Failed to create backend\n");
        return 1;
@@ -302,28 +135,11 @@ int main(int argc, char * argv[]) {
    if (params.backend_mem > 0) {
        free_mem = params.backend_mem;
        total_mem = params.backend_mem;
-     }
-     else {
+     } else {
        get_backend_memory(&free_mem, &total_mem);
    }
-     const char * cache_dir = nullptr;
-     std::string cache_dir_str;
-     if (params.use_cache) {
-         cache_dir_str = fs_get_cache_directory() + "rpc/";
-         if (!fs_create_directory_with_parents(cache_dir_str)) {
-             fprintf(stderr, "Failed to create cache directory: %s\n", cache_dir_str.c_str());
-             return 1;
-         }
-         cache_dir = cache_dir_str.c_str();
-     }
-     printf("Starting RPC server v%d.%d.%d\n",
-         RPC_PROTO_MAJOR_VERSION,
-         RPC_PROTO_MINOR_VERSION,
-         RPC_PROTO_PATCH_VERSION);
-     printf("  endpoint       : %s\n", endpoint.c_str());
-     printf("  local cache    : %s\n", cache_dir ? cache_dir : "n/a");
-     printf("  backend memory : %zu MB\n", free_mem / (1024 * 1024));
-     ggml_backend_rpc_start_server(backend, endpoint.c_str(), cache_dir, free_mem, total_mem);
+     printf("Starting RPC server on %s, backend memory: %zu MB\n", endpoint.c_str(), free_mem / (1024 * 1024));
+     start_rpc_server(backend, endpoint.c_str(), free_mem, total_mem);
    ggml_backend_free(backend);
    return 0;
}
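
The server shown in this diff is only half of the RPC setup: a client process links against the same ggml RPC backend and connects to the endpoint printed at startup (by default 127.0.0.1:50052, per rpc_server_params). The sketch below is not part of this commit; it is a minimal, hedged illustration that assumes the ggml_backend_rpc_init() and ggml_backend_rpc_get_device_memory() declarations commonly found in ggml-rpc.h, whose exact names and signatures may differ between ggml revisions.

// Hypothetical client-side sketch: connect to a running rpc-server and query
// how much memory the remote backend advertises. Assumes ggml-rpc.h provides
// ggml_backend_rpc_init(endpoint) and
// ggml_backend_rpc_get_device_memory(endpoint, free, total).
#include <cstdio>

#include "ggml-rpc.h"

int main() {
    // default host:port used by rpc_server_params in the server above
    const char * endpoint = "127.0.0.1:50052";

    // establish a connection and create a remote backend handle
    ggml_backend_t backend = ggml_backend_rpc_init(endpoint);
    if (!backend) {
        fprintf(stderr, "failed to connect to RPC server at %s\n", endpoint);
        return 1;
    }

    // query the memory figures the server reported via get_backend_memory()
    size_t free_mem  = 0;
    size_t total_mem = 0;
    ggml_backend_rpc_get_device_memory(endpoint, &free_mem, &total_mem);
    printf("remote backend memory: %zu MB free / %zu MB total\n",
           free_mem / (1024 * 1024), total_mem / (1024 * 1024));

    ggml_backend_free(backend);
    return 0;
}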