2424#endif
2525#include < string>
2626#include < stdio.h>
27+ #include < vector>
28+ #include < filesystem>
29+
// Shorthand for the standard filesystem library, used to validate the paths
// supplied on the command line.
namespace fs = std::filesystem;

// Maximum number of GGUF files that may be passed with -f/--gguf; also the
// size of the fixed array of C strings handed to the server at startup.
const int MAX_GGUF_PATH = 16;
2733
// Settings for the RPC server, filled in from the command line.
struct rpc_server_params {
    std::string              host        = "127.0.0.1"; // host to bind to (-H/--host)
    int                      port        = 50052;       // port to bind to (-p/--port)
    size_t                   backend_mem = 0;           // backend memory size (-m/--mem)
    std::string              cache_dir   = "";          // local cache dir (-d/--cache-dir); empty means none
    std::vector<std::string> gguf_path;                 // GGUF files given with -f/--gguf
};
3341
3442static void print_usage (int /* argc*/ , char ** argv, rpc_server_params params) {
3543 fprintf (stderr, " Usage: %s [options]\n\n " , argv[0 ]);
3644 fprintf (stderr, " options:\n " );
37- fprintf (stderr, " -h, --help show this help message and exit\n " );
38- fprintf (stderr, " -H HOST, --host HOST host to bind to (default: %s)\n " , params.host .c_str ());
39- fprintf (stderr, " -p PORT, --port PORT port to bind to (default: %d)\n " , params.port );
40- fprintf (stderr, " -m MEM, --mem MEM backend memory size (in MB)\n " );
45+ fprintf (stderr, " -h, --help show this help message and exit\n " );
46+ fprintf (stderr, " -H HOST, --host HOST host to bind to (default: %s)\n " , params.host .c_str ());
47+ fprintf (stderr, " -p PORT, --port PORT port to bind to (default: %d)\n " , params.port );
48+ fprintf (stderr, " -f PATH, --gguf PATH path to GGUF file\n " );
49+ fprintf (stderr, " -d DIR, --cache-dir DIR local cache dir\n " );
50+ fprintf (stderr, " -m MEM, --mem MEM backend memory size (in MB)\n " );
4151 fprintf (stderr, " \n " );
4252}
4353
@@ -58,6 +68,30 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
5868 if (params.port <= 0 || params.port > 65535 ) {
5969 return false ;
6070 }
71+ } else if (arg == " -f" || arg == " --gguf" ) {
72+ if (++i >= argc) {
73+ return false ;
74+ }
75+ if (params.gguf_path .size () >= MAX_GGUF_PATH) {
76+ fprintf (stderr, " error: too many GGUF files\n " );
77+ return false ;
78+ }
79+ fs::path gguf_path (argv[i]);
80+ if (!fs::is_regular_file (gguf_path)) {
81+ fprintf (stderr, " error: GGUF file does not exist: %s\n " , gguf_path.c_str ());
82+ return false ;
83+ }
84+ params.gguf_path .push_back (argv[i]);
85+ } else if (arg == " -d" || arg == " --cache-dir" ) {
86+ if (++i >= argc) {
87+ return false ;
88+ }
89+ fs::path cache_dir (argv[i]);
90+ if (!fs::is_directory (cache_dir)) {
91+ fprintf (stderr, " error: cache dir does not exist: %s\n " , cache_dir.c_str ());
92+ return false ;
93+ }
94+ params.cache_dir = argv[i];
6195 } else if (arg == " -m" || arg == " --mem" ) {
6296 if (++i >= argc) {
6397 return false ;
@@ -164,8 +198,14 @@ int main(int argc, char * argv[]) {
164198 } else {
165199 get_backend_memory (&free_mem, &total_mem);
166200 }
201+ const char * gguf_path[MAX_GGUF_PATH] = {0 };
202+ int n_gguf_path = params.gguf_path .size ();
203+ for (int i = 0 ; i < n_gguf_path; i++) {
204+ gguf_path[i] = params.gguf_path [i].c_str ();
205+ }
206+ const char * cache_dir = params.cache_dir .empty () ? nullptr : params.cache_dir .c_str ();
167207 printf (" Starting RPC server on %s, backend memory: %zu MB\n " , endpoint.c_str (), free_mem / (1024 * 1024 ));
168- ggml_backend_rpc_start_server (backend, endpoint.c_str (), free_mem, total_mem);
208+ ggml_backend_rpc_start_server (backend, endpoint.c_str (), cache_dir, n_gguf_path, gguf_path, free_mem, total_mem);
169209 ggml_backend_free (backend);
170210 return 0 ;
171211}
0 commit comments