2222
2323#include  " ggml-rpc.h" 
2424#ifdef  _WIN32
25+ #  define  NOMINMAX 
2526#  define  DIRECTORY_SEPARATOR  ' \\ ' 
2627#  include  < locale> 
2728#  include  < windows.h> 
3738#include  < stdio.h> 
3839#include  < vector> 
3940#include  < filesystem> 
41+ #include  < algorithm> 
42+ #include  < thread> 
4043
4144namespace  fs  =  std::filesystem;
4245
@@ -150,12 +153,14 @@ struct rpc_server_params {
150153    int          port        = 50052 ;
151154    size_t       backend_mem = 0 ;
152155    bool         use_cache   = false ;
156+     int          n_threads   = std::max(1U , std::thread::hardware_concurrency()/2 );
153157};
154158
155159static  void  print_usage (int  /* argc*/  , char  ** argv, rpc_server_params params) {
156160    fprintf (stderr, " Usage: %s [options]\n\n "  , argv[0 ]);
157161    fprintf (stderr, " options:\n "  );
158162    fprintf (stderr, "   -h, --help                show this help message and exit\n "  );
163+     fprintf (stderr, "   -t,      --threads        number of threads for the CPU backend (default: %d)\n "  , params.n_threads );
159164    fprintf (stderr, "   -H HOST, --host HOST      host to bind to (default: %s)\n "  , params.host .c_str ());
160165    fprintf (stderr, "   -p PORT, --port PORT      port to bind to (default: %d)\n "  , params.port );
161166    fprintf (stderr, "   -m MEM,  --mem MEM        backend memory size (in MB)\n "  );
@@ -172,6 +177,15 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
172177                return  false ;
173178            }
174179            params.host  = argv[i];
180+         } else  if  (arg == " -t"   || arg == " --threads"  ) {
181+             if  (++i >= argc) {
182+                 return  false ;
183+             }
184+             params.n_threads  = std::stoi (argv[i]);
185+             if  (params.n_threads  <= 0 ) {
186+                 fprintf (stderr, " error: invalid number of threads: %d\n "  , params.n_threads );
187+                 return  false ;
188+             }
175189        } else  if  (arg == " -p"   || arg == " --port"  ) {
176190            if  (++i >= argc) {
177191                return  false ;
@@ -199,7 +213,7 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
199213    return  true ;
200214}
201215
202- static  ggml_backend_t  create_backend () {
216+ static  ggml_backend_t  create_backend (const  rpc_server_params & params ) {
203217    ggml_backend_t  backend = NULL ;
204218#ifdef  GGML_USE_CUDA
205219    fprintf (stderr, " %s: using CUDA backend\n "  , __func__);
@@ -231,6 +245,7 @@ static ggml_backend_t create_backend() {
231245    if  (!backend) {
232246        fprintf (stderr, " %s: using CPU backend\n "  , __func__);
233247        backend = ggml_backend_cpu_init ();
248+         ggml_backend_cpu_set_n_threads (backend, params.n_threads );
234249    }
235250    return  backend;
236251}
@@ -275,7 +290,7 @@ int main(int argc, char * argv[]) {
275290        fprintf (stderr, " \n "  );
276291    }
277292
278-     ggml_backend_t  backend = create_backend ();
293+     ggml_backend_t  backend = create_backend (params );
279294    if  (!backend) {
280295        fprintf (stderr, " Failed to create backend\n "  );
281296        return  1 ;
0 commit comments