2222
2323#include " ggml-rpc.h"
2424#ifdef _WIN32
25+ # define NOMINMAX
2526# define DIRECTORY_SEPARATOR ' \\ '
2627# include < locale>
2728# include < windows.h>
3738#include < stdio.h>
3839#include < vector>
3940#include < filesystem>
41+ #include < algorithm>
42+ #include < thread>
4043
4144namespace fs = std::filesystem;
4245
@@ -150,12 +153,14 @@ struct rpc_server_params {
150153 int port = 50052 ;
151154 size_t backend_mem = 0 ;
152155 bool use_cache = false ;
156+ int n_threads = std::max(1U , std::thread::hardware_concurrency()/2 );
153157};
154158
155159static void print_usage (int /* argc*/ , char ** argv, rpc_server_params params) {
156160 fprintf (stderr, " Usage: %s [options]\n\n " , argv[0 ]);
157161 fprintf (stderr, " options:\n " );
158162 fprintf (stderr, " -h, --help show this help message and exit\n " );
163+ fprintf (stderr, " -t, --threads number of threads for the CPU backend (default: %d)\n " , params.n_threads );
159164 fprintf (stderr, " -H HOST, --host HOST host to bind to (default: %s)\n " , params.host .c_str ());
160165 fprintf (stderr, " -p PORT, --port PORT port to bind to (default: %d)\n " , params.port );
161166 fprintf (stderr, " -m MEM, --mem MEM backend memory size (in MB)\n " );
@@ -172,6 +177,15 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
172177 return false ;
173178 }
174179 params.host = argv[i];
180+ } else if (arg == " -t" || arg == " --threads" ) {
181+ if (++i >= argc) {
182+ return false ;
183+ }
184+ params.n_threads = std::stoi (argv[i]);
185+ if (params.n_threads <= 0 ) {
186+ fprintf (stderr, " error: invalid number of threads: %d\n " , params.n_threads );
187+ return false ;
188+ }
175189 } else if (arg == " -p" || arg == " --port" ) {
176190 if (++i >= argc) {
177191 return false ;
@@ -199,7 +213,7 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
199213 return true ;
200214}
201215
202- static ggml_backend_t create_backend () {
216+ static ggml_backend_t create_backend (const rpc_server_params & params ) {
203217 ggml_backend_t backend = NULL ;
204218#ifdef GGML_USE_CUDA
205219 fprintf (stderr, " %s: using CUDA backend\n " , __func__);
@@ -231,6 +245,7 @@ static ggml_backend_t create_backend() {
231245 if (!backend) {
232246 fprintf (stderr, " %s: using CPU backend\n " , __func__);
233247 backend = ggml_backend_cpu_init ();
248+ ggml_backend_cpu_set_n_threads (backend, params.n_threads );
234249 }
235250 return backend;
236251}
@@ -275,7 +290,7 @@ int main(int argc, char * argv[]) {
275290 fprintf (stderr, " \n " );
276291 }
277292
278- ggml_backend_t backend = create_backend ();
293+ ggml_backend_t backend = create_backend (params );
279294 if (!backend) {
280295 fprintf (stderr, " Failed to create backend\n " );
281296 return 1 ;
0 commit comments