3737#include < stdio.h>
3838#include < vector>
3939#include < filesystem>
40+ #include < algorithm>
41+ #include < thread>
4042
4143namespace fs = std::filesystem;
4244
@@ -150,12 +152,14 @@ struct rpc_server_params {
150152 int port = 50052 ;
151153 size_t backend_mem = 0 ;
152154 bool use_cache = false ;
155+ int n_threads = std::max(1U , std::thread::hardware_concurrency()/2 );
153156};
154157
155158static void print_usage (int /* argc*/ , char ** argv, rpc_server_params params) {
156159 fprintf (stderr, " Usage: %s [options]\n\n " , argv[0 ]);
157160 fprintf (stderr, " options:\n " );
158161 fprintf (stderr, " -h, --help show this help message and exit\n " );
162+ fprintf (stderr, " -t, --threads number of threads for the CPU backend (default: %d)\n " , params.n_threads );
159163 fprintf (stderr, " -H HOST, --host HOST host to bind to (default: %s)\n " , params.host .c_str ());
160164 fprintf (stderr, " -p PORT, --port PORT port to bind to (default: %d)\n " , params.port );
161165 fprintf (stderr, " -m MEM, --mem MEM backend memory size (in MB)\n " );
@@ -172,6 +176,15 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
172176 return false ;
173177 }
174178 params.host = argv[i];
179+ } else if (arg == " -t" || arg == " --threads" ) {
180+ if (++i >= argc) {
181+ return false ;
182+ }
183+ params.n_threads = std::stoi (argv[i]);
184+ if (params.n_threads <= 0 ) {
185+ fprintf (stderr, " error: invalid number of threads: %d\n " , params.n_threads );
186+ return false ;
187+ }
175188 } else if (arg == " -p" || arg == " --port" ) {
176189 if (++i >= argc) {
177190 return false ;
@@ -199,7 +212,7 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
199212 return true ;
200213}
201214
202- static ggml_backend_t create_backend () {
215+ static ggml_backend_t create_backend (const rpc_server_params & params ) {
203216 ggml_backend_t backend = NULL ;
204217#ifdef GGML_USE_CUDA
205218 fprintf (stderr, " %s: using CUDA backend\n " , __func__);
@@ -231,6 +244,7 @@ static ggml_backend_t create_backend() {
231244 if (!backend) {
232245 fprintf (stderr, " %s: using CPU backend\n " , __func__);
233246 backend = ggml_backend_cpu_init ();
247+ ggml_backend_cpu_set_n_threads (backend, params.n_threads );
234248 }
235249 return backend;
236250}
@@ -275,7 +289,7 @@ int main(int argc, char * argv[]) {
275289 fprintf (stderr, " \n " );
276290 }
277291
278- ggml_backend_t backend = create_backend ();
292+ ggml_backend_t backend = create_backend (params );
279293 if (!backend) {
280294 fprintf (stderr, " Failed to create backend\n " );
281295 return 1 ;
0 commit comments