3737#include < stdio.h>
3838#include < vector>
3939#include < filesystem>
40+ #include < thread>
4041
4142namespace fs = std::filesystem;
4243
@@ -150,12 +151,14 @@ struct rpc_server_params {
150151 int port = 50052 ;
151152 size_t backend_mem = 0 ;
152153 bool use_cache = false ;
154+ int n_threads = std::max(1U , std::thread::hardware_concurrency()/2 );
153155};
154156
155157static void print_usage (int /* argc*/ , char ** argv, rpc_server_params params) {
156158 fprintf (stderr, " Usage: %s [options]\n\n " , argv[0 ]);
157159 fprintf (stderr, " options:\n " );
158160 fprintf (stderr, " -h, --help show this help message and exit\n " );
161+ fprintf (stderr, " -t, --threads number of threads for the CPU backend (default: %d)\n " , params.n_threads );
159162 fprintf (stderr, " -H HOST, --host HOST host to bind to (default: %s)\n " , params.host .c_str ());
160163 fprintf (stderr, " -p PORT, --port PORT port to bind to (default: %d)\n " , params.port );
161164 fprintf (stderr, " -m MEM, --mem MEM backend memory size (in MB)\n " );
@@ -172,6 +175,15 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
172175 return false ;
173176 }
174177 params.host = argv[i];
178+ } else if (arg == " -t" || arg == " --threads" ) {
179+ if (++i >= argc) {
180+ return false ;
181+ }
182+ params.n_threads = std::stoi (argv[i]);
183+ if (params.n_threads <= 0 ) {
184+ fprintf (stderr, " error: invalid number of threads: %d\n " , params.n_threads );
185+ return false ;
186+ }
175187 } else if (arg == " -p" || arg == " --port" ) {
176188 if (++i >= argc) {
177189 return false ;
@@ -199,7 +211,7 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
199211 return true ;
200212}
201213
202- static ggml_backend_t create_backend () {
214+ static ggml_backend_t create_backend (const rpc_server_params & params ) {
203215 ggml_backend_t backend = NULL ;
204216#ifdef GGML_USE_CUDA
205217 fprintf (stderr, " %s: using CUDA backend\n " , __func__);
@@ -231,6 +243,7 @@ static ggml_backend_t create_backend() {
231243 if (!backend) {
232244 fprintf (stderr, " %s: using CPU backend\n " , __func__);
233245 backend = ggml_backend_cpu_init ();
246+ ggml_backend_cpu_set_n_threads (backend, params.n_threads );
234247 }
235248 return backend;
236249}
@@ -275,7 +288,7 @@ int main(int argc, char * argv[]) {
275288 fprintf (stderr, " \n " );
276289 }
277290
278- ggml_backend_t backend = create_backend ();
291+ ggml_backend_t backend = create_backend (params );
279292 if (!backend) {
280293 fprintf (stderr, " Failed to create backend\n " );
281294 return 1 ;
0 commit comments