|
37 | 37 | #include <stdio.h> |
38 | 38 | #include <vector> |
39 | 39 | #include <filesystem> |
| 40 | +#include <thread> |
40 | 41 |
|
41 | 42 | namespace fs = std::filesystem; |
42 | 43 |
|
@@ -150,12 +151,14 @@ struct rpc_server_params { |
150 | 151 | int port = 50052; |
151 | 152 | size_t backend_mem = 0; |
152 | 153 | bool use_cache = false; |
| 154 | + int n_threads = std::max(1U, std::thread::hardware_concurrency()/2); |
153 | 155 | }; |
154 | 156 |
|
155 | 157 | static void print_usage(int /*argc*/, char ** argv, rpc_server_params params) { |
156 | 158 | fprintf(stderr, "Usage: %s [options]\n\n", argv[0]); |
157 | 159 | fprintf(stderr, "options:\n"); |
158 | 160 | fprintf(stderr, " -h, --help show this help message and exit\n"); |
| 161 | + fprintf(stderr, " -t, --threads number of threads for the CPU backend (default: %d)\n", params.n_threads); |
159 | 162 | fprintf(stderr, " -H HOST, --host HOST host to bind to (default: %s)\n", params.host.c_str()); |
160 | 163 | fprintf(stderr, " -p PORT, --port PORT port to bind to (default: %d)\n", params.port); |
161 | 164 | fprintf(stderr, " -m MEM, --mem MEM backend memory size (in MB)\n"); |
@@ -199,7 +202,7 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params & |
199 | 202 | return true; |
200 | 203 | } |
201 | 204 |
|
202 | | -static ggml_backend_t create_backend() { |
| 205 | +static ggml_backend_t create_backend(const rpc_server_params & params) { |
203 | 206 | ggml_backend_t backend = NULL; |
204 | 207 | #ifdef GGML_USE_CUDA |
205 | 208 | fprintf(stderr, "%s: using CUDA backend\n", __func__); |
@@ -231,6 +234,7 @@ static ggml_backend_t create_backend() { |
231 | 234 | if (!backend) { |
232 | 235 | fprintf(stderr, "%s: using CPU backend\n", __func__); |
233 | 236 | backend = ggml_backend_cpu_init(); |
| 237 | + ggml_backend_cpu_set_n_threads(backend, params.n_threads); |
234 | 238 | } |
235 | 239 | return backend; |
236 | 240 | } |
@@ -275,7 +279,7 @@ int main(int argc, char * argv[]) { |
275 | 279 | fprintf(stderr, "\n"); |
276 | 280 | } |
277 | 281 |
|
278 | | - ggml_backend_t backend = create_backend(); |
| 282 | + ggml_backend_t backend = create_backend(params); |
279 | 283 | if (!backend) { |
280 | 284 | fprintf(stderr, "Failed to create backend\n"); |
281 | 285 | return 1; |
|
0 commit comments