Skip to content

Commit b66cdce

Browse files
authored
enable -td and -tbd for server
1 parent cd6983d commit b66cdce

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

common/arg.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3134,7 +3134,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
3134 3134
params.speculative.cpuparams.n_threads = std::thread::hardware_concurrency();
3135 3135
}
3136 3136
}
3137-
).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
3137+
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
3138 3138
add_opt(common_arg(
3139 3139
{"-tbd", "--threads-batch-draft"}, "N",
3140 3140
"number of threads to use during batch and prompt processing (default: same as --threads-draft)",
@@ -3144,7 +3144,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
3144 3144
params.speculative.cpuparams_batch.n_threads = std::thread::hardware_concurrency();
3145 3145
}
3146 3146
}
3147-
).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
3147+
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
3148 3148
add_opt(common_arg(
3149 3149
{"-Cd", "--cpu-mask-draft"}, "M",
3150 3150
"Draft model CPU affinity mask. Complements cpu-range-draft (default: same as --cpu-mask)",

tools/server/server.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2011,6 +2011,9 @@ struct server_context {
2011 2011
params_dft.cache_type_k = params_base.speculative.cache_type_k;
2012 2012
params_dft.cache_type_v = params_base.speculative.cache_type_v;
2013 2013

2014+
params_dft.cpuparams.n_threads = params_base.speculative.cpuparams.n_threads;
2015+
params_dft.cpuparams_batch.n_threads = params_base.speculative.cpuparams_batch.n_threads;
2016+
2014 2017
llama_init_dft = common_init_from_params(params_dft);
2015 2018

2016 2019
model_dft = llama_init_dft.model.get();

0 commit comments

Comments
 (0)