Skip to content

Commit 0ba40c3

Browse files
committed
server : add helper function slot.can_speculate()
ggml-ci
1 parent 156aa6d commit 0ba40c3

File tree

1 file changed: +6 −3 lines changed

examples/server/server.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -241,6 +241,10 @@ struct server_slot {
241241
return state != SLOT_STATE_IDLE;
242242
}
243243

244+
bool can_speculate() const {
245+
return ctx_dft && params.speculative.n_max > 0 && params.cache_prompt;
246+
}
247+
244248
void add_token(const completion_token_output & token) {
245249
if (!is_processing()) {
246250
SLT_WRN(*this, "%s", "slot is not processing\n");
@@ -1270,7 +1274,7 @@ struct server_context {
12701274
{"min_keep", slot.params.sampling.min_keep},
12711275
{"grammar", slot.params.sampling.grammar},
12721276
{"samplers", samplers},
1273-
{"speculative", slot.params.speculative.model.empty() ? false : true},
1277+
{"speculative", slot.can_speculate()},
12741278
{"speculative.n_max", slot.params.speculative.n_max},
12751279
{"speculative.n_min", slot.params.speculative.n_min},
12761280
{"speculative.p_min", slot.params.speculative.p_min},
@@ -2302,11 +2306,10 @@ struct server_context {
23022306
}
23032307

23042308
// check if the slot supports speculative decoding
2305-
if (!slot.ctx_dft || slot.params.speculative.n_max <= 0 || !slot.params.cache_prompt) {
2309+
if (!slot.can_speculate()) {
23062310
continue;
23072311
}
23082312

2309-
// TODO: configurable through requests
23102313
struct common_speculative_params params_spec;
23112314
params_spec.n_draft = slot.params.speculative.n_max;
23122315
params_spec.n_reuse = llama_n_ctx(slot.ctx_dft) - slot.params.speculative.n_max;

0 commit comments

Comments (0)