We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a69f846, commit 7a0a88d (Copy full SHA for 7a0a88d)
examples/server/server.cpp
@@ -3249,7 +3249,7 @@ struct server_context {
3249
3250
const int tok_idx = slot.i_batch - i;
3251
3252
- llama_token id = common_sampler_sample(slot.smpl, ctx, tok_idx);
+ llama_token id = common_sampler_sample(slot.smpl, ctx, tok_idx, true);
3253
3254
slot.i_batch = -1;
3255
@@ -3347,7 +3347,7 @@ struct server_context {
3347
llama_decode(ctx, slot.batch_spec);
3348
3349
// the accepted tokens from the speculation
3350
- const auto ids = common_sampler_sample_and_accept_n(slot.smpl, ctx, draft);
+ const auto ids = common_sampler_sample_and_accept_n(slot.smpl, ctx, draft, true);
3351
3352
slot.n_past += ids.size();
3353
slot.n_decoded += ids.size();
0 commit comments