Skip to content

Commit 7a0a88d

Browse files
server: apply grammar before other samplers
1 parent a69f846 commit 7a0a88d

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

examples/server/server.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3249,7 +3249,7 @@ struct server_context {
3249 3249
3250 3250   const int tok_idx = slot.i_batch - i;
3251 3251
3252      - llama_token id = common_sampler_sample(slot.smpl, ctx, tok_idx);
     3252 + llama_token id = common_sampler_sample(slot.smpl, ctx, tok_idx, true);
3253 3253
3254 3254   slot.i_batch = -1;
3255 3255
@@ -3347,7 +3347,7 @@ struct server_context {
3347 3347   llama_decode(ctx, slot.batch_spec);
3348 3348
3349 3349   // the accepted tokens from the speculation
3350      - const auto ids = common_sampler_sample_and_accept_n(slot.smpl, ctx, draft);
     3350 + const auto ids = common_sampler_sample_and_accept_n(slot.smpl, ctx, draft, true);
3351 3351
3352 3352   slot.n_past += ids.size();
3353 3353   slot.n_decoded += ids.size();

0 commit comments

Comments
 (0)