File tree Expand file tree Collapse file tree 1 file changed +6
-5
lines changed Expand file tree Collapse file tree 1 file changed +6
-5
lines changed Original file line number Diff line number Diff line change @@ -3543,18 +3543,19 @@ struct server_context {
35433543
35443544 const int tok_idx = slot.i_batch - i;
35453545
3546- // This should only trigger on a non-empty update batch once, after prompt processing but not during token generation
3547- if (slot.has_mtp ) {
3548- mtp_update_kv_cache (ctx, slot.mtp_kv_update_batch );
3549- }
3550-
35513546 llama_token id = common_sampler_sample (slot.smpl , ctx, tok_idx);
35523547 slot.last_tok_idx = tok_idx;
3548+ SRV_INF (" main loop sampled token: '%s'\n " , common_token_to_piece (ctx, id, true ).c_str ());
35533549
35543550 slot.i_batch = -1 ;
35553551
35563552 common_sampler_accept (slot.smpl , id, true );
35573553
3554+ // This should only trigger on a non-empty update batch once, after prompt processing but not during token generation
3555+ if (slot.has_mtp ) {
3556+ mtp_update_kv_cache (ctx, slot.mtp_kv_update_batch );
3557+ }
3558+
35583559 slot.n_decoded += 1 ;
35593560
35603561 const int64_t t_current = ggml_time_us ();
You can’t perform that action at this time.
0 commit comments