
Commit a6b1258

Author: wangzaijun (committed)
Commit message: fix
1 parent: fc8d209

File tree (1 file changed: +1, -2 lines)

  • lightllm/server/router/model_infer/mode_backend/chunked_prefill


lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl.py

Lines changed: 1 addition & 2 deletions
@@ -342,12 +342,11 @@ def _draft_decode_vanilla(
         all_next_token_ids = []
         all_next_token_ids.append(next_token_ids)
         # process the draft model output
-        for _step in range(self.mtp_step):
+        for draft_model_idx in range(self.mtp_step):
 
             draft_model_input.input_ids = draft_next_token_ids
             draft_model_input.deepseekv3_mtp_draft_input_hiddens = draft_model_output.deepseekv3_mtp_main_output_hiddens
             # spec decode: MTP
-            draft_model_idx = _step % self.num_mtp_models
             draft_model_output: ModelOutput = self.draft_models[draft_model_idx].forward(draft_model_input)
             draft_next_token_ids = self._gen_argmax_token_ids(draft_model_output)
             all_next_token_ids.append(draft_next_token_ids)
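
For readers skimming the diff: the change swaps the cycling draft-model index for the raw step counter. Below is a minimal, self-contained sketch of the indexing difference only; the values mtp_step = 3 and num_mtp_models = 2 are illustrative assumptions, not taken from the repository, and the real loop is the one in _draft_decode_vanilla in impl.py above.

    # Illustrative sketch only: mtp_step and num_mtp_models are made-up values,
    # not read from any lightllm configuration.
    mtp_step = 3
    num_mtp_models = 2

    # Before this commit: the draft-model index cycled over the loaded models.
    indices_before = [_step % num_mtp_models for _step in range(mtp_step)]  # [0, 1, 0]

    # After this commit: the loop counter is the model index directly, so draft
    # step i always drives self.draft_models[i] (this presumes at least
    # mtp_step draft models are available).
    indices_after = list(range(mtp_step))  # [0, 1, 2]

    print(indices_before, indices_after)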
