We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 8c1ab26 commit fe19299 Copy full SHA for fe19299
lightllm/server/core/objs/req.py
@@ -317,7 +317,11 @@ def get_decode_need_tokens(self):
317
"""
318
# 当开启 mtp 模式以后,每一次 decode 需要的 token 数量会增加
319
need_tokens = min(self.input_len + self.shm_cur_output_len - self.shm_cur_kv_len, self.chunked_prefill_size)
320
- if need_tokens == 1:
+ if need_tokens == 1 and self._mtp_step > 0:
321
+ # self._mtp_step > 0 时,说明开启了 mtp 模式,每次 decode 需要额外的 mem token 资源
322
+ # "deepseekv3_vanilla" 模式需要的 mem 用量为 self._mtp_step + 1
323
+ # "deepseekv3_eagle" 模式需要的 mem 用量为 (self._mtp_step + 1)* 2
324
+ # 为了简化统一 返回 (self._mtp_step + 1)* 2
325
need_tokens = (self._mtp_step + 1) * 2
326
327
return need_tokens
0 commit comments