Skip to content

Commit fe19299

Browse files
author
wangzaijun
committed
fix
1 parent 8c1ab26 commit fe19299

File tree

1 file changed

+5
-1
lines changed
  • lightllm/server/core/objs

1 file changed

+5
-1
lines changed

lightllm/server/core/objs/req.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,11 @@ def get_decode_need_tokens(self):
317317
"""
318318
# 当开启 mtp 模式以后,每一次 decode 需要的 token 数量会增加
319319
need_tokens = min(self.input_len + self.shm_cur_output_len - self.shm_cur_kv_len, self.chunked_prefill_size)
320-
if need_tokens == 1:
320+
if need_tokens == 1 and self._mtp_step > 0:
321+
# self._mtp_step > 0 时,说明开启了 mtp 模式,每次 decode 需要额外的 mem token 资源
322+
# "deepseekv3_vanilla" 模式需要的 mem 用量为 self._mtp_step + 1
323+
# "deepseekv3_eagle" 模式需要的 mem 用量为 (self._mtp_step + 1) * 2
324+
# 为了简化统一,返回 (self._mtp_step + 1) * 2
321325
need_tokens = (self._mtp_step + 1) * 2
322326

323327
return need_tokens

0 commit comments

Comments
 (0)