Skip to content

Commit e2b68b3

Browse files
authored
fix mtp in rl (#4234)
1 parent 8a50650 commit e2b68b3

File tree

2 files changed

+7
-1
lines changed

2 files changed

+7
-1
lines changed

fastdeploy/engine/args_utils.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1028,7 +1028,10 @@ def create_engine_config(self) -> FDConfig:
1028 1028             if paddle.is_compiled_with_xpu():
1029 1029                 self.max_num_batched_tokens = self.max_model_len
1030 1030             else:
1031      -               self.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM
     1031 +               if speculative_cfg is not None and speculative_cfg.method is not None:
     1032 +                   self.max_num_batched_tokens = self.max_model_len
     1033 +               else:
     1034 +                   self.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM
1032 1035         else:
1033 1036             if self.enable_chunked_prefill:
1034 1037                 self.max_num_batched_tokens = 2048

fastdeploy/output/token_processor.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,9 @@ def _process_batch_output(self):
332 332                         + accept_num[i]
333 333                     ].tolist()
334 334                     if len(token_ids) == 0 or token_ids[-1] <= 0:
    335 +                       if envs.ENABLE_V1_KVCACHE_SCHEDULER:
    336 +                           if task_id in self.resource_manager.to_be_rescheduled_request_id_set:
    337 +                               self.resource_manager.reschedule_preempt_task(task_id)
335 338                         continue
336 339                     else:
337 340                         token_id = int(tokens[i, 0])

0 commit comments

Comments (0)