File tree Expand file tree Collapse file tree 2 files changed +7
-1
lines changed Expand file tree Collapse file tree 2 files changed +7
-1
lines changed Original file line number Diff line number Diff line change @@ -1028,7 +1028,10 @@ def create_engine_config(self) -> FDConfig:
1028
1028
if paddle .is_compiled_with_xpu ():
1029
1029
self .max_num_batched_tokens = self .max_model_len
1030
1030
else :
1031
- self .max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
1031
+ if speculative_cfg is not None and speculative_cfg .method is not None :
1032
+ self .max_num_batched_tokens = self .max_model_len
1033
+ else :
1034
+ self .max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
1032
1035
else :
1033
1036
if self .enable_chunked_prefill :
1034
1037
self .max_num_batched_tokens = 2048
Original file line number Diff line number Diff line change @@ -332,6 +332,9 @@ def _process_batch_output(self):
332
332
+ accept_num [i ]
333
333
].tolist ()
334
334
if len (token_ids ) == 0 or token_ids [- 1 ] <= 0 :
335
+ if envs .ENABLE_V1_KVCACHE_SCHEDULER :
336
+ if task_id in self .resource_manager .to_be_rescheduled_request_id_set :
337
+ self .resource_manager .reschedule_preempt_task (task_id )
335
338
continue
336
339
else :
337
340
token_id = int (tokens [i , 0 ])
You can’t perform that action at this time.
0 commit comments