@@ -137,13 +137,23 @@ def _trigger_preempt(self, request, num_new_blocks, preempted_reqs, scheduled_re
137
137
preempted_req = self .running .pop ()
138
138
preempted_req .status = RequestStatus .PREEMPTED
139
139
preempted_req .num_computed_tokens = 0
140
- self ._free_blocks (preempted_req )
141
- preempted_req .cached_block_num = 0
142
- self .to_be_rescheduled_request_id_set .add (preempted_req .request_id )
140
+ if self .config .scheduler_config .splitwise_role == "decode" :
141
+ self .tasks_list [preempted_req .idx ] = None
142
+ self .stop_flags [preempted_req .idx ] = True
143
+ if preempted_req .request_id in self .requests :
144
+ del self .requests [preempted_req .request_id ]
145
+ if preempted_req .request_id in self .req_dict :
146
+ del self .req_dict [preempted_req .request_id ]
147
+ self ._free_blocks (preempted_req )
148
+ main_process_metrics .num_requests_running .dec (1 )
149
+ else :
150
+ self ._free_blocks (preempted_req )
151
+ preempted_req .cached_block_num = 0
152
+ self .to_be_rescheduled_request_id_set .add (preempted_req .request_id )
153
+ main_process_metrics .num_requests_waiting .inc (1 )
154
+ main_process_metrics .num_requests_running .dec (1 )
143
155
preempted_reqs .append (preempted_req )
144
156
scheduled_reqs .append (self ._prepare_preempt_task (preempted_req ))
145
- main_process_metrics .num_requests_waiting .inc (1 )
146
- main_process_metrics .num_requests_running .dec (1 )
147
157
if preempted_req == request :
148
158
# No more request to preempt.
149
159
can_schedule = False
@@ -588,8 +598,10 @@ def prerelease_resource(self, request: Request):
588
598
with self .lock :
589
599
self .tasks_list [request .idx ] = None
590
600
self .stop_flags [request .idx ] = True
591
- del self .requests [request .request_id ]
592
- del self .req_dict [request .request_id ]
601
+ if request .request_id in self .requests :
602
+ del self .requests [request .request_id ]
603
+ if request .request_id in self .req_dict :
604
+ del self .req_dict [request .request_id ]
593
605
self ._free_blocks (request )
594
606
595
607
def add_request_in_p (self , requests : list [Request ]):
0 commit comments