@@ -97,13 +97,13 @@ def _prepare_decode_task(self, request):
 
     def _prepare_preempt_task(self, request):
         return ScheduledPreemptTask(idx=request.idx, request_id=request.request_id)
-
+
     def reschedule_preempt_task(self, request_id):
         with self.lock:
             if request_id in self.to_be_rescheduled_request_id_set and request_id in self.requests:
                 request = self.requests[request_id]
                 self.waiting.appendleft(request)
-                self.to_be_rescheduled_request_id_set.remove(request_id)
+                self.to_be_rescheduled_request_id_set.remove(request_id)
 
     def _trigger_preempt(self, request, num_new_blocks, preempted_reqs, scheduled_reqs):
         can_schedule = True
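
Note on the reschedule_preempt_task hunk above: the remove() call sits inside the membership check, so a request id is dropped from to_be_rescheduled_request_id_set only when the request was actually found and pushed back onto waiting. A minimal sketch of that guarded pattern, using placeholder data structures rather than the real FastDeploy resource manager (the names mirror the diff; everything else is simplified):

# Sketch only: MiniScheduler is a stand-in, not the FastDeploy class.
import threading
from collections import deque


class MiniScheduler:
    def __init__(self):
        self.lock = threading.Lock()
        self.requests = {}  # request_id -> request object
        self.waiting = deque()  # requests eligible for scheduling
        self.to_be_rescheduled_request_id_set = set()

    def reschedule_preempt_task(self, request_id):
        with self.lock:
            if request_id in self.to_be_rescheduled_request_id_set and request_id in self.requests:
                self.waiting.appendleft(self.requests[request_id])
                # Remove the marker only on this path; calling set.remove() for an
                # id that is not tracked would raise KeyError.
                self.to_be_rescheduled_request_id_set.remove(request_id)
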
@@ -142,26 +142,31 @@ def _get_num_new_tokens(self, request, token_budget):
 
         input_ids_lst = request.prompt_token_ids + request.output_token_ids
         input_ids = paddle.to_tensor(input_ids_lst, dtype="int64")
-        grid_thw = []
-        for one in inputs["grid_thw"]:
-            if one[0] == 1:
-                grid_thw.append(one)
-            else:
-                grid_thw.extend([[2, one[1], one[2]]] * (one[0] // 2))
-
+        input_ids = paddle.to_tensor(input_ids_lst, dtype="int64")
         image_patch_id = inputs["image_patch_id"]
-        grid_thw = paddle.to_tensor(grid_thw, dtype="int64")
+
         if request.multimodal_img_boundaries is None:
+            grid_thw = []
+            for one in inputs["grid_thw"]:
+                if one[0] == 1:
+                    grid_thw.append(one)
+                else:
+                    grid_thw.extend([[2, one[1], one[2]]] * (one[0] // 2))
+
+            grid_thw = paddle.to_tensor(grid_thw, dtype="int64")
             from fastdeploy.model_executor.ops.gpu import get_img_boundaries
 
             request.multimodal_img_boundaries = get_img_boundaries(
                 task_input_ids=input_ids, grid_thw=grid_thw, image_patch_id=image_patch_id
             ).numpy()
 
+            grid_thw = grid_thw.numpy().reshape([-1, 3])
+            inputs["grid_thw"] = grid_thw
+
+        grid_thw = inputs["grid_thw"]
         img_boundaries_idx = request.multimodal_img_boundaries[0]
         img_num_per_boundary = request.multimodal_img_boundaries[1]
         ori_prompt_len = img_boundaries_idx[-1].item()
-        grid_thw = grid_thw.numpy().reshape([-1, 3])
         pre_end_idx = request.num_computed_tokens
         new_end_idx = pre_end_idx + num_new_tokens
         if new_end_idx < ori_prompt_len and input_ids[new_end_idx - 1] == image_patch_id:
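
The _get_num_new_tokens hunk above moves the grid_thw normalization and the get_img_boundaries call under the `request.multimodal_img_boundaries is None` guard, then writes the reshaped array back into inputs["grid_thw"], so later scheduling passes for the same request read the cached value via `grid_thw = inputs["grid_thw"]` instead of rebuilding the tensor each time. A rough sketch of the normalization and caching in plain NumPy (normalize_grid_thw is a hypothetical helper name; the real code builds a Paddle tensor and feeds it to the get_img_boundaries GPU op):

# Sketch only: pure NumPy stand-in for the grid_thw normalization done once
# per request under the `multimodal_img_boundaries is None` branch.
import numpy as np


def normalize_grid_thw(raw_grid_thw):
    """Keep [1, h, w] entries; split [t, h, w] with t > 1 into t // 2 chunks of [2, h, w]."""
    grid_thw = []
    for one in raw_grid_thw:
        if one[0] == 1:
            grid_thw.append(one)
        else:
            grid_thw.extend([[2, one[1], one[2]]] * (one[0] // 2))
    return np.asarray(grid_thw, dtype="int64").reshape([-1, 3])


# Example: one single-frame image plus one 8-frame video grid.
inputs = {"grid_thw": [[1, 32, 32], [8, 16, 16]]}
inputs["grid_thw"] = normalize_grid_thw(inputs["grid_thw"])
print(inputs["grid_thw"].shape)  # (5, 3): 1 image row + 4 chunks of [2, 16, 16]

Because the normalized array is cached on inputs, the per-entry loop and the tensor conversion run only on the first call for a request, when the image boundaries are computed.
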
@@ -421,9 +426,15 @@ def finish_requests(self, request_ids: Union[str, Iterable[str]]):
             self.running.remove(request)
             request.status = RequestStatus.FINISHED
             self._free_blocks(request)
-            if request.request_id in self.to_be_rescheduled_request_id_set:  # finished after preempted, blocks have been recycled.
-                self.to_be_rescheduled_request_id_set.remove(request.request_id)  # just remove from to_be_rescheduled_request_id_set
-                if request in self.waiting:  # after finished, this request still scheduled from preempted to waiting, unexpected error, should not be here
+            if (
+                request.request_id in self.to_be_rescheduled_request_id_set
+            ):  # finished after preempted, blocks have been recycled.
+                self.to_be_rescheduled_request_id_set.remove(
+                    request.request_id
+                )  # just remove from to_be_rescheduled_request_id_set
+                if (
+                    request in self.waiting
+                ):  # after finished, this request still scheduled from preempted to waiting, unexpected error, should not be here
                     raise RuntimeError(f"request {request.request_id} scheduled into waiting list, after finished")
             self.tasks_list[request.idx] = None
             self.stop_flags[request.idx] = True
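
The finish_requests hunk is a formatting-only change: the long conditionals and their trailing comments are rewrapped, with no change in behavior. Unwrapped, the cleanup path reads as below; this is a paraphrase with placeholder arguments, not the actual class (`scheduler` stands in for the resource manager instance, and `finished_status` for the RequestStatus.FINISHED value used in the diff):

# Sketch only: paraphrase of the finish path from the hunk above.
def finish_one_request(scheduler, request, finished_status):
    scheduler.running.remove(request)
    request.status = finished_status  # RequestStatus.FINISHED in the real code
    scheduler._free_blocks(request)
    if request.request_id in scheduler.to_be_rescheduled_request_id_set:
        # Finished after being preempted: drop the pending-reschedule marker.
        scheduler.to_be_rescheduled_request_id_set.remove(request.request_id)
        # A finished request must never have been re-queued onto waiting.
        if request in scheduler.waiting:
            raise RuntimeError(f"request {request.request_id} scheduled into waiting list, after finished")
    scheduler.tasks_list[request.idx] = None
    scheduler.stop_flags[request.idx] = True
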