
Commit 366f4d4

fix
Parent: f1224be

1 file changed (+1, -3)

lightllm/server/router/model_infer/mode_backend/dp_backend/impl.py

Lines changed: 1 addition & 3 deletions
@@ -36,7 +36,6 @@ def __init__(self) -> None:
             self.prefill = self.prefill_overlap_mtp
         else:
             self.prefill = self.prefill_mtp
-
         if self.enable_decode_microbatch_overlap:
             self.decode = self.decode_overlap_mtp
         else:
@@ -543,7 +542,6 @@ def prefill_overlap_mtp(self, event_pack: OverlapEventPack, prefill_reqs: List[I
             run_reqs1,
             padded_req_num1,
         ) = padded_overlap_prepare_prefill_inputs(prefill_reqs, is_multimodal=self.is_multimodal)
-        print(micro_input0, micro_input1)
         with torch.cuda.stream(g_infer_context.get_overlap_stream()):
             micro_output0, micro_output1 = self.model.microbatch_overlap_prefill(micro_input0, micro_input1)
             logits0 = micro_output0.logits
@@ -622,7 +620,6 @@ def prefill_overlap_mtp(self, event_pack: OverlapEventPack, prefill_reqs: List[I

         event_pack.notify_forward_and_wait_post_handle()
         sync_event.synchronize()
-        print(next_token_ids_cpu)

         self._post_handle(
             run_reqs=run_reqs,
@@ -767,6 +764,7 @@ def decode_overlap_mtp(self, event_pack: OverlapEventPack, decode_reqs: List[Inf
                 g_infer_state_lock.acquire()
                 g_infer_context.req_manager.mem_manager.free(need_free_mem_indexes)
                 g_infer_state_lock.release()
+                event_pack.notify_pre_post_handle()
             else:
                 event_pack.notify_post_handle_and_wait_pre_post_handle()
         event_pack.notify_forward_and_wait_post_handle()
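
Note on the added call (an interpretation, not stated in the commit itself): the OverlapEventPack method names (notify_pre_post_handle, notify_post_handle_and_wait_pre_post_handle) suggest a notify/wait handshake between the forward step and the post-handle step, so a branch that frees memory but never notifies could leave the waiting side stalled. Below is a minimal, purely hypothetical sketch of that failure mode using threading.Event; every name in it is invented and it is not lightllm's actual implementation.

import threading

# Hypothetical illustration only: two steps coordinated by an event.
# If one branch of the producing side forgets to set the event, the
# waiting side blocks forever -- the kind of stall a missing notify
# call can cause in a notify/wait handshake.

pre_post_handle_done = threading.Event()

def post_handle_worker():
    # Blocks until the producing side signals that its work is done.
    pre_post_handle_done.wait()
    print("post handle can proceed")

def producer(freed_memory: bool):
    if freed_memory:
        # ... free memory here ...
        pre_post_handle_done.set()  # without this set(), the worker above never wakes up
    else:
        # the other branch signals as well, so every path unblocks the waiter
        pre_post_handle_done.set()

t = threading.Thread(target=post_handle_worker)
t.start()
producer(freed_memory=True)
t.join()

In this reading, the added event_pack.notify_pre_post_handle() appears to play the role of the set() call in the memory-freeing branch above.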
