Skip to content

Commit b22f77a

Browse files
committed
fix
1 parent 8a36651 commit b22f77a

File tree

2 files changed

+1
-26
lines changed

2 files changed

+1
-26
lines changed

lightllm/server/router/model_infer/mode_backend/base_backend.py

Lines changed: 0 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -398,31 +398,6 @@ def _post_handle(
398398
)
399399
return
400400

401-
# 一些可以复用的通用功能函数
402-
def _overlap_req_init_and_filter(
403-
self, uninit_reqs: List[InferReq], ok_finished_reqs: List[InferReq], clear_list=False
404-
):
405-
if uninit_reqs or ok_finished_reqs:
406-
# 利用推理的时间,延迟折叠下一个请求的初始化和退出操作
407-
with torch.cuda.stream(g_infer_context.get_overlap_stream()):
408-
if ok_finished_reqs:
409-
g_infer_state_lock.acquire()
410-
g_infer_context.filter_reqs(ok_finished_reqs)
411-
g_infer_state_lock.release()
412-
413-
if uninit_reqs:
414-
g_infer_state_lock.acquire()
415-
self._post_init_reqs(uninit_reqs)
416-
g_infer_state_lock.release()
417-
418-
torch.cuda.current_stream().wait_stream(g_infer_context.get_overlap_stream())
419-
420-
if clear_list:
421-
uninit_reqs.clear()
422-
ok_finished_reqs.clear()
423-
424-
return
425-
426401
# 一些可以复用的通用功能函数
427402
def _filter_reqs(self, reqs: List[InferReq]):
428403
if reqs:

lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,7 @@ def normal_decode(
8686

8787
# 第二阶段
8888
event_pack.notify_post_handle_and_wait_pre_post_handle()
89-
update_packs = self._pre_post_handle(run_reqs, is_chuncked_mode=not self.disable_chunked_prefill)
89+
update_packs = self._pre_post_handle(run_reqs, is_chuncked_mode=False)
9090

9191
# 第三阶段
9292
event_pack.notify_forward_and_wait_post_handle()

0 commit comments

Comments (0)