Skip to content

Commit ffc1ad4

Browse files
committed
fix
1 parent 50591d3 commit ffc1ad4

File tree

5 files changed

+19
-8
lines changed

5 files changed

+19
-8
lines changed

lightllm/server/router/model_infer/mode_backend/base_backend.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,10 @@ def __init__(self) -> None:
4646

4747
self.enable_decode_microbatch_overlap = get_env_start_args().enable_decode_microbatch_overlap
4848
self.enable_prefill_microbatch_overlap = get_env_start_args().enable_prefill_microbatch_overlap
49+
50+
# 控制分类的参数变量 (parameters that control how requests are classified)
51+
self.classed_req_no_decode = False
52+
self.classed_req_strict_prefill = False
4953
pass
5054

5155
def init_model(self, kvargs):

lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
class ChunkedPrefillBackend(ModeBackend):
2626
def __init__(self) -> None:
2727
super().__init__()
28-
28+
2929
# 用于控制每一步是执行prefill 和 decode 还是跳过 (controls whether each step runs prefill and decode, or is skipped)
3030
self.control_state_machine = ControlState()
3131

@@ -47,11 +47,14 @@ def infer_loop(self):
4747

4848
self._try_read_new_reqs()
4949

50-
prefill_reqs, decode_reqs = self._get_classed_reqs(recover_paused=self.control_state_machine.try_recover_paused_reqs())
51-
52-
run_way = self.control_state_machine.select_run_way(prefill_reqs=prefill_reqs,
53-
decode_reqs=decode_reqs)
54-
50+
prefill_reqs, decode_reqs = self._get_classed_reqs(
51+
no_decode=self.classed_req_no_decode,
52+
strict_prefill=self.classed_req_strict_prefill,
53+
recover_paused=self.control_state_machine.try_recover_paused_reqs(),
54+
)
55+
56+
run_way = self.control_state_machine.select_run_way(prefill_reqs=prefill_reqs, decode_reqs=decode_reqs)
57+
5558
if run_way.is_prefill():
5659
self.prefill(
5760
event_pack=event_pack,
@@ -69,7 +72,7 @@ def infer_loop(self):
6972
event_pack.notify_forward_and_wait_post_handle()
7073
event_pack.notify_pre_post_handle()
7174
continue
72-
75+
7376
except BaseException as e:
7477
self.logger.exception(str(e))
7578
raise e

lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/prefill_node_impl/prefill_impl.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ def __init__(self, info_queue: mp.Queue, mem_queue: mp.Queue) -> None:
2424
super().__init__()
2525
self.info_queue: mp.Queue = info_queue
2626
self.mem_queue: mp.Queue = mem_queue
27+
self.classed_req_no_decode = True
2728

2829
def init_custom(self):
2930

lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/prefill_node_impl/prefill_impl_for_dp.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ def __init__(self, info_queue: mp.Queue, mem_queue: mp.Queue) -> None:
1313
super().__init__()
1414
self.info_queue: mp.Queue = info_queue
1515
self.mem_queue: mp.Queue = mem_queue
16+
self.classed_req_no_decode = True
1617

1718
def init_custom(self):
1819
ChunckedPrefillForPrefillNode.init_custom(self)

lightllm/server/router/model_infer/mode_backend/dp_backend/impl.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,9 @@ def infer_loop(self):
5959
self._try_read_new_reqs()
6060

6161
prefill_reqs, decode_reqs = self._get_classed_reqs(
62-
recover_paused=self.control_state_machine.try_recover_paused_reqs()
62+
no_decode=self.classed_req_no_decode,
63+
strict_prefill=self.classed_req_strict_prefill,
64+
recover_paused=self.control_state_machine.try_recover_paused_reqs(),
6365
)
6466

6567
dp_prefill_req_nums, dp_decode_req_nums = self._dp_all_gather_prefill_and_decode_req_num(

0 commit comments

Comments
 (0)