Skip to content

Commit 65b04b7

Browse files
committed
fix
1 parent 66c5d28 commit 65b04b7

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

lightllm/server/router/manager.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def __init__(self, args, router_port, detokenization_port, metric_port):
9898
self.stats_tool = Stats(not args.disable_log_stats, args.log_stats_interval)
9999
self.metric_client = MetricClient(metric_port)
100100
self.is_pd_run_mode = self.args.run_mode in ["prefill", "decode"]
101+
self.is_pd_decode_mode = self.args.run_mode == "decode"
101102
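# In PD (prefill/decode) disaggregated mode, a scheduling lock is needed to synchronize some data operations between the scheduler side and the inference side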
102103
# Mainly to prevent scheduling mistakes that would cause errors such as OOM
103104
self.router_lock = mp.Lock()
@@ -249,7 +250,8 @@ async def loop_for_fwd(
249250
f"dp_i {d_i} token used ratio: {token_ratio1} not contain prompt cache tree unrefed token\n"
250251
f"dp_i {d_i} token used ratio: {token_ratio2} contain prompt cache tree unrefed token"
251252
)
252-
self.req_queue.update_token_load(self.running_batch, force_update=False)
253+
# PD decode mode needs to update token_load more frequently
254+
self.req_queue.update_token_load(self.running_batch, force_update=self.is_pd_decode_mode)
253255
self.stats_tool.print_stats()
254256
self.metric_client.gauge_set("lightllm_batch_current_size", len(self.running_batch.reqs))
255257
self.metric_client.gauge_set("lightllm_batch_pause_size", self.req_queue.get_paused_req_num())

0 commit comments

Comments
 (0)