
Commit 765330e

update log & fix dp pause

1 parent: 558be53
3 files changed, +9 -9 lines

lightllm/server/router/manager.py (6 additions, 6 deletions)

@@ -244,17 +244,19 @@ async def loop_for_fwd(
                 estimated_peak_token_count = self.shared_token_load.get_estimated_peak_token_count(d_i)
                 logger.debug(
                     f"dp_i {d_i} current batch size: {len(self.running_batch.reqs)} \n"
-                    f"dp_i {d_i} paused req num: {self.req_queue.get_paused_req_num()} \n"
+                    f"dp_i {d_i} paused req num: {self.req_queue.get_paused_req_num(d_i)} \n"
                     f"dp_i {d_i} frozen token num: {frozen_token_num} \n"
                     f"dp_i {d_i} estimated_peak_token_count: {estimated_peak_token_count} \n"
                     f"dp_i {d_i} token used ratio: {token_ratio1} not contain prompt cache tree unrefed token\n"
                     f"dp_i {d_i} token used ratio: {token_ratio2} contain prompt cache tree unrefed token"
                 )
+                self.metric_client.gauge_set(
+                    "lightllm_batch_pause_size", self.req_queue.get_paused_req_num(d_i)
+                )
             # pd decode mode need to update token_load more frequently
             self.req_queue.update_token_load(self.running_batch, force_update=self.is_pd_decode_mode)
             self.stats_tool.print_stats()
             self.metric_client.gauge_set("lightllm_batch_current_size", len(self.running_batch.reqs))
-            self.metric_client.gauge_set("lightllm_batch_pause_size", self.req_queue.get_paused_req_num())
             self.metric_client.gauge_set("lightllm_queue_size", self.req_queue.get_wait_req_num())
             self.metric_client.gauge_set(
                 "lightllm_batch_current_max_tokens",

@@ -358,15 +360,13 @@ async def _step(self):

         # Check if need pause some requests for decode.
         for dp_index in range(self.dp_size_in_node):
-            if self._can_decode(self.running_batch, dp_index=dp_index):
-                continue
-            else:
+            while not self._can_decode(self.running_batch, dp_index=dp_index):
                 # pause strategy
                 paused_reqs = select_paused_reqs(
                     self.running_batch, self.pause_strategy, self.req_queue, self.max_total_token_num, dp_index=dp_index
                 )
                 await self._pause_reqs(paused_reqs)
-                logger.debug(f"DP index {dp_index} pasues req num: {self.req_queue.get_paused_req_num()}")
+                logger.debug(f"DP index {dp_index} pasues req num: {self.req_queue.get_paused_req_num(dp_index)}")
                 self.has_wait_tokens = 0

         # Decode
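
A minimal sketch (not lightllm's actual code) of the control-flow change in _step(): before this commit, a dp replica that could not decode paused one batch of requests and moved on; the while loop instead keeps pausing until that replica can actually decode. The names below are illustrative stand-ins for self._can_decode, select_paused_reqs, and self._pause_reqs.

from typing import Callable, List


def pause_until_decodable(
    can_decode: Callable[[int], bool],
    select_paused_reqs: Callable[[int], List[str]],
    pause_reqs: Callable[[List[str]], None],
    dp_index: int,
) -> None:
    # Old logic: `if can_decode: continue else: pause once`, which could leave the
    # replica still over its token budget for this step.
    # New logic: repeat the pause step until decoding fits for this dp_index.
    while not can_decode(dp_index):
        pause_reqs(select_paused_reqs(dp_index))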

lightllm/server/router/req_queue/base_queue.py (1 addition, 1 deletion)

@@ -38,7 +38,7 @@ def extend(self, req_group: List[Req]):
         self.waiting_req_list.extend(req_group)
         return

-    def get_paused_req_num(self):
+    def get_paused_req_num(self, fake_dp_index: int = 0):
         return len(self.pause_req_dict)

     def get_wait_req_num(self):

lightllm/server/router/req_queue/dp_base_queue.py (2 additions, 2 deletions)

@@ -26,8 +26,8 @@ def get_dp_queue(self, dp_index: int):
         assert dp_index < self.dp_size_in_node, "dp index out of range"
         return self.inner_queues[dp_index]

-    def get_paused_req_num(self):
-        return sum(queue.get_paused_req_num() for queue in self.inner_queues)
+    def get_paused_req_num(self, dp_index: int = 0):
+        return self.inner_queues[dp_index].get_paused_req_num()

     def get_wait_req_num(self):
         return sum(queue.get_wait_req_num() for queue in self.inner_queues)
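
A hypothetical, self-contained sketch of the new per-dp accounting: the base queue gains an ignored fake_dp_index argument purely to keep its signature compatible with the dp-aware queue, and the dp queue now reports paused requests for a single replica instead of summing all of them, matching the per-dp log line and the per-dp lightllm_batch_pause_size gauge above. The Fake* class names are illustrative, not lightllm's.

from typing import Dict, List


class FakeBaseQueue:
    def __init__(self) -> None:
        self.pause_req_dict: Dict[int, object] = {}

    def get_paused_req_num(self, fake_dp_index: int = 0) -> int:
        # the single-queue case ignores the index; the parameter only keeps
        # call sites uniform with the dp-aware queue below
        return len(self.pause_req_dict)


class FakeDpQueue:
    def __init__(self, dp_size_in_node: int) -> None:
        self.inner_queues: List[FakeBaseQueue] = [FakeBaseQueue() for _ in range(dp_size_in_node)]

    def get_paused_req_num(self, dp_index: int = 0) -> int:
        # previously: sum(q.get_paused_req_num() for q in self.inner_queues)
        # now: only the queue belonging to the requested dp replica
        return self.inner_queues[dp_index].get_paused_req_num()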
