Skip to content

Commit a0bf0ff

Browse files
committed
fix pd
1 parent 1b3bea1 commit a0bf0ff

File tree

3 files changed

+15
-7
lines changed

3 files changed

+15
-7
lines changed

lightllm/server/httpserver_for_pd_master/manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ async def fetch_stream(
203203
if old_max_new_tokens != 1:
204204
finish_status = FinishStatus(FinishStatus.NO_FINISH)
205205
else:
206-
finish_status = FinishStatus(finish_status)
206+
finish_status = FinishStatus(FinishStatus.FINISHED_LENGTH)
207207
# 得到 p 节点返回的 prompt_ids 信息
208208
if metadata.get("prompt_ids", None) is not None:
209209
prompt_ids = metadata.get("prompt_ids")

lightllm/server/router/manager.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,12 @@ def start_router_process(args, router_port, detokenization_port, metric_port, pi
459459
raise
460460

461461
pipe_writer.send("init ok")
462+
463+
def handle_exception(loop, context):
    """Log an error reported to the router's asyncio event loop.

    Installed on the loop via ``loop.set_exception_handler``.

    Args:
        loop: The event loop reporting the error (unused).
        context: asyncio error-context dict; it may carry the exception
            object under the ``"exception"`` key.
    """
    # asyncio frequently calls this handler outside any ``except`` block,
    # where ``logger.exception`` finds no active exception and logs no
    # traceback. Pass the exception from the context explicitly instead.
    exc = context.get("exception")
    logger.error(f"Router Caught exception: {str(context)}", exc_info=exc)
465+
462466
loop = asyncio.new_event_loop()
467+
loop.set_exception_handler(handle_exception)
463468
asyncio.set_event_loop(loop)
464469
loop.create_task(router.loop_for_fwd())
465470
loop.run_until_complete(router.loop_for_netio_req())

lightllm/server/router/req_queue/continues_batch/impl_for_pd_decode.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,15 @@ def generate_new_batch(self, current_batch: Batch, limit_router_queue_length: in
5858
def _calcu_batch_token_load_batch_not_none(self, current_batch: Batch):
5959
is_busy = self.is_busy()
6060
self._init_cache_list(current_batch, is_busy)
61-
self.cache_len_list.sort(key=lambda x: -x[1])
62-
left_out_len_array = np.array([e[1] for e in self.cache_len_list])
63-
has_run_len_array = np.array([e[0] for e in self.cache_len_list])
64-
cum_run_len_array = np.cumsum(has_run_len_array)
65-
size_array = np.arange(1, len(self.cache_len_list) + 1, 1)
66-
need_max_token_num = (left_out_len_array * size_array + cum_run_len_array).max()
61+
if len(self.cache_len_list) == 0:
62+
self.cache_len_list.sort(key=lambda x: -x[1])
63+
left_out_len_array = np.array([e[1] for e in self.cache_len_list])
64+
has_run_len_array = np.array([e[0] for e in self.cache_len_list])
65+
cum_run_len_array = np.cumsum(has_run_len_array)
66+
size_array = np.arange(1, len(self.cache_len_list) + 1, 1)
67+
need_max_token_num = (left_out_len_array * size_array + cum_run_len_array).max()
68+
else:
69+
need_max_token_num = 0
6770
with g_router_lock.obj:
6871
return (
6972
need_max_token_num,

0 commit comments

Comments
 (0)