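Fix PD: master finish status, router event-loop exception logging, and empty cache list handling in the PD decode queue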

shihaobai · shihaobai · commit a0bf0ff22b70 · 2025-03-27T19:53:30.000+08:00
diff --git a/lightllm/server/httpserver_for_pd_master/manager.py b/lightllm/server/httpserver_for_pd_master/manager.py
@@ -203,7 +203,7 @@ async def fetch_stream(
                     if old_max_new_tokens != 1:
                         finish_status = FinishStatus(FinishStatus.NO_FINISH)
                     else:
-                        finish_status = FinishStatus(finish_status)
+                        finish_status = FinishStatus(FinishStatus.FINISHED_LENGTH)
                     # 得到 p 节点返回的 prompt_ids 信息
                     if metadata.get("prompt_ids", None) is not None:
                         prompt_ids = metadata.get("prompt_ids")
diff --git a/lightllm/server/router/manager.py b/lightllm/server/router/manager.py
@@ -459,7 +459,12 @@ def start_router_process(args, router_port, detokenization_port, metric_port, pi
         raise
 
     pipe_writer.send("init ok")
+
+    def handle_exception(loop, context):  # asyncio handler; context may hold the raised "exception"
+        logger.error(f"Router Caught exception: {str(context)}", exc_info=context.get("exception"))
+
     loop = asyncio.new_event_loop()
+    loop.set_exception_handler(handle_exception)
     asyncio.set_event_loop(loop)
     loop.create_task(router.loop_for_fwd())
     loop.run_until_complete(router.loop_for_netio_req())
diff --git a/lightllm/server/router/req_queue/continues_batch/impl_for_pd_decode.py b/lightllm/server/router/req_queue/continues_batch/impl_for_pd_decode.py
@@ -58,12 +58,15 @@ def generate_new_batch(self, current_batch: Batch, limit_router_queue_length: in
     def _calcu_batch_token_load_batch_not_none(self, current_batch: Batch):
         is_busy = self.is_busy()
         self._init_cache_list(current_batch, is_busy)
-        self.cache_len_list.sort(key=lambda x: -x[1])
-        left_out_len_array = np.array([e[1] for e in self.cache_len_list])
-        has_run_len_array = np.array([e[0] for e in self.cache_len_list])
-        cum_run_len_array = np.cumsum(has_run_len_array)
-        size_array = np.arange(1, len(self.cache_len_list) + 1, 1)
-        need_max_token_num = (left_out_len_array * size_array + cum_run_len_array).max()
+        if len(self.cache_len_list) != 0:  # ndarray.max() raises ValueError on an empty array
+            self.cache_len_list.sort(key=lambda x: -x[1])
+            left_out_len_array = np.array([e[1] for e in self.cache_len_list])
+            has_run_len_array = np.array([e[0] for e in self.cache_len_list])
+            cum_run_len_array = np.cumsum(has_run_len_array)
+            size_array = np.arange(1, len(self.cache_len_list) + 1, 1)
+            need_max_token_num = (left_out_len_array * size_array + cum_run_len_array).max()
+        else:
+            need_max_token_num = 0  # empty cache list: nothing to sum or maximize
         with g_router_lock.obj:
             return (
                 need_max_token_num,