@@ -100,13 +100,13 @@ def __init__(self, args: StartArgs, router_port, detokenization_port, metric_por
100100 # 主要是为了防止调度失误,造成 OOM 等错误
101101 self .router_lock = mp .Lock ()
102102 g_router_lock .obj = self .router_lock
103+ return
103104
104- # 调度和推理进行折叠使用的线程池
105+ async def wait_to_model_ready (self ):
106+ # 调度使用的对象
105107 self .schedule_new_batch : Batch = None
106108 self .schedule_event = asyncio .Event ()
107- return
108109
109- async def wait_to_model_ready (self ):
110110 # 初始化模型
111111 self .model_rpc_servers = []
112112 # 用于 kv move 管理进程 和 推理进程进行task信息的交互。
@@ -426,6 +426,13 @@ def start_router_process(args, router_port, detokenization_port, metric_port, pi
426426 graceful_registry (inspect .currentframe ().f_code .co_name )
427427 start_parent_check_thread ()
428428
429+ def handle_exception (loop , context ):
430+ logger .exception (f"Router Caught exception: { str (context )} " )
431+
432+ loop = asyncio .new_event_loop ()
433+ loop .set_exception_handler (handle_exception )
434+ asyncio .set_event_loop (loop )
435+
429436 try :
430437 router = RouterManager (
431438 args ,
@@ -434,7 +441,7 @@ def start_router_process(args, router_port, detokenization_port, metric_port, pi
434441 metric_port = metric_port ,
435442 )
436443
437- asyncio . run (router .wait_to_model_ready ())
444+ loop . run_until_complete (router .wait_to_model_ready ())
438445 except :
439446 import traceback
440447 import sys
@@ -447,14 +454,6 @@ def start_router_process(args, router_port, detokenization_port, metric_port, pi
447454 raise
448455
449456 pipe_writer .send ("init ok" )
450-
451- def handle_exception (loop , context ):
452- logger .exception (f"Router Caught exception: { str (context )} " )
453-
454- loop = asyncio .new_event_loop ()
455- loop .set_exception_handler (handle_exception )
456- asyncio .set_event_loop (loop )
457-
458457 loop .create_task (router .loop_for_fwd ())
459458 loop .run_until_complete (router .loop_for_netio_req ())
460459 return
0 commit comments