@@ -4,7 +4,6 @@
 import ray

 from graphgen.bases import BaseLLMWrapper
-from graphgen.common.init_storage import get_actor_handle
 from graphgen.models import Tokenizer


@@ -74,9 +73,9 @@ class LLMServiceProxy(BaseLLMWrapper):
     A proxy class to interact with the LLMServiceActor for distributed LLM operations.
     """

-    def __init__(self, actor_name: str):
+    def __init__(self, actor_handle: ray.actor.ActorHandle):
         super().__init__()
-        self.actor_handle = get_actor_handle(actor_name)
+        self.actor_handle = actor_handle
         self._create_local_tokenizer()

     async def generate_answer(
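For context, this hunk switches the proxy from resolving the actor by name at construction time to holding a Ray actor handle directly. A minimal sketch of the handle-based proxy pattern, using an illustrative EchoActor/EchoProxy pair rather than graphgen's real classes:

import ray

ray.init(ignore_reinit_error=True)

class EchoActor:
    # Stand-in for LLMServiceActor; simply echoes the prompt back.
    def generate(self, prompt: str) -> str:
        return f"echo: {prompt}"

class EchoProxy:
    def __init__(self, actor_handle: ray.actor.ActorHandle):
        # Hold the handle itself; no ray.get_actor(name) lookup is needed here.
        self.actor_handle = actor_handle

    def generate(self, prompt: str) -> str:
        # Forward the call to the remote actor and block for the result.
        return ray.get(self.actor_handle.generate.remote(prompt))

handle = ray.remote(EchoActor).remote()
proxy = EchoProxy(handle)
print(proxy.generate("hello"))  # -> "echo: hello"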
@@ -128,25 +127,25 @@ def create_llm(

     actor_name = f"Actor_LLM_{model_type}"
     try:
-        ray.get_actor(actor_name)
+        actor_handle = ray.get_actor(actor_name)
+        print(f"Using existing Ray actor: {actor_name}")
     except ValueError:
         print(f"Creating Ray actor for LLM {model_type} with backend {backend}.")
         num_gpus = float(config.pop("num_gpus", 0))
-        actor = (
+        actor_handle = (
             ray.remote(LLMServiceActor)
             .options(
                 name=actor_name,
                 num_gpus=num_gpus,
-                lifetime="detached",
                 get_if_exists=True,
             )
             .remote(backend, config)
         )

         # wait for actor to be ready
-        ray.get(actor.ready.remote())
+        ray.get(actor_handle.ready.remote())

-    return LLMServiceProxy(actor_name)
+    return LLMServiceProxy(actor_handle)


 def _load_env_group(prefix: str) -> Dict[str, Any]:
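The get-or-create flow in create_llm relies on Ray's named-actor semantics: ray.get_actor raises ValueError when no actor with that name is registered, and get_if_exists=True makes the creation race-safe if another worker registers the same name concurrently. A minimal sketch of that flow, assuming a trivial DemoActor in place of LLMServiceActor:

import ray

ray.init(ignore_reinit_error=True)

class DemoActor:
    def ready(self) -> bool:
        # Cheap no-op used only to confirm the actor finished constructing.
        return True

actor_name = "Actor_LLM_demo"
try:
    # Reuse the actor if one with this name is already registered.
    actor_handle = ray.get_actor(actor_name)
except ValueError:
    # Otherwise create it; get_if_exists=True returns the existing actor
    # instead of raising if a concurrent caller registered the name first.
    actor_handle = (
        ray.remote(DemoActor)
        .options(name=actor_name, get_if_exists=True)
        .remote()
    )

# Block until the actor's constructor has completed.
assert ray.get(actor_handle.ready.remote())

Note that since the diff also drops lifetime="detached", the actor is now tied to the lifetime of the job that created it and is torn down when that job exits, rather than persisting across drivers.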