
Commit 9a29a96

Add a switch for Qwen3 think mode (#37)
Parent: 1b9ec28 · Commit: 9a29a96

4 files changed: +11 −3 lines


trinity/common/config.py

Lines changed: 2 additions & 0 deletions

@@ -91,6 +91,8 @@ class ModelConfig:
     max_response_tokens: int = 2048
     # The checkpoint directory, contains a latest dir link and multiple checkpoint dirs.
     checkpoint_path: str = ""
+    # For models that support both thinking and non-thinking modes, e.g., Qwen3.
+    enable_thinking: bool = False


 @dataclass
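The flag defaults to off, so existing configs keep the non-thinking behavior. A minimal sketch of toggling it when constructing the dataclass directly (the field values here are illustrative, and real configs may need to set additional fields):

# Sketch: enabling Qwen3 think mode on the patched ModelConfig.
from trinity.common.config import ModelConfig

model_cfg = ModelConfig(
    checkpoint_path="/path/to/checkpoints",  # illustrative path
    enable_thinking=True,  # allow Qwen3 to emit <think>...</think> reasoning
)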

trinity/common/models/vllm_async_model.py

Lines changed: 2 additions & 0 deletions

@@ -65,6 +65,7 @@ def __init__(
             output_kind=RequestOutputKind.FINAL_ONLY,
             logprobs=config.explorer.logprobs,
         )
+        self.enable_thinking = config.model.enable_thinking
         self.request_id = 0
         engine_args = vllm.AsyncEngineArgs(
             model=config.model.model_path,
@@ -137,6 +138,7 @@ async def chat_async(self, messages: List[Dict], **kwargs) -> List[Experience]:
             tokenize=False,
             add_generation_prompt=True,
             chat_template=self.chat_template,
+            enable_thinking=self.enable_thinking,
         )
         return await self.generate_async(prompt=prompt, **kwargs)
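Note that enable_thinking is not a vLLM sampling parameter: it is forwarded as an extra keyword into the tokenizer's apply_chat_template, where chat templates that understand it (such as Qwen3's) use it to switch modes. A standalone sketch of the same call outside Trinity, with an illustrative model id:

# Sketch: how the flag reaches the chat template (transformers API).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B")  # illustrative model id
messages = [{"role": "user", "content": "What is 7 * 6?"}]

prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,  # Qwen3's template then pre-fills an empty <think></think> block
)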

trinity/common/models/vllm_model.py

Lines changed: 2 additions & 0 deletions

@@ -71,6 +71,7 @@ def __init__(self, config: Config, **kwargs):
         )
         self.tokenizer = self.llm.get_tokenizer()
         self.chat_template = self.tokenizer.get_chat_template()
+        self.enable_thinking = config.model.enable_thinking
         if self.config.explorer.chat_template:
             self.chat_template = self.config.explorer.chat_template
         if not re.search(r"\{\%-?\s*generation\s*-?\%\}", self.chat_template):
@@ -233,6 +234,7 @@ def chat(self, messages: List[dict], **kwargs) -> List[Experience]:
             tokenize=False,
             add_generation_prompt=True,
             chat_template=self.chat_template,
+            enable_thinking=self.enable_thinking,
         )
         return self.generate([prompt], **kwargs)
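Both the sync and async paths now pass the flag unconditionally. That is safe for models without a think mode because apply_chat_template forwards unknown keyword arguments into the Jinja rendering context, and templates that never reference enable_thinking simply ignore it. A quick check of that behavior (an observation about transformers, not code from this commit; the model id is illustrative):

# Sketch: an extra template kwarg is a no-op for templates that don't read it.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")  # no think mode
msgs = [{"role": "user", "content": "hi"}]

with_flag = tok.apply_chat_template(
    msgs, tokenize=False, add_generation_prompt=True, enable_thinking=True
)
without_flag = tok.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
assert with_flag == without_flag  # Qwen2.5's template never reads enable_thinking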

trinity/explorer/runner_pool.py

Lines changed: 5 additions & 3 deletions

@@ -49,13 +49,15 @@ def __init__(self, config: Config, models: List):
         self._create_actors(config.explorer.runner_num)

     def _create_actors(self, num: int = 1):
+        new_actors = []
         for _ in range(num):
             engine_index = self.engine_status.index(min(self.engine_status))
             new_actor = WorkflowRunner.remote(self.config, self.models[engine_index])
-            ray.get(new_actor.__ray_ready__.remote())
+            new_actors.append(new_actor)
             self.engine_status[engine_index] += 1
             self.actor_to_engine_index[new_actor] = engine_index
-            self._return_actor(new_actor)
+        for actor in new_actors:
+            self._return_actor(actor)

     def _kill_actors(self, actors):
         if not isinstance(actors, list):
@@ -234,7 +236,7 @@ def get_next(self) -> Status:

     def _return_actor(self, actor):
         try:
-            actor.is_alive.remote()
+            ray.get(actor.is_alive.remote())
             self._idle_actors.append(actor)
         except Exception:
             self.logger.info("The actor is not alive, restart a new actor")
