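Update ray_trainer.py: replace search_r1 LLMGenerationManager/GenerationConfig with openmanus_rl OpenManusAgent/AgentConfig in _validate and fit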

Kunlun-Zhu · Kunlun-Zhu · commit 276b43333349 · 2025-03-20T14:51:52.000-05:00
diff --git a/verl/trainer/ppo/ray_trainer.py b/verl/trainer/ppo/ray_trainer.py
@@ -40,7 +40,7 @@
 from verl.utils.seqlen_balancing import get_seqlen_balanced_partitions, log_seqlen_unbalance
 
 import re
-from search_r1.llm_agent.generation import LLMGenerationManager, GenerationConfig
+from openmanus_rl.llm_agent.openmanus import OpenManusAgent, AgentConfig
 
 WorkerType = Type[Worker]
 
@@ -444,7 +444,7 @@ def _validate(self):
         reward_tensor_lst = []
         data_source_lst = []
 
-        gen_config = GenerationConfig(
+        gen_config = AgentConfig(
             max_turns=self.config.max_turns,
             max_start_length=self.config.data.max_start_length,
             max_prompt_length=self.config.data.max_prompt_length,
@@ -457,7 +457,7 @@ def _validate(self):
         )
 
         # Agent config preparation
-        generation_manager = LLMGenerationManager(
+        generation_manager = OpenManusAgent(
             tokenizer=self.tokenizer,
             actor_rollout_wg=self.actor_rollout_wg,
             config=gen_config,
@@ -675,7 +675,7 @@ def fit(self):
         self.global_steps += 1
 
         # Agent config preparation
-        gen_config = GenerationConfig(
+        gen_config = AgentConfig(
             max_turns=self.config.max_turns,
             max_start_length=self.config.data.max_start_length,
             max_prompt_length=self.config.data.max_prompt_length,
@@ -687,7 +687,7 @@ def fit(self):
             topk = self.config.retriever.topk,
         )
 
-        generation_manager = LLMGenerationManager(
+        generation_manager = OpenManusAgent(
             tokenizer=self.tokenizer,
             actor_rollout_wg=self.actor_rollout_wg,
             config=gen_config,