@@ -23,6 +23,7 @@ def __init__(self, envs, projection_f, config):
         self.ground_truths = []
         self.step_counts = []
         self.task_completed = []
+        self.task_success = []

     def reset(self):
         """Reset environment and get new tasks"""
@@ -36,6 +37,7 @@ def reset(self):
         batch_size = len(self.current_tasks)
         self.step_counts = [0] * batch_size
         self.task_completed = [False] * batch_size
+        self.task_success = [False] * batch_size

         # Initialize memory
         self.memory.reset(batch_size=batch_size)
@@ -59,7 +61,7 @@ def step(self, text_actions: List[str]):
         for i, (action, valid) in enumerate(zip(actions, valids)):
             if self.task_completed[i]:
                 observations.append("Task completed.")
-                infos.append({'is_action_valid': True, 'won': True})
+                infos.append({'is_action_valid': True, 'won': self.task_success[i]})
                 continue

             self.step_counts[i] += 1
@@ -70,14 +72,19 @@ def step(self, text_actions: List[str]):

             # Check completion
             if self._is_completion_action(action):
+                is_correct = self._evaluate_answer(action, i)
+                self.task_success[i] = is_correct
                 self.task_completed[i] = True
                 dones[i] = True
+                obs_feedback = "\n\nEvaluation: final answer matches the ground truth." if is_correct else "\n\nEvaluation: final answer does not match the ground truth."
+                observations[-1] = obs + obs_feedback
             elif self.step_counts[i] >= self.config.env.max_steps:
                 obs += "\n\nMaximum steps reached. Please provide your final answer in <answer></answer> tags."
                 dones[i] = True
+                observations[-1] = obs

             info['is_action_valid'] = to_numpy(valid)
-            info['won'] = self.task_completed[i]
+            info['won'] = self.task_success[i]
             info['step_count'] = self.step_counts[i]
             infos.append(info)

@@ -125,28 +132,78 @@ def _is_completion_action(self, action: str) -> bool:
         """Check if action indicates task completion"""
         return action.startswith("FINAL_ANSWER:") or "<answer>" in action

+    def _evaluate_answer(self, action: str, batch_idx: int) -> bool:
+        """Compare model answer with ground truth"""
+        predicted = self._extract_answer_text(action)
+        ground_truth = self.ground_truths[batch_idx]
+        return self._normalize_answer(predicted) == self._normalize_answer(ground_truth)
+
+    @staticmethod
+    def _extract_answer_text(action: str) -> str:
+        """Extract answer text from action string"""
+        if action.startswith("FINAL_ANSWER:"):
+            return action.split("FINAL_ANSWER:", 1)[1].strip()
+
+        match = re.search(r"<answer>(.*?)</answer>", action, re.DOTALL)
+        if match:
+            return match.group(1).strip()
+        return action.strip()
+
+    @staticmethod
+    def _normalize_answer(text: str) -> str:
+        """Normalize answer string for comparison"""
+        normalized = re.sub(r"\s+", " ", text).strip().lower()
+        normalized = normalized.strip(".,!?:;\" ")
+        return normalized
+
     def build_text_obs(self, observations: List[str] = None, init: bool = False) -> List[str]:
         """Build text observations for agent"""
         batch_size = len(self.current_tasks)
         postprocess_text_obs = []
-
+        max_steps = getattr(self.config.env, "max_steps", None)
+        history_length_cfg = getattr(self.config.env, "history_length", 0)
+
+        if not init and history_length_cfg > 0:
+            memory_contexts, valid_lens = self.memory.fetch(
+                history_length_cfg,
+                obs_key="text_obs",
+                action_key="action",
+            )
+        else:
+            memory_contexts = [""] * batch_size
+            valid_lens = [0] * batch_size
+
         for i in range(batch_size):
-            if init or self.config.env.history_length <= 0:
+            current_obs = observations[i] if observations else "Continue with your task."
+            should_use_last_step = (
+                not init
+                and not self.task_completed[i]
+                and max_steps is not None
+                and self.step_counts[i] >= max_steps - 1
+            )
+
+            if init:
                 obs = TOOL_USE_TEMPLATE_NO_HIS.format(
                     task_description=self.current_tasks[i],
                     available_tools=self.tool_metadata,
                     current_observation="Start working on the task."
                 )
-            else:
-                # Get history
-                memory_contexts, valid_lens = self.memory.fetch(
-                    self.config.env.history_length,
-                    obs_key="text_obs",
-                    action_key="action"
+            elif should_use_last_step:
+                obs = TOOL_USE_TEMPLATE_LAST_STEP.format(
+                    task_description=self.current_tasks[i],
+                    step_count=self.step_counts[i],
+                    history_length=valid_lens[i],
+                    action_history=memory_contexts[i],
+                    current_step=self.step_counts[i] + 1,
+                    current_observation=current_obs,
                 )
-
-                current_obs = observations[i] if observations else "Continue with your task."
-
+            elif history_length_cfg <= 0:
+                obs = TOOL_USE_TEMPLATE_NO_HIS.format(
+                    task_description=self.current_tasks[i],
+                    available_tools=self.tool_metadata,
+                    current_observation=current_obs,
+                )
+            else:
                 obs = TOOL_USE_TEMPLATE.format(
                     task_description=self.current_tasks[i],
                     step_count=self.step_counts[i],
@@ -156,7 +213,7 @@ def build_text_obs(self, observations: List[str] = None, init: bool = False) ->
                     current_observation=current_obs,
                     available_tools=self.tool_metadata
                 )
-
+
             postprocess_text_obs.append(obs)

         return postprocess_text_obs
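
For reference, here is a minimal standalone sketch of how the answer matching added above behaves. The two helpers mirror `_extract_answer_text` and `_normalize_answer` from the diff; the sample strings are illustrative only, not taken from the repository's tasks.

```python
import re


def extract_answer_text(action: str) -> str:
    # Mirrors _extract_answer_text: prefer an explicit FINAL_ANSWER: prefix,
    # then <answer>...</answer> tags, otherwise fall back to the raw action.
    if action.startswith("FINAL_ANSWER:"):
        return action.split("FINAL_ANSWER:", 1)[1].strip()
    match = re.search(r"<answer>(.*?)</answer>", action, re.DOTALL)
    if match:
        return match.group(1).strip()
    return action.strip()


def normalize_answer(text: str) -> str:
    # Mirrors _normalize_answer: collapse whitespace, lowercase,
    # and strip surrounding punctuation and quotes.
    normalized = re.sub(r"\s+", " ", text).strip().lower()
    return normalized.strip(".,!?:;\" ")


# Illustrative inputs (assumed examples, not real task data):
print(normalize_answer(extract_answer_text("FINAL_ANSWER: Paris.")))     # paris
print(normalize_answer(extract_answer_text("<answer>  42  </answer>")))  # 42
print(normalize_answer("PARIS") == normalize_answer("  paris. "))        # True
```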
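The reworked `build_text_obs` now picks one of three prompt templates per task. Below is a small sketch of that branch order as a pure function, assuming the same last-step condition as in the diff; the string return values stand in for the actual templates, and the numbers in the checks are illustrative.

```python
from typing import Optional


def choose_template(init: bool, task_completed: bool, step_count: int,
                    max_steps: Optional[int], history_length: int) -> str:
    # Branch order follows the diff: init first, then the last allowed step,
    # then history disabled, otherwise the full history template.
    if init:
        return "TOOL_USE_TEMPLATE_NO_HIS"
    if not task_completed and max_steps is not None and step_count >= max_steps - 1:
        return "TOOL_USE_TEMPLATE_LAST_STEP"
    if history_length <= 0:
        return "TOOL_USE_TEMPLATE_NO_HIS"
    return "TOOL_USE_TEMPLATE"


# With, say, max_steps=5 and history_length=2:
assert choose_template(True, False, 0, 5, 2) == "TOOL_USE_TEMPLATE_NO_HIS"
assert choose_template(False, False, 2, 5, 2) == "TOOL_USE_TEMPLATE"
assert choose_template(False, False, 4, 5, 2) == "TOOL_USE_TEMPLATE_LAST_STEP"
```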