feat: optimize file_save tool and add tester role to software agent team

zhayujie · zhayujie · commit 19ce5391e0e8 · 2025-04-23T17:44:44.000+08:00
diff --git a/agentmesh/common/utils/xml_util.py b/agentmesh/common/utils/xml_util.py
@@ -69,6 +69,10 @@ def __init__(self):
         self.in_final_answer = False
         self.final_answer_started = False
 
+        # Flag to track if we've seen a final_answer start tag
+        self.has_final_answer_start = False
+        self.final_answer_start_pos = -1
+
     def process_chunk(self, chunk):
         """Process a chunk of streaming content."""
         self.raw_response += chunk
@@ -77,6 +81,11 @@ def process_chunk(self, chunk):
         for char in chunk:
             self._process_char(char)
 
+        # Check for unclosed final_answer tag at the end of processing
+        if self.current_tag == "final_answer" and self.in_final_answer:
+            # Add current content to parsed data
+            self.parsed_data[self.current_tag] = self.current_content
+
     def _process_char(self, char):
         """Process a single character."""
         # State machine processing
@@ -163,6 +172,7 @@ def _handle_tag_complete(self):
                     if self.current_tag == "final_answer":
                         self.in_final_answer = False
                         self.final_answer_started = False
+                        self.has_final_answer_start = False
 
                     self.current_tag = None
                     self.current_content = ""
@@ -171,13 +181,20 @@ def _handle_tag_complete(self):
                     self._handle_invalid_tag('</' + self.tag_buffer + '>')
             else:
                 # Start tag
+                # If we're already in a final_answer tag and see another tag, treat it as content
+                if self.in_final_answer and self.current_tag == "final_answer":
+                    self._handle_invalid_tag('<' + self.tag_buffer + '>')
+                    return
+
                 self.current_tag = self.tag_buffer
                 self.current_content = ""
 
                 # Set state
                 if self.current_tag == "final_answer":
                     self.in_final_answer = True
                     self.final_answer_started = False
+                    self.has_final_answer_start = True
+                    self.final_answer_start_pos = len(self.raw_response) - len("<final_answer>")
 
                 # Print tag name
                 if not self.printed_tags[self.tag_buffer]:
@@ -219,13 +236,38 @@ def get_parsed_data(self):
         """Get parsing results."""
         result = self.parsed_data.copy()
 
+        # Handle incomplete final_answer tag
+        if self.has_final_answer_start and "final_answer" not in result:
+            # Extract everything after the final_answer start tag
+            if self.final_answer_start_pos >= 0:
+                final_answer_content = self.raw_response[self.final_answer_start_pos + len("<final_answer>"):].strip()
+                result["final_answer"] = final_answer_content
+                self.tag_contents["final_answer"] = final_answer_content
+
+                # Update null content flag
+                self.is_null_content["final_answer"] = (
+                        final_answer_content.lower() == "null" or
+                        final_answer_content == "" or
+                        final_answer_content.isspace()
+                )
+
         # Handle incomplete action_input if present
         if "action" in result and not self.is_null_content["action"] and "action_input" not in result:
             # Check if we have partial action_input in the raw response
             action_input_start = self.raw_response.find("<action_input>")
             if action_input_start != -1:
                 action_input_start += len("<action_input>")
-                action_input_content = self.raw_response[action_input_start:].strip()
+                action_input_end = self.raw_response.find("</action_input>", action_input_start)
+
+                if action_input_end != -1:
+                    action_input_content = self.raw_response[action_input_start:action_input_end].strip()
+                else:
+                    # If no end tag, take everything until the next start tag or end of string
+                    next_tag_start = self.raw_response.find("<", action_input_start)
+                    if next_tag_start != -1:
+                        action_input_content = self.raw_response[action_input_start:next_tag_start].strip()
+                    else:
+                        action_input_content = self.raw_response[action_input_start:].strip()
 
                 # Store the extracted action_input
                 result["action_input"] = action_input_content
diff --git a/agentmesh/protocal/agent.py b/agentmesh/protocal/agent.py
@@ -93,9 +93,12 @@ def _build_react_prompt(self) -> str:
 ## Current task context:
 Current time: {formatted_time}
 Team description: {self.team_context.description}
-Other agents output: {self._fetch_agents_outputs()}
 
-Your sub task: {self.subtask}"""
+## Other agents output:
+{self._fetch_agents_outputs()}
+
+## Your sub task
+{self.subtask}"""
 
         return tools_prompt + ext_data_prompt + current_task_prompt
 
@@ -129,7 +132,6 @@ def step(self):
         """
         final_answer = None
         current_step = 0
-        raw_response = ""
 
         # Initialize captured actions list (if it doesn't exist)
         if not hasattr(self, 'captured_actions'):
@@ -144,9 +146,17 @@ def step(self):
 
         # Use max_steps if set, otherwise continue until final answer is found
         while (self.max_steps is None or current_step < self.max_steps) and not final_answer:
+            # Check if team's max_steps will be exceeded with this step
+            if self.team_context.current_steps >= self.team_context.max_steps:
+                logger.warning(f"Team's max steps ({self.team_context.max_steps}) reached. Stopping agent execution.")
+                return AgentResult.error("Team's max steps reached", current_step)
+
+            # Increment team's step counter
+            self.team_context.current_steps += 1
+
             user_prompt = self._build_react_prompt() + "\n\n## Historical steps:\n"
             if self.action_history:
-                user_prompt += f"\n{json.dumps(self.action_history[-5:], ensure_ascii=False, indent=4)}"
+                user_prompt += f"\n{json.dumps(self.action_history[-10:], ensure_ascii=False, indent=4)}"
             messages = [
                 {"role": "system", "content": self.system_prompt},
                 {"role": "user", "content": user_prompt}
@@ -163,15 +173,13 @@ def step(self):
                 stream=self.output_mode == "print"  # Only stream in print mode
             )
 
-            # Start loading animation before getting model response (only in print mode)
-            loading = None
+            # Get model response based on output mode
             if self.output_mode == "print":
+                # Start loading animation before getting model response (only in print mode)
                 print()
                 loading = LoadingIndicator(message="Thinking...", animation_type="spinner")
                 loading.start()
 
-            # Get model response based on output mode
-            if self.output_mode == "print":
                 # Stream response in print mode
                 stream_response = model_to_use.call_stream(request)
                 parser = XmlResParser()
@@ -290,6 +298,7 @@ def step(self):
                     })
             else:
                 # No action, end loop
+                self.output("No action error, end step")
                 break
 
             current_step += 1
@@ -315,9 +324,11 @@ def _execute_post_process_tools(self):
 
             # Log result
             if result.status == "success":
-                logger.info(f"Post-process tool {tool.name} executed successfully: {result.result.get('message', '')}")
+                # Print tool execution result in the desired format
+                self.output(f"\n🛠️ {tool.name}: {json.dumps(result.result)}")
             else:
-                logger.warning(f"Post-process tool {tool.name} failed: {result.result}")
+                # Print failure in print mode
+                self.output(f"\n🛠️ {tool.name}: {json.dumps({'status': 'error', 'message': str(result.result)})}")
 
     def should_invoke_next_agent(self) -> int:
         """
diff --git a/agentmesh/protocal/context.py b/agentmesh/protocal/context.py
@@ -1,5 +1,5 @@
 class TeamContext:
-    def __init__(self, name: str, description: str, rule: str, agents: list):
+    def __init__(self, name: str, description: str, rule: str, agents: list, max_steps: int = 20):
         """
         Initialize the TeamContext with a name, description, rules, a list of agents, and a user question.
         :param name: The name of the group context.
@@ -17,6 +17,8 @@ def __init__(self, name: str, description: str, rule: str, agents: list):
         self.task_short_name = None  # Store the task directory name
         # List of agents that have been executed
         self.agent_outputs: list = []
+        self.current_steps = 0
+        self.max_steps = max_steps
 
 
 class AgentOutput:
diff --git a/agentmesh/protocal/team.py b/agentmesh/protocal/team.py
@@ -27,7 +27,7 @@ def __init__(self, name: str, description: str, rule: str = "", model: LLMModel
         self.description = description
         self.rule = rule
         self.agents = []
-        self.context = TeamContext(name, description, rule, agents=self.agents)
+        self.context = TeamContext(name, description, rule, agents=self.agents, max_steps=max_steps)
         self.model: LLMModel = model  # Instance of LLMModel
         self.max_steps = max_steps  # Maximum total steps across all agents
         self.task_short_name = ""
@@ -247,9 +247,13 @@ def output(message, end="\n"):
                 # Print task completion information
                 output(f"\nTeam {self.name} completed the task")
 
+                # Clean up resources before returning
+                self.cleanup()
+
                 return result
             else:
                 output("No agent found with the selected id.")
+                self.cleanup()
                 result.complete("failed")
                 return result
 
@@ -262,9 +266,27 @@ def output(message, end="\n"):
             logger.error(error_msg)
             logger.debug(f"Error details: {detail_msg}")
 
+            # Clean up resources even when exception occurs
+            self.cleanup()
+
             result.complete("failed")
             return result
 
+    def cleanup(self):
+        """
+        Close every tool of every agent (browser connections, file handles, etc.).
+        Best-effort: a failing tool is logged and the remaining tools still close.
+        """
+        for agent in self.agents:
+            # Agents without a `tools` attribute, or with it unset, have nothing to close
+            for tool in getattr(agent, 'tools', None) or ():
+                try:
+                    tool.close()
+                except Exception as e:
+                    # Fall back to the class name so logging itself cannot raise here
+                    tool_name = getattr(tool, 'name', type(tool).__name__)
+                    logger.warning(f"Error closing tool {tool_name}: {e}")
+
 
 GROUP_DECISION_PROMPT = """## Role
 You are the coordinator for a team of AI agents. Your job is to analyze the user's task and decide which agent in the team should handle it first, and give the subtask that need to be answered by this member.
diff --git a/agentmesh/tools/__init__.py b/agentmesh/tools/__init__.py
@@ -37,7 +37,7 @@ def __init__(self, *args, **kwargs):
     'GoogleSearch',
     'Calculator',
     'CurrentTime',
-    'FileOutput',
+    'FileSave',
     'BrowserTool'
 ]
 
diff --git a/agentmesh/tools/base_tool.py b/agentmesh/tools/base_tool.py
@@ -85,3 +85,13 @@ def should_auto_execute(self, context) -> bool:
         """
         # Only tools in post-process stage will be automatically executed
         return self.stage == ToolStage.POST_PROCESS
+
+    def close(self):
+        """
+        Release any resources the tool holds.
+
+        The base implementation is a no-op. Tools that own resources such as
+        browser connections or file handles should override it to clean them
+        up.
+        """
+        return None
diff --git a/agentmesh/tools/browser/browser_tool.py b/agentmesh/tools/browser/browser_tool.py
@@ -56,7 +56,7 @@ class BrowserTool(BaseTool):
             },
             "url": {
                 "type": "string",
-                "description": f"The URL to navigate to (required for '{Navigate.code}', '{OpenTab.code}' actions)."
+                "description": f"The URL to navigate to (required for '{Navigate.code}', '{OpenTab.code}' actions). "
             },
             "goal": {
                 "type": "string",
@@ -174,6 +174,8 @@ async def _execute_async(self, action: str, params: Dict[str, Any]) -> ToolResul
             url = params.get("url")
             if not url:
                 return ToolResult.fail(result="URL is required for navigate action")
+            if url.startswith("/"):
+                url = f"file://{url}"
             print(f"Navigating to {url}...")
             page = await context.get_current_page()
             await page.goto(url)
@@ -185,6 +187,8 @@ async def _execute_async(self, action: str, params: Dict[str, Any]) -> ToolResul
 
         elif action == OpenTab.code:
             url = params.get("url")
+            if url.startswith("/"):
+                url = f"file://{url}"
             await context.create_new_tab(url)
             msg = f"Opened new tab with {url}"
             return ToolResult.success(result=msg)
@@ -262,3 +266,43 @@ async def _execute_async(self, action: str, params: Dict[str, Any]) -> ToolResul
         else:
             msg = "Failed to operate the browser"
             return ToolResult.fail(result=msg)
+
+    def close(self):
+        """
+        Close the shared browser context and browser, then release the event loop.
+        Errors are logged rather than raised, so cleanup stays best-effort.
+        """
+        if not BrowserTool._initialized or BrowserTool._event_loop is None:
+            return
+        loop = BrowserTool._event_loop
+
+        async def close_browser_async():
+            # Close the context first, then the browser; log and continue on
+            # failure so one broken handle does not keep the other open.
+            if BrowserTool.browser_context is not None:
+                try:
+                    await BrowserTool.browser_context.close()
+                except Exception as e:
+                    logger.error(f"Error closing browser context: {e}")
+
+            if BrowserTool.browser is not None:
+                try:
+                    await BrowserTool.browser.close()
+                except Exception as e:
+                    logger.error(f"Error closing browser: {e}")
+
+        try:
+            try:
+                # Shut down on the existing event loop
+                loop.run_until_complete(close_browser_async())
+            finally:
+                # Reset even on failure; skip if the loop is running (assumes an asyncio loop)
+                if not loop.is_running():
+                    BrowserTool._initialized = False
+                    BrowserTool.browser = None
+                    BrowserTool.browser_context = None
+                    BrowserTool.dom_service = None
+                    BrowserTool._event_loop = None
+                    loop.close()
+        except Exception as e:
+            logger.error(f"Error during browser cleanup: {e}")
diff --git a/agentmesh/tools/file_save/file_save.py b/agentmesh/tools/file_save/file_save.py
diff --git a/config-template.yaml b/config-template.yaml
diff --git a/main.py b/main.py

Original file line number	Diff line number	Diff line change
`@@ -37,7 +37,7 @@ def __init__(self, args, *kwargs):`
`37`	`37`	`'GoogleSearch',`
`38`	`38`	`'Calculator',`
`39`	`39`	`'CurrentTime',`
`40`		`- 'FileOutput',`
	`40`	`+ 'FileSave',`
`41`	`41`	`'BrowserTool'`
`42`	`42`	`]`
`43`	`43`