Skip to content

Commit 19ce539

Browse files
committed
feat: optimize file_save tool and add tester role to software agent team
1 parent dde5749 commit 19ce539

File tree

10 files changed

+619
-120
lines changed

10 files changed

+619
-120
lines changed

agentmesh/common/utils/xml_util.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ def __init__(self):
6969
self.in_final_answer = False
7070
self.final_answer_started = False
7171

72+
# Flag to track if we've seen a final_answer start tag
73+
self.has_final_answer_start = False
74+
self.final_answer_start_pos = -1
75+
7276
def process_chunk(self, chunk):
7377
"""Process a chunk of streaming content."""
7478
self.raw_response += chunk
@@ -77,6 +81,11 @@ def process_chunk(self, chunk):
7781
for char in chunk:
7882
self._process_char(char)
7983

84+
# Check for unclosed final_answer tag at the end of processing
85+
if self.current_tag == "final_answer" and self.in_final_answer:
86+
# Add current content to parsed data
87+
self.parsed_data[self.current_tag] = self.current_content
88+
8089
def _process_char(self, char):
8190
"""Process a single character."""
8291
# State machine processing
@@ -163,6 +172,7 @@ def _handle_tag_complete(self):
163172
if self.current_tag == "final_answer":
164173
self.in_final_answer = False
165174
self.final_answer_started = False
175+
self.has_final_answer_start = False
166176

167177
self.current_tag = None
168178
self.current_content = ""
@@ -171,13 +181,20 @@ def _handle_tag_complete(self):
171181
self._handle_invalid_tag('</' + self.tag_buffer + '>')
172182
else:
173183
# Start tag
184+
# If we're already in a final_answer tag and see another tag, treat it as content
185+
if self.in_final_answer and self.current_tag == "final_answer":
186+
self._handle_invalid_tag('<' + self.tag_buffer + '>')
187+
return
188+
174189
self.current_tag = self.tag_buffer
175190
self.current_content = ""
176191

177192
# Set state
178193
if self.current_tag == "final_answer":
179194
self.in_final_answer = True
180195
self.final_answer_started = False
196+
self.has_final_answer_start = True
197+
self.final_answer_start_pos = len(self.raw_response) - len("<final_answer>")
181198

182199
# Print tag name
183200
if not self.printed_tags[self.tag_buffer]:
@@ -219,13 +236,38 @@ def get_parsed_data(self):
219236
"""Get parsing results."""
220237
result = self.parsed_data.copy()
221238

239+
# Handle incomplete final_answer tag
240+
if self.has_final_answer_start and "final_answer" not in result:
241+
# Extract everything after the final_answer start tag
242+
if self.final_answer_start_pos >= 0:
243+
final_answer_content = self.raw_response[self.final_answer_start_pos + len("<final_answer>"):].strip()
244+
result["final_answer"] = final_answer_content
245+
self.tag_contents["final_answer"] = final_answer_content
246+
247+
# Update null content flag
248+
self.is_null_content["final_answer"] = (
249+
final_answer_content.lower() == "null" or
250+
final_answer_content == "" or
251+
final_answer_content.isspace()
252+
)
253+
222254
# Handle incomplete action_input if present
223255
if "action" in result and not self.is_null_content["action"] and "action_input" not in result:
224256
# Check if we have partial action_input in the raw response
225257
action_input_start = self.raw_response.find("<action_input>")
226258
if action_input_start != -1:
227259
action_input_start += len("<action_input>")
228-
action_input_content = self.raw_response[action_input_start:].strip()
260+
action_input_end = self.raw_response.find("</action_input>", action_input_start)
261+
262+
if action_input_end != -1:
263+
action_input_content = self.raw_response[action_input_start:action_input_end].strip()
264+
else:
265+
# If no end tag, take everything until the next start tag or end of string
266+
next_tag_start = self.raw_response.find("<", action_input_start)
267+
if next_tag_start != -1:
268+
action_input_content = self.raw_response[action_input_start:next_tag_start].strip()
269+
else:
270+
action_input_content = self.raw_response[action_input_start:].strip()
229271

230272
# Store the extracted action_input
231273
result["action_input"] = action_input_content

agentmesh/protocal/agent.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,12 @@ def _build_react_prompt(self) -> str:
9393
## Current task context:
9494
Current time: {formatted_time}
9595
Team description: {self.team_context.description}
96-
Other agents output: {self._fetch_agents_outputs()}
9796
98-
Your sub task: {self.subtask}"""
97+
## Other agents output:
98+
{self._fetch_agents_outputs()}
99+
100+
## Your sub task
101+
{self.subtask}"""
99102

100103
return tools_prompt + ext_data_prompt + current_task_prompt
101104

@@ -129,7 +132,6 @@ def step(self):
129132
"""
130133
final_answer = None
131134
current_step = 0
132-
raw_response = ""
133135

134136
# Initialize captured actions list (if it doesn't exist)
135137
if not hasattr(self, 'captured_actions'):
@@ -144,9 +146,17 @@ def step(self):
144146

145147
# Use max_steps if set, otherwise continue until final answer is found
146148
while (self.max_steps is None or current_step < self.max_steps) and not final_answer:
149+
# Check if team's max_steps will be exceeded with this step
150+
if self.team_context.current_steps >= self.team_context.max_steps:
151+
logger.warning(f"Team's max steps ({self.team_context.max_steps}) reached. Stopping agent execution.")
152+
return AgentResult.error("Team's max steps reached", current_step)
153+
154+
# Increment team's step counter
155+
self.team_context.current_steps += 1
156+
147157
user_prompt = self._build_react_prompt() + "\n\n## Historical steps:\n"
148158
if self.action_history:
149-
user_prompt += f"\n{json.dumps(self.action_history[-5:], ensure_ascii=False, indent=4)}"
159+
user_prompt += f"\n{json.dumps(self.action_history[-10:], ensure_ascii=False, indent=4)}"
150160
messages = [
151161
{"role": "system", "content": self.system_prompt},
152162
{"role": "user", "content": user_prompt}
@@ -163,15 +173,13 @@ def step(self):
163173
stream=self.output_mode == "print" # Only stream in print mode
164174
)
165175

166-
# Start loading animation before getting model response (only in print mode)
167-
loading = None
176+
# Get model response based on output mode
168177
if self.output_mode == "print":
178+
# Start loading animation before getting model response (only in print mode)
169179
print()
170180
loading = LoadingIndicator(message="Thinking...", animation_type="spinner")
171181
loading.start()
172182

173-
# Get model response based on output mode
174-
if self.output_mode == "print":
175183
# Stream response in print mode
176184
stream_response = model_to_use.call_stream(request)
177185
parser = XmlResParser()
@@ -290,6 +298,7 @@ def step(self):
290298
})
291299
else:
292300
# No action, end loop
301+
self.output("No action error, end step")
293302
break
294303

295304
current_step += 1
@@ -315,9 +324,11 @@ def _execute_post_process_tools(self):
315324

316325
# Log result
317326
if result.status == "success":
318-
logger.info(f"Post-process tool {tool.name} executed successfully: {result.result.get('message', '')}")
327+
# Print tool execution result in the desired format
328+
self.output(f"\n🛠️ {tool.name}: {json.dumps(result.result)}")
319329
else:
320-
logger.warning(f"Post-process tool {tool.name} failed: {result.result}")
330+
# Print failure in print mode
331+
self.output(f"\n🛠️ {tool.name}: {json.dumps({'status': 'error', 'message': str(result.result)})}")
321332

322333
def should_invoke_next_agent(self) -> int:
323334
"""

agentmesh/protocal/context.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
class TeamContext:
2-
def __init__(self, name: str, description: str, rule: str, agents: list):
2+
def __init__(self, name: str, description: str, rule: str, agents: list, max_steps: int = 20):
33
"""
44
Initialize the TeamContext with a name, description, rules, a list of agents, and a user question.
55
:param name: The name of the group context.
@@ -17,6 +17,8 @@ def __init__(self, name: str, description: str, rule: str, agents: list):
1717
self.task_short_name = None # Store the task directory name
1818
# List of agents that have been executed
1919
self.agent_outputs: list = []
20+
self.current_steps = 0
21+
self.max_steps = max_steps
2022

2123

2224
class AgentOutput:

agentmesh/protocal/team.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def __init__(self, name: str, description: str, rule: str = "", model: LLMModel
2727
self.description = description
2828
self.rule = rule
2929
self.agents = []
30-
self.context = TeamContext(name, description, rule, agents=self.agents)
30+
self.context = TeamContext(name, description, rule, agents=self.agents, max_steps=max_steps)
3131
self.model: LLMModel = model # Instance of LLMModel
3232
self.max_steps = max_steps # Maximum total steps across all agents
3333
self.task_short_name = ""
@@ -247,9 +247,13 @@ def output(message, end="\n"):
247247
# Print task completion information
248248
output(f"\nTeam {self.name} completed the task")
249249

250+
# Clean up resources before returning
251+
self.cleanup()
252+
250253
return result
251254
else:
252255
output("No agent found with the selected id.")
256+
self.cleanup()
253257
result.complete("failed")
254258
return result
255259

@@ -262,9 +266,27 @@ def output(message, end="\n"):
262266
logger.error(error_msg)
263267
logger.debug(f"Error details: {detail_msg}")
264268

269+
# Clean up resources even when exception occurs
270+
self.cleanup()
271+
265272
result.complete("failed")
266273
return result
267274

275+
def cleanup(self):
    """
    Clean up resources used by the team and its agents.

    Calls ``close()`` on every tool of every agent in ``self.agents``.
    Cleanup is best-effort: a failure while closing one tool is logged as a
    warning and does not prevent the remaining tools from being closed.
    """
    for agent in self.agents:
        # An agent may have no ``tools`` attribute, or have it set to None;
        # both cases are treated as "nothing to close" so that cleanup never
        # raises from the error-handling paths that call it.
        tools = getattr(agent, 'tools', None) or []
        for tool in tools:
            try:
                tool.close()
            except Exception as e:
                # Fall back to the class name so that logging cannot itself
                # fail for a tool that does not define ``name``.
                tool_name = getattr(tool, 'name', type(tool).__name__)
                logger.warning(f"Error closing tool {tool_name}: {str(e)}")
289+
268290

269291
GROUP_DECISION_PROMPT = """## Role
270292
You are the coordinator for a team of AI agents. Your job is to analyze the user's task and decide which agent in the team should handle it first, and give the subtask that need to be answered by this member.

agentmesh/tools/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def __init__(self, *args, **kwargs):
3737
'GoogleSearch',
3838
'Calculator',
3939
'CurrentTime',
40-
'FileOutput',
40+
'FileSave',
4141
'BrowserTool'
4242
]
4343

agentmesh/tools/base_tool.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,13 @@ def should_auto_execute(self, context) -> bool:
8585
"""
8686
# Only tools in post-process stage will be automatically executed
8787
return self.stage == ToolStage.POST_PROCESS
88+
89+
def close(self) -> None:
    """
    Close any resources used by the tool.

    This method should be overridden by tools that need to clean up resources
    such as browser connections, file handles, etc.

    By default, this method does nothing.
    """

agentmesh/tools/browser/browser_tool.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ class BrowserTool(BaseTool):
5656
},
5757
"url": {
5858
"type": "string",
59-
"description": f"The URL to navigate to (required for '{Navigate.code}', '{OpenTab.code}' actions)."
59+
"description": f"The URL to navigate to (required for '{Navigate.code}', '{OpenTab.code}' actions). "
6060
},
6161
"goal": {
6262
"type": "string",
@@ -174,6 +174,8 @@ async def _execute_async(self, action: str, params: Dict[str, Any]) -> ToolResul
174174
url = params.get("url")
175175
if not url:
176176
return ToolResult.fail(result="URL is required for navigate action")
177+
if url.startswith("/"):
178+
url = f"file://{url}"
177179
print(f"Navigating to {url}...")
178180
page = await context.get_current_page()
179181
await page.goto(url)
@@ -185,6 +187,8 @@ async def _execute_async(self, action: str, params: Dict[str, Any]) -> ToolResul
185187

186188
elif action == OpenTab.code:
187189
url = params.get("url")
190+
if url.startswith("/"):
191+
url = f"file://{url}"
188192
await context.create_new_tab(url)
189193
msg = f"Opened new tab with {url}"
190194
return ToolResult.success(result=msg)
@@ -262,3 +266,43 @@ async def _execute_async(self, action: str, params: Dict[str, Any]) -> ToolResul
262266
else:
263267
msg = "Failed to operate the browser"
264268
return ToolResult.fail(result=msg)
269+
270+
def close(self):
    """
    Close browser resources.

    Closes the browser context and the browser stored on the ``BrowserTool``
    class by running an async close routine on ``BrowserTool._event_loop``,
    then closes that loop and clears the reference to it.

    Does nothing when ``BrowserTool._initialized`` is false or when no event
    loop is stored. Errors are logged and never propagated to the caller.
    """
    if not BrowserTool._initialized:
        return

    loop = BrowserTool._event_loop
    if loop is None:
        # Without the stored loop the async close calls cannot be driven;
        # class state is left untouched.
        return

    async def close_browser_async():
        # Each close is guarded separately so that a failure closing the
        # context does not prevent the browser itself from being closed.
        if BrowserTool.browser_context is not None:
            try:
                await BrowserTool.browser_context.close()
            except Exception as e:
                logger.error(f"Error closing browser context: {e}")

        if BrowserTool.browser is not None:
            try:
                await BrowserTool.browser.close()
            except Exception as e:
                logger.error(f"Error closing browser: {e}")

        # Reset the class-level state once both close attempts have run
        BrowserTool._initialized = False
        BrowserTool.browser = None
        BrowserTool.browser_context = None
        BrowserTool.dom_service = None

    try:
        # Run the async close function in the existing event loop
        loop.run_until_complete(close_browser_async())

        # Close the event loop and drop the reference to it
        loop.close()
        BrowserTool._event_loop = None
    except Exception as e:
        # Reported through the logger, like the handlers above, instead of print()
        logger.error(f"Error during browser cleanup: {e}")

0 commit comments

Comments
 (0)