Skip to content

Commit dde5749

Browse files
committed
feat: add file_save tool
1 parent 9c031e5 commit dde5749

File tree

12 files changed

+581
-238
lines changed

12 files changed

+581
-238
lines changed

agentmesh/protocal/agent.py

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from agentmesh.protocal.context import TeamContext, AgentOutput
1010
from agentmesh.protocal.result import AgentAction, AgentActionType, ToolResult, AgentResult
1111
from agentmesh.tools.base_tool import BaseTool
12+
from agentmesh.tools.base_tool import ToolStage
1213

1314

1415
class Agent:
@@ -48,10 +49,11 @@ def add_tool(self, tool: BaseTool):
4849
self.tools.append(tool)
4950

5051
def _build_tools_prompt(self) -> str:
51-
"""Build the tool list description"""
52+
"""Build the tool list description, only including pre-process tools"""
5253
return "\n".join([
5354
f"{tool.name}: {tool.description} (parameters: {tool.params})"
5455
for tool in self.tools
56+
if tool.stage == ToolStage.PRE_PROCESS # Only include pre-process tools
5557
])
5658

5759
def _build_react_prompt(self) -> str:
@@ -98,10 +100,19 @@ def _build_react_prompt(self) -> str:
98100
return tools_prompt + ext_data_prompt + current_task_prompt
99101

100102
def _find_tool(self, tool_name: str):
103+
"""Find and return a tool with the specified name"""
101104
for tool in self.tools:
102105
if tool.name == tool_name:
103-
tool.model = self.model
104-
return tool
106+
# Only pre-process stage tools can be actively called
107+
if tool.stage == ToolStage.PRE_PROCESS:
108+
tool.model = self.model
109+
tool.context = self # Set tool context
110+
return tool
111+
else:
112+
# If it's a post-process tool, return None to prevent direct calling
113+
logger.warning(f"Tool {tool_name} is a post-process tool and cannot be called directly.")
114+
return None
115+
return None
105116

106117
# output function based on mode
107118
def output(self, message="", end="\n"):
@@ -233,6 +244,16 @@ def step(self):
233244
if "final_answer" in parsed and parsed["final_answer"] and parsed["final_answer"].lower() not in ["null",
234245
"none"]:
235246
final_answer = parsed["final_answer"]
247+
self.final_answer = final_answer
248+
249+
# Store the final answer in team context
250+
self.team_context.agent_outputs.append(
251+
AgentOutput(agent_name=self.name, output=final_answer)
252+
)
253+
254+
# Execute all post-process tools
255+
self._execute_post_process_tools()
256+
236257
break
237258

238259
# Handle tool invocation
@@ -273,19 +294,31 @@ def step(self):
273294

274295
current_step += 1
275296

276-
# Save final result
277-
result = final_answer if final_answer else raw_response
278-
self.final_answer = result
279-
self.team_context.agent_outputs.append(
280-
AgentOutput(agent_name=self.name, output=result)
281-
)
282-
283297
# Return a StepResult object
284298
return AgentResult.success(
285299
final_answer=self.final_answer,
286300
step_count=current_step + 1 # +1 because we count steps starting from 1
287301
)
288302

303+
def _execute_post_process_tools(self):
304+
"""Execute all post-process stage tools"""
305+
# Get all post-process stage tools
306+
post_process_tools = [tool for tool in self.tools if tool.stage == ToolStage.POST_PROCESS]
307+
308+
# Execute each tool
309+
for tool in post_process_tools:
310+
# Set tool context
311+
tool.context = self
312+
313+
# Execute tool (with empty parameters, tool will extract needed info from context)
314+
result = tool.execute({})
315+
316+
# Log result
317+
if result.status == "success":
318+
logger.info(f"Post-process tool {tool.name} executed successfully: {result.result.get('message', '')}")
319+
else:
320+
logger.warning(f"Post-process tool {tool.name} failed: {result.result}")
321+
289322
def should_invoke_next_agent(self) -> int:
290323
"""
291324
Determine if the next agent should be invoked based on the reply.

agentmesh/protocal/context.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
class TeamContext:
22
def __init__(self, name: str, description: str, rule: str, agents: list):
33
"""
4-
Initialize the GroupContext with a name, description, rules, a list of agents, and a user question.
4+
Initialize the TeamContext with a name, description, rules, and a list of agents.
55
:param name: The name of the group context.
66
:param description: A description of the group context.
77
:param rule: The rules governing the group context.
@@ -14,6 +14,7 @@ def __init__(self, name: str, description: str, rule: str, agents: list):
1414
self.user_task = "" # For backward compatibility
1515
self.task = None # Will be a Task instance
1616
self.model = None # Will be an instance of LLMModel
17+
self.task_short_name = None # Store the task directory name
1718
# List of agents that have been executed
1819
self.agent_outputs: list = []
1920

agentmesh/protocal/team.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from typing import Union, Literal
22
import json
3+
import re
34

45
from agentmesh.common import LoadingIndicator
56
from agentmesh.common.utils import string_util
@@ -29,6 +30,7 @@ def __init__(self, name: str, description: str, rule: str = "", model: LLMModel
2930
self.context = TeamContext(name, description, rule, agents=self.agents)
3031
self.model: LLMModel = model # Instance of LLMModel
3132
self.max_steps = max_steps # Maximum total steps across all agents
33+
self.task_short_name = ""
3234

3335
def add(self, agent: Agent):
3436
"""
@@ -92,7 +94,7 @@ def output(message, end="\n"):
9294

9395
prompt = GROUP_DECISION_PROMPT.format(group_name=self.name, group_description=self.description,
9496
group_rules=self.rule, agents_str=agents_str,
95-
user_question=task.get_text())
97+
user_task=task.get_text())
9698

9799
# Start loading animation (only in print mode)
98100
loading = None
@@ -132,6 +134,8 @@ def output(message, end="\n"):
132134
decision_res = string_util.json_loads(reply_text)
133135
selected_agent_id = decision_res.get("id") # Extract the id from the response
134136
subtask = decision_res.get("subtask")
137+
task_short_name = decision_res.get("task_short_name")
138+
self.context.task_short_name = task_short_name
135139

136140
# Find the selected agent based on the id
137141
selected_agent: Agent = self.agents[selected_agent_id]
@@ -263,19 +267,24 @@ def output(message, end="\n"):
263267

264268

265269
GROUP_DECISION_PROMPT = """## Role
266-
As an expert in team task allocation, your role is to select the most suitable team member to initially address the task at hand, and give the subtask that need to be answered by this member.
267-
After the task is completed, the results will pass to next member.
270+
You are the coordinator for a team of AI agents. Your job is to analyze the user's task, decide which agent in the team should handle it first, and give the subtask that needs to be answered by this member.
268271
269-
## Team
270-
Team Name: {group_name}
271-
Team Description: {group_description}
272-
Team Rules: {group_rules}
272+
## Team Information
273+
Team name: {group_name}
274+
Team description: {group_description}
275+
Team rules: {group_rules}
273276
274-
## List of team members:
277+
## Available Agents
275278
{agents_str}
276279
277-
## User Question:
278-
{user_question}
280+
## User Task
281+
{user_task}
282+
283+
## Output Format
284+
Return your response in JSON format with the following fields:
285+
- id: The ID of the selected agent
286+
- subtask: The subtask that needs to be answered by this member (use the same language as the user's task and preserve all key information from the original task)
287+
- task_short_name: A descriptive name for the user's original task (lowercase with underscores, max 5 English words)
279288
280289
Please return the result in the following JSON structure which can be parsed directly by json.loads(), no extra content:
281-
{{"id": <member_id>, "subtask": ""}}"""
290+
{{"id": <member_id>, "subtask": "", "task_short_name": ""}}"""

agentmesh/tools/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
from agentmesh.tools.google_search.google_search import GoogleSearch
77
from agentmesh.tools.calculator.calculator import Calculator
88
from agentmesh.tools.current_time.current_time import CurrentTime
9-
from agentmesh.tools.file_output.file_output import FileOutput
9+
from agentmesh.tools.file_save.file_save import FileSave
10+
1011

1112
# Delayed import for BrowserTool
1213
def _import_browser_tool():
@@ -22,8 +23,10 @@ def __init__(self, *args, **kwargs):
2223
"Please install it with 'pip install browser-use>=0.1.40' or "
2324
"'pip install agentmesh-sdk[full]'."
2425
)
26+
2527
return BrowserToolPlaceholder
2628

29+
2730
# Dynamically set BrowserTool
2831
BrowserTool = _import_browser_tool()
2932

agentmesh/tools/base_tool.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
1+
from enum import Enum
12
from typing import Any
23
from pydantic import BaseModel, Field
34
from agentmesh.models.llm.base_model import LLMModel
45
from agentmesh.common import logger
56

67

8+
class ToolStage(Enum):
9+
"""Enum representing tool decision stages"""
10+
PRE_PROCESS = "pre_process" # Tools that need to be actively selected by the agent
11+
POST_PROCESS = "post_process" # Tools that automatically execute after final_answer
12+
13+
714
class ToolResult(BaseModel):
815
status: str = Field(default=None)
916
result: Any = Field(default=None)
@@ -19,6 +26,11 @@ def fail(result, ext_data: Any = None):
1926

2027

2128
class BaseTool:
29+
"""Base class for all tools."""
30+
31+
# Default decision stage is pre-process
32+
stage = ToolStage.PRE_PROCESS
33+
2234
# Class attributes must be inherited
2335
name: str = "base_tool"
2436
description: str = "Base tool"
@@ -63,3 +75,13 @@ def _parse_schema(cls) -> dict:
6375
prop.get("default", ...)
6476
)
6577
return fields
78+
79+
def should_auto_execute(self, context) -> bool:
80+
"""
81+
Determine if this tool should be automatically executed based on context.
82+
83+
:param context: The agent context
84+
:return: True if the tool should be executed, False otherwise
85+
"""
86+
# Only tools in post-process stage will be automatically executed
87+
return self.stage == ToolStage.POST_PROCESS

agentmesh/tools/browser/browser_action.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,10 @@ class SwitchTab(BrowserAction):
5050
"""Switch to a tab"""
5151
code = "switch_tab"
5252
description = "Switched to tab"
53+
54+
55+
class SendKeys(BrowserAction):
56+
"""Send special keyboard keys"""
57+
code = "send_keys"
58+
description = "Send strings of special keyboard keys like Escape, Backspace, Insert, PageDown, Delete, Enter, " \
59+
"ArrowRight, ArrowUp, etc"

agentmesh/tools/browser/browser_tool.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ def _import_browser_use():
2929

3030

3131
def _get_action_prompt():
32-
action_classes = [Navigate, ClickElement, ExtractContent, InputText, OpenTab, SwitchTab, ScrollDown, ScrollUp]
32+
action_classes = [Navigate, ClickElement, ExtractContent, InputText, OpenTab, SwitchTab, ScrollDown, ScrollUp,
33+
SendKeys]
3334
action_prompt = ""
3435
for action_class in action_classes:
3536
action_prompt += f"{action_class.code}: {action_class.description}\n"
@@ -76,6 +77,10 @@ class BrowserTool(BaseTool):
7677
"scroll_amount": {
7778
"type": "integer",
7879
"description": f"The number of pixels to scroll (required for '{ScrollDown.code}', '{ScrollUp.code}' action)."
80+
},
81+
"keys": {
82+
"type": "string",
83+
"description": f"Keys to send (required for '{SendKeys.code}' action)"
7984
}
8085
},
8186
"required": ["operation"]
@@ -246,6 +251,14 @@ async def _execute_async(self, action: str, params: Dict[str, Any]) -> ToolResul
246251
msg = f"{action} by {scroll_amount} pixels"
247252
return ToolResult.success(result=msg, ext_data=await self._get_page_info(context))
248253

254+
elif action == SendKeys.code:
255+
keys = params.get("keys")
256+
page = await context.get_current_page()
257+
await page.keyboard.press(keys)
258+
msg = f"Sent keys: {keys}"
259+
print(msg)
260+
return ToolResult(output=f"Sent keys: {keys}")
261+
249262
else:
250263
msg = "Failed to operate the browser"
251264
return ToolResult.fail(result=msg)

agentmesh/tools/file_output/__init__.py

Lines changed: 0 additions & 3 deletions
This file was deleted.

0 commit comments

Comments
 (0)