Skip to content

Commit e75d487

Browse files
authored
Merge pull request #10 from MiroMindAI/blacklist-tool
feat(tool-blacklist): add tool blacklist function
2 parents 392349d + 9c6aa90 commit e75d487

7 files changed

+1127
-7
lines changed
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
defaults:
2+
- benchmark: gaia-validation-text-only
3+
- override hydra/job_logging: none
4+
- _self_
5+
6+
entrypoint: main_agent
7+
main_agent:
8+
name: main_agent
9+
type: IterativeAgentWithToolAndRollback
10+
max_consecutive_rollbacks: 3
11+
max_turns: 200
12+
llm:
13+
_base_: config/llm/base_mirothinker.yaml
14+
prompt: config/prompts/fangda_prompt_main_agent.yaml
15+
tools:
16+
- config/tool/tool-search-and-scrape-webpage.yaml
17+
- config/tool/tool-jina-scrape-llm-summary.yaml
18+
- config/tool/tool-python.yaml
19+
tool_blacklist:
20+
- server: "tool-search-and-scrape-webpage"
21+
tool: "sogou_search"
22+
- server: "tool-python"
23+
tool: "download_file_from_sandbox_to_local"
24+
input_processor:
25+
- ${input-message-generator}
26+
output_processor:
27+
- ${output-summary}
28+
- ${output-final-answer-extraction}
29+
- ${output-failure-experience}
30+
31+
input-message-generator:
32+
type: InputMessageGenerator
33+
output-summary:
34+
type: SummaryGenerator
35+
output-failure-experience:
36+
type: FailureExperienceSummaryGenerator
37+
prompt: config/prompts/fangda_prompt_main_agent.yaml
38+
llm:
39+
_base_: config/llm/base_mirothinker.yaml
40+
output-final-answer-extraction:
41+
type: FinalAnswerExtractor
42+
prompt: config/prompts/fangda_prompt_main_agent.yaml
43+
llm:
44+
_base_: config/llm/base_mirothinker.yaml
45+
46+
output_dir: logs/
47+
data_dir: "${oc.env:DATA_DIR,data}"
48+
49+

config/tool/tool-python.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
name: "tool-python"
2+
tool_command: "python"
3+
args:
4+
- "-m"
5+
- "src.tool.mcp_servers.python_mcp_server"
6+
env:
7+
E2B_API_KEY: "${oc.env:E2B_API_KEY}"
8+
LOGS_DIR: "./logs"

scripts/binwang_run_evaluate_multiple_runs_mirothinker_gaia-validation-text-only.sh

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,40 @@
77
# Configuration parameters
88
NUM_RUNS=3
99
BENCHMARK_NAME="gaia-validation-text-only"
10-
AGENT_SET="binwang_agent_gaia-validation-text-only_mirothinker_single_agent"
11-
MAX_CONCURRENT=30
10+
AGENT_SET="binwang_fangda_agent_gaia-validation-text-only_mirothinker_single_agent_rollback_new_tools_toolblacklist"
11+
MAX_CONCURRENT=10
1212

1313
# Set results directory with timestamp
1414
TIMESTAMP=$(date +%Y%m%d_%H%M)
1515
RESULTS_DIR=${RESULTS_DIR:-"logs/${BENCHMARK_NAME}/${AGENT_SET}_${TIMESTAMP}"}
1616

17+
# Array to track child PIDs
18+
declare -a CHILD_PIDS=()
19+
20+
# Send signal $1 to the run whose subshell PID is $2, including what it spawned.
#
# The PID stored in CHILD_PIDS is the PID of a backgrounded subshell. That
# subshell is normally NOT a process-group leader: `set -m` inside it places
# the commands it launches into their own process groups, while the subshell
# itself stays in this script's group. A plain `kill -- -$pid` therefore
# usually fails, so fall back to signalling each direct child's group (or the
# child itself) and finally the subshell.
# If `pgrep` is unavailable the child loop is empty and only the subshell is
# signalled.
_signal_run() {
    local sig=$1 pid=$2 child

    # Fast path: works only when $pid really leads a process group.
    if kill -s "$sig" -- "-$pid" 2>/dev/null; then
        return 0
    fi

    for child in $(pgrep -P "$pid" 2>/dev/null); do
        kill -s "$sig" -- "-$child" 2>/dev/null || kill -s "$sig" "$child" 2>/dev/null
    done
    kill -s "$sig" "$pid" 2>/dev/null
}

# SIGINT/SIGTERM handler: terminate every launched run, then exit with 130.
#
# Reads the global CHILD_PIDS array. Sends SIGTERM first, waits two seconds
# for a graceful shutdown, then sends SIGKILL to anything still alive.
cleanup() {
    local pid

    echo ""
    echo "Received interrupt signal, terminating all processes..."
    for pid in "${CHILD_PIDS[@]}"; do
        if kill -0 "$pid" 2>/dev/null; then
            echo "Killing process group $pid"
            _signal_run TERM "$pid"
        fi
    done
    # Wait a moment for graceful shutdown
    sleep 2
    # Force kill any remaining processes
    for pid in "${CHILD_PIDS[@]}"; do
        if kill -0 "$pid" 2>/dev/null; then
            echo "Force killing process group $pid"
            _signal_run KILL "$pid"
        fi
    done
    echo "All processes terminated."
    exit 130
}
41+
42+
trap cleanup SIGINT SIGTERM
43+
1744
echo "Starting $NUM_RUNS runs of the evaluation..."
1845
echo "Results will be saved in: $RESULTS_DIR"
1946

@@ -27,14 +54,17 @@ for i in $(seq 1 $NUM_RUNS); do
2754

2855
RUN_ID="run_$i"
2956

57+
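# Run in a subshell with job control on: `set -m` puts the commands the subshell launches into their own process group (the subshell itself stays in this script's group)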
3058
(
59+
set -m
3160
uv run test_benchmark.py \
3261
--config-path config/${AGENT_SET}.yaml \
3362
benchmark.execution.max_concurrent=$MAX_CONCURRENT \
3463
output_dir="$RESULTS_DIR/$RUN_ID" \
3564
> "$RESULTS_DIR/${RUN_ID}_output.log" 2>&1
3665

37-
if [ $? -eq 0 ]; then
66+
EXIT_CODE=$?
67+
if [ $EXIT_CODE -eq 0 ]; then
3868
echo "Run $i completed successfully"
3969
RESULT_FILE=$(find "${RESULTS_DIR}/$RUN_ID" -name "*accuracy.txt" 2>/dev/null | head -1)
4070
if [ -f "$RESULT_FILE" ]; then
@@ -43,15 +73,26 @@ for i in $(seq 1 $NUM_RUNS); do
4373
echo "Warning: Result file not found for run $i"
4474
fi
4575
else
46-
echo "Run $i failed!"
76+
# Check if we have JSON result files (task completed but evaluator had issues)
77+
JSON_COUNT=$(find "${RESULTS_DIR}/$RUN_ID" -name "task_*.json" 2>/dev/null | wc -l)
78+
if [ "$JSON_COUNT" -gt 0 ]; then
79+
echo "Run $i finished with exit code $EXIT_CODE but generated $JSON_COUNT task logs"
80+
else
81+
echo "Run $i failed with exit code $EXIT_CODE"
82+
fi
4783
fi
4884
) &
4985

86+
# Get the PID and store it
87+
CHILD_PIDS+=($!)
88+
5089
sleep 2
5190
done
5291

5392
echo "All $NUM_RUNS runs have been launched in parallel"
93+
echo "Child PIDs: ${CHILD_PIDS[*]}"
5494
echo "Waiting for all runs to complete..."
95+
echo "Press Ctrl+C to terminate all processes"
5596

5697
wait
5798

@@ -66,5 +107,4 @@ echo "=========================================="
66107
echo "Multiple runs evaluation completed!"
67108
echo "Check results in: $RESULTS_DIR"
68109
echo "Check individual run logs: $RESULTS_DIR/run_*_output.log"
69-
echo "=========================================="
70-
110+
echo "=========================================="

src/agents/base.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,38 @@ def __init__(self, cfg: Optional[DictConfig | dict] = None, parent=None):
7171
self.llm_client = build_llm_client(cfg=self.cfg.get("llm"))
7272
self.prompt_manager = PromptManager(config_path=self.cfg.get("prompt"))
7373
self.sub_agents = self.cfg.get("sub_agents")
74-
self.tool_manager = ToolManager(cfg=self.cfg.get("tools"))
74+
75+
# Parse tool_blacklist from config
76+
tool_blacklist = self._parse_tool_blacklist(self.cfg.get("tool_blacklist"))
77+
self.tool_manager = ToolManager(
78+
cfg=self.cfg.get("tools"), tool_blacklist=tool_blacklist
79+
)
7580
self.skill_manager = SkillManager(skill_dirs=self.cfg.get("skills"))
7681

82+
def _parse_tool_blacklist(self, blacklist_cfg) -> set:
83+
"""
84+
Parse tool_blacklist config into a set of (server_name, tool_name) tuples.
85+
86+
Config format:
87+
tool_blacklist:
88+
- server: "tool-code"
89+
tool: "create_sandbox"
90+
- server: "tool-search-and-scrape-webpage"
91+
tool: "sogou_search"
92+
93+
Returns:
94+
Set of (server_name, tool_name) tuples
95+
"""
96+
if not blacklist_cfg:
97+
return set()
98+
99+
blacklist = set()
100+
for item in blacklist_cfg:
101+
# Handles both regular dict and OmegaConf DictConfig
102+
if hasattr(item, "get") and item.get("server") and item.get("tool"):
103+
blacklist.add((str(item.get("server")), str(item.get("tool"))))
104+
return blacklist
105+
77106
@abstractmethod
78107
async def run_internal(self, ctx: AgentContext) -> AgentContext:
79108
pass

src/tool/manager.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ def __init__(
8888
logger.info(
8989
f"ToolManager initialized, loaded servers: {list(self.server_dict.keys())}"
9090
)
91+
if self.tool_blacklist:
92+
logger.info(f"Tool blacklist configured: {self.tool_blacklist}")
9193

9294
def _is_huggingface_dataset_or_space_url(self, url):
9395
"""

0 commit comments

Comments
 (0)