Skip to content

Commit 7d4336a

Browse files
authored
Merge pull request #5 from MiroMindAI/patch_pengxiang
feat(tool): integrate two key tools from mirothinker
2 parents 9003b53 + 7ebd5b7 commit 7d4336a

7 files changed

+1360
-5
lines changed

config/__init__.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,13 @@ def load_config(config_path: str, *overrides) -> omegaconf.DictConfig:
2020
if config_name.endswith((".yaml", ".yml")):
2121
config_name = os.path.splitext(config_name)[0]
2222

23+
# Check if output_dir is explicitly specified in overrides
24+
output_dir_override = None
25+
for override in overrides:
26+
if override.startswith("output_dir="):
27+
output_dir_override = override.split("=", 1)[1]
28+
break
29+
2330
# Load and resolve configuration
2431
hydra.initialize_config_dir(
2532
config_dir=str(pathlib.Path(__file__).parent.absolute()),
@@ -28,10 +35,16 @@ def load_config(config_path: str, *overrides) -> omegaconf.DictConfig:
2835
cfg = hydra.compose(config_name=config_name, overrides=list(overrides))
2936
cfg = omegaconf.OmegaConf.create(omegaconf.OmegaConf.to_container(cfg, resolve=True))
3037

31-
# Create timestamped output directory
32-
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
33-
output_dir = pathlib.Path(cfg.output_dir) / f"{config_name}_{timestamp}"
34-
output_dir.mkdir(parents=True, exist_ok=True)
35-
cfg.output_dir = str(output_dir)
38+
# Create timestamped output directory only if output_dir was not explicitly specified
39+
if output_dir_override is None:
40+
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
41+
output_dir = pathlib.Path(cfg.output_dir) / f"{config_name}_{timestamp}"
42+
output_dir.mkdir(parents=True, exist_ok=True)
43+
cfg.output_dir = str(output_dir)
44+
else:
45+
# Use the explicitly specified output_dir directly
46+
output_dir = pathlib.Path(cfg.output_dir)
47+
output_dir.mkdir(parents=True, exist_ok=True)
48+
cfg.output_dir = str(output_dir)
3649

3750
return cfg
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
# Hydra agent configuration: single main agent (MiroThinker) with web-search,
# scrape+summarize, and code-execution tools, evaluated on the
# gaia-validation-text-only benchmark.
# NOTE(review): indentation reconstructed from a flattened diff view — verify
# nesting against the original file before relying on it.
defaults:
  - benchmark: gaia-validation-text-only
  - override hydra/job_logging: none
  - _self_ # Allow defining variables at the top of this file

# Name of the agent entry that drives the run.
entrypoint: main_agent
main_agent:
  name: main_agent
  type: IterativeAgentWithTool
  llm:
    # _base_ points at a shared LLM config that this entry inherits from.
    _base_: config/llm/base_mirothinker.yaml
  prompt: config/prompts/prompt_main_agent.yaml
  # Tool configs loaded for this agent (see config/tool/*.yaml).
  tools:
    - config/tool/tool-search-and-scrape-webpage.yaml
    - config/tool/tool-jina-scrape-llm-summary.yaml
    - config/tool/tool-code.yaml
  # ${...} interpolations reference the top-level component definitions below.
  input_processor:
    - ${input-message-generator}
  output_processor:
    - ${output-summary}
    - ${output-final-answer-extraction}

# Component definitions referenced by the processors above.
input-message-generator:
  type: InputMessageGenerator
output-summary:
  type: SummaryGenerator
output-final-answer-extraction:
  type: FinalAnswerExtractor
  prompt: config/prompts/prompt_final_answer_extraction.yaml
  llm:
    _base_: config/llm/base_mirothinker.yaml


# Base directory for run outputs; the loader may append a timestamped subdir.
output_dir: logs/
data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
# MCP tool config: scrape a webpage via the Jina Reader API, then summarize
# the content with an LLM. Launched as a Python MCP server subprocess.
name: "jina_scrape_llm_summary"
tool_command: "python"
args:
  - "-m"
  - "src.tool.mcp_servers.jina_scrape_llm_summary_mcp_server"
env:
  # Jina and LLM API keys - these values will be loaded from the .env file at runtime
  JINA_API_KEY: "${oc.env:JINA_API_KEY}"
  # Defaults to the public Jina Reader endpoint when JINA_BASE_URL is unset.
  JINA_BASE_URL: "${oc.env:JINA_BASE_URL,https://r.jina.ai}"
  # Endpoint/model/key for the summarization LLM (no defaults: must be set).
  SUMMARY_LLM_BASE_URL: "${oc.env:SUMMARY_LLM_BASE_URL}"
  SUMMARY_LLM_MODEL_NAME: "${oc.env:SUMMARY_LLM_MODEL_NAME}"
  SUMMARY_LLM_API_KEY: "${oc.env:SUMMARY_LLM_API_KEY}"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
# MCP tool config: web search (Serper) plus webpage scraping.
# Launched as a Python MCP server subprocess.
name: "search_and_scrape_webpage"
tool_command: "python"
args:
  - "-m"
  - "src.tool.mcp_servers.search_and_scrape_webpage_mcp_server"
env:
  # Search API keys - these values will be loaded from the .env file at runtime
  SERPER_API_KEY: "${oc.env:SERPER_API_KEY}"
  # Defaults to the public Serper endpoint when SERPER_BASE_URL is unset.
  SERPER_BASE_URL: "${oc.env:SERPER_BASE_URL,https://google.serper.dev}"
  # Temporarily disabled: Sogou search functionality
  # TENCENTCLOUD_SECRET_ID: "${oc.env:TENCENTCLOUD_SECRET_ID}"
  # TENCENTCLOUD_SECRET_KEY: "${oc.env:TENCENTCLOUD_SECRET_KEY}"
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
#!/bin/bash

# SPDX-FileCopyrightText: 2025 MiromindAI
#
# SPDX-License-Identifier: Apache-2.0

# Launch NUM_RUNS evaluation runs in parallel (each in a backgrounded
# subshell), wait for all of them to finish, then compute the average
# score across runs. Per-run stdout/stderr goes to
# $RESULTS_DIR/run_<i>_output.log.

# Configuration parameters
NUM_RUNS=3
BENCHMARK_NAME="gaia-validation-text-only"
AGENT_SET="agent_gaia-validation-text-only_mirothinker_single_agent_new_tools"
MAX_CONCURRENT=25

# Set results directory with timestamp; RESULTS_DIR env var overrides.
TIMESTAMP=$(date +%Y%m%d_%H%M)
RESULTS_DIR=${RESULTS_DIR:-"logs/${BENCHMARK_NAME}/${AGENT_SET}_${TIMESTAMP}"}

echo "Starting $NUM_RUNS runs of the evaluation..."
echo "Results will be saved in: $RESULTS_DIR"

# Create results directory
mkdir -p "$RESULTS_DIR"

for i in $(seq 1 "$NUM_RUNS"); do
    echo "=========================================="
    echo "Launching experiment $i/$NUM_RUNS"
    echo "=========================================="

    RUN_ID="run_$i"

    (
        # Test the command's exit status directly in the `if` instead of
        # inspecting $? afterwards — avoids accidentally checking the
        # status of an intervening command.
        if uv run test_benchmark.py \
            --config-path "config/${AGENT_SET}.yaml" \
            benchmark.execution.max_concurrent="$MAX_CONCURRENT" \
            output_dir="$RESULTS_DIR/$RUN_ID" \
            > "$RESULTS_DIR/${RUN_ID}_output.log" 2>&1; then
            echo "Run $i completed successfully"
            # -print -quit stops at the first match (replaces `| head -1`).
            RESULT_FILE=$(find "${RESULTS_DIR}/$RUN_ID" -name "*accuracy.txt" -print -quit 2>/dev/null)
            # Guard against an empty match before the -f test.
            if [ -n "$RESULT_FILE" ] && [ -f "$RESULT_FILE" ]; then
                echo "Results saved to $RESULT_FILE"
            else
                echo "Warning: Result file not found for run $i"
            fi
        else
            echo "Run $i failed!"
        fi
    ) &

    # Stagger launches slightly so concurrent runs don't collide on startup.
    sleep 2
done

echo "All $NUM_RUNS runs have been launched in parallel"
echo "Waiting for all runs to complete..."

# Block until every backgrounded run subshell exits.
wait

echo "=========================================="
echo "All $NUM_RUNS runs completed!"
echo "=========================================="

echo "Calculating average scores..."
uv run python -c "from src.utils.old.calculate_average_score import main; main('$RESULTS_DIR')"

echo "=========================================="
echo "Multiple runs evaluation completed!"
echo "Check results in: $RESULTS_DIR"
echo "Check individual run logs: $RESULTS_DIR/run_*_output.log"
echo "=========================================="

0 commit comments

Comments
 (0)