Skip to content

Commit 7d4336a

Browse files
authored
Merge pull request #5 from MiroMindAI/patch_pengxiang
feat(tool): integrate two key tools from mirothinker
2 parents 9003b53 + 7ebd5b7 commit 7d4336a

7 files changed

+1360
-5
lines changed

config/__init__.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,13 @@ def load_config(config_path: str, *overrides) -> omegaconf.DictConfig:
2020
if config_name.endswith((".yaml", ".yml")):
2121
config_name = os.path.splitext(config_name)[0]
2222

23+
# Check if output_dir is explicitly specified in overrides
24+
output_dir_override = None
25+
for override in overrides:
26+
if override.startswith("output_dir="):
27+
output_dir_override = override.split("=", 1)[1]
28+
break
29+
2330
# Load and resolve configuration
2431
hydra.initialize_config_dir(
2532
config_dir=str(pathlib.Path(__file__).parent.absolute()),
@@ -28,10 +35,16 @@ def load_config(config_path: str, *overrides) -> omegaconf.DictConfig:
2835
cfg = hydra.compose(config_name=config_name, overrides=list(overrides))
2936
cfg = omegaconf.OmegaConf.create(omegaconf.OmegaConf.to_container(cfg, resolve=True))
3037

31-
# Create timestamped output directory
32-
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
33-
output_dir = pathlib.Path(cfg.output_dir) / f"{config_name}_{timestamp}"
34-
output_dir.mkdir(parents=True, exist_ok=True)
35-
cfg.output_dir = str(output_dir)
38+
# Create timestamped output directory only if output_dir was not explicitly specified
39+
if output_dir_override is None:
40+
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
41+
output_dir = pathlib.Path(cfg.output_dir) / f"{config_name}_{timestamp}"
42+
output_dir.mkdir(parents=True, exist_ok=True)
43+
cfg.output_dir = str(output_dir)
44+
else:
45+
# Use the explicitly specified output_dir directly
46+
output_dir = pathlib.Path(cfg.output_dir)
47+
output_dir.mkdir(parents=True, exist_ok=True)
48+
cfg.output_dir = str(output_dir)
3649

3750
return cfg
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
# Hydra agent configuration: single main agent (MiroThinker) with web-search,
# scrape+summarize, and code-execution tools, evaluated on the
# gaia-validation-text-only benchmark.
# NOTE(review): indentation reconstructed from a flattened diff view — verify
# nesting against the original file before relying on it.
defaults:
  - benchmark: gaia-validation-text-only
  - override hydra/job_logging: none
  - _self_ # Allow defining variables at the top of this file

# Name of the agent entry that drives the run.
entrypoint: main_agent
main_agent:
  name: main_agent
  type: IterativeAgentWithTool
  llm:
    # _base_ points at a shared LLM config that this entry inherits from.
    _base_: config/llm/base_mirothinker.yaml
  prompt: config/prompts/prompt_main_agent.yaml
  # Tool configs loaded for this agent (see config/tool/*.yaml).
  tools:
    - config/tool/tool-search-and-scrape-webpage.yaml
    - config/tool/tool-jina-scrape-llm-summary.yaml
    - config/tool/tool-code.yaml
  # ${...} interpolations reference the top-level component definitions below.
  input_processor:
    - ${input-message-generator}
  output_processor:
    - ${output-summary}
    - ${output-final-answer-extraction}

# Component definitions referenced by the processors above.
input-message-generator:
  type: InputMessageGenerator
output-summary:
  type: SummaryGenerator
output-final-answer-extraction:
  type: FinalAnswerExtractor
  prompt: config/prompts/prompt_final_answer_extraction.yaml
  llm:
    _base_: config/llm/base_mirothinker.yaml


# Base directory for run outputs; the loader may append a timestamped subdir.
output_dir: logs/
data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
# MCP tool config: scrape a webpage via the Jina Reader API, then summarize
# the content with an LLM. Launched as a Python MCP server subprocess.
name: "jina_scrape_llm_summary"
tool_command: "python"
args:
  - "-m"
  - "src.tool.mcp_servers.jina_scrape_llm_summary_mcp_server"
env:
  # Jina and LLM API keys - these values will be loaded from the .env file at runtime
  JINA_API_KEY: "${oc.env:JINA_API_KEY}"
  # Defaults to the public Jina Reader endpoint when JINA_BASE_URL is unset.
  JINA_BASE_URL: "${oc.env:JINA_BASE_URL,https://r.jina.ai}"
  # Endpoint/model/key for the summarization LLM (no defaults: must be set).
  SUMMARY_LLM_BASE_URL: "${oc.env:SUMMARY_LLM_BASE_URL}"
  SUMMARY_LLM_MODEL_NAME: "${oc.env:SUMMARY_LLM_MODEL_NAME}"
  SUMMARY_LLM_API_KEY: "${oc.env:SUMMARY_LLM_API_KEY}"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
# MCP tool config: web search (Serper) plus webpage scraping.
# Launched as a Python MCP server subprocess.
name: "search_and_scrape_webpage"
tool_command: "python"
args:
  - "-m"
  - "src.tool.mcp_servers.search_and_scrape_webpage_mcp_server"
env:
  # Search API keys - these values will be loaded from the .env file at runtime
  SERPER_API_KEY: "${oc.env:SERPER_API_KEY}"
  # Defaults to the public Serper endpoint when SERPER_BASE_URL is unset.
  SERPER_BASE_URL: "${oc.env:SERPER_BASE_URL,https://google.serper.dev}"
  # Temporarily disabled: Sogou search functionality
  # TENCENTCLOUD_SECRET_ID: "${oc.env:TENCENTCLOUD_SECRET_ID}"
  # TENCENTCLOUD_SECRET_KEY: "${oc.env:TENCENTCLOUD_SECRET_KEY}"
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
#!/bin/bash

# SPDX-FileCopyrightText: 2025 MiromindAI
#
# SPDX-License-Identifier: Apache-2.0

# Launch NUM_RUNS evaluation runs in parallel (each in a backgrounded
# subshell), wait for all of them to finish, then compute the average
# score across runs. Per-run stdout/stderr goes to
# $RESULTS_DIR/run_<i>_output.log.

# Configuration parameters
NUM_RUNS=3
BENCHMARK_NAME="gaia-validation-text-only"
AGENT_SET="agent_gaia-validation-text-only_mirothinker_single_agent_new_tools"
MAX_CONCURRENT=25

# Set results directory with timestamp; RESULTS_DIR env var overrides.
TIMESTAMP=$(date +%Y%m%d_%H%M)
RESULTS_DIR=${RESULTS_DIR:-"logs/${BENCHMARK_NAME}/${AGENT_SET}_${TIMESTAMP}"}

echo "Starting $NUM_RUNS runs of the evaluation..."
echo "Results will be saved in: $RESULTS_DIR"

# Create results directory
mkdir -p "$RESULTS_DIR"

for i in $(seq 1 "$NUM_RUNS"); do
    echo "=========================================="
    echo "Launching experiment $i/$NUM_RUNS"
    echo "=========================================="

    RUN_ID="run_$i"

    (
        # Test the command's exit status directly in the `if` instead of
        # inspecting $? afterwards — avoids accidentally checking the
        # status of an intervening command.
        if uv run test_benchmark.py \
            --config-path "config/${AGENT_SET}.yaml" \
            benchmark.execution.max_concurrent="$MAX_CONCURRENT" \
            output_dir="$RESULTS_DIR/$RUN_ID" \
            > "$RESULTS_DIR/${RUN_ID}_output.log" 2>&1; then
            echo "Run $i completed successfully"
            # -print -quit stops at the first match (replaces `| head -1`).
            RESULT_FILE=$(find "${RESULTS_DIR}/$RUN_ID" -name "*accuracy.txt" -print -quit 2>/dev/null)
            # Guard against an empty match before the -f test.
            if [ -n "$RESULT_FILE" ] && [ -f "$RESULT_FILE" ]; then
                echo "Results saved to $RESULT_FILE"
            else
                echo "Warning: Result file not found for run $i"
            fi
        else
            echo "Run $i failed!"
        fi
    ) &

    # Stagger launches slightly so concurrent runs don't collide on startup.
    sleep 2
done

echo "All $NUM_RUNS runs have been launched in parallel"
echo "Waiting for all runs to complete..."

# Block until every backgrounded run subshell exits.
wait

echo "=========================================="
echo "All $NUM_RUNS runs completed!"
echo "=========================================="

echo "Calculating average scores..."
uv run python -c "from src.utils.old.calculate_average_score import main; main('$RESULTS_DIR')"

echo "=========================================="
echo "Multiple runs evaluation completed!"
echo "Check results in: $RESULTS_DIR"
echo "Check individual run logs: $RESULTS_DIR/run_*_output.log"
echo "=========================================="

0 commit comments

Comments
 (0)