diff --git a/README.md b/README.md index 3c30b930..057d8ed4 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ cp .env.template .env # Edit .env and add your OPENROUTER_API_KEY # 3. Run your first agent -uv run main.py trace --config_file_name=agent_quickstart_1 --task="What is the first country listed in the XLSX file that have names starting with Co?" --task_file_name="data/FSI-2023-DOWNLOAD.xlsx" +uv run main.py trace --config_file_name=agent_quickstart_reading --task="What is the first country listed in the XLSX file that have names starting with Co?" --task_file_name="data/FSI-2023-DOWNLOAD.xlsx" ``` 🎉 **Expected Output:** Your agent should return **\boxed{Congo Democratic Republic}** 😊 diff --git a/README_ja.md b/README_ja.md index 61bc0011..335c83f6 100644 --- a/README_ja.md +++ b/README_ja.md @@ -86,7 +86,7 @@ cp .env.template .env # .env を編集して OPENROUTER_API_KEY を追加 # 3. 最初のエージェントを実行 -uv run main.py trace --config_file_name=agent_quickstart_1 --task="What is the first country listed in the XLSX file that have names starting with Co?" --task_file_name="data/FSI-2023-DOWNLOAD.xlsx" +uv run main.py trace --config_file_name=agent_quickstart_reading --task="What is the first country listed in the XLSX file that have names starting with Co?" --task_file_name="data/FSI-2023-DOWNLOAD.xlsx" ``` 🎉 **想定出力**: エージェントは **\boxed{Congo Democratic Republic}** を返すはずです 😊 diff --git a/README_zh.md b/README_zh.md index 156cac66..769b4a67 100644 --- a/README_zh.md +++ b/README_zh.md @@ -86,7 +86,7 @@ cp .env.template .env # 编辑 .env 并添加您的 OPENROUTER_API_KEY # 3. 运行您的第一个智能体 -uv run main.py trace --config_file_name=agent_quickstart_1 --task="What is the first country listed in the XLSX file that have names starting with Co?" 
--task_file_name="data/FSI-2023-DOWNLOAD.xlsx" +uv run main.py trace --config_file_name=agent_quickstart_reading --task="What is the first country listed in the XLSX file that have names starting with Co?" --task_file_name="data/FSI-2023-DOWNLOAD.xlsx" ``` 🎉 **预期输出**: 您的智能体应该返回 **\boxed{Congo Democratic Republic}** 😊 diff --git a/config/agent_quickstart_1.yaml b/config/agent_quickstart_reading.yaml similarity index 64% rename from config/agent_quickstart_1.yaml rename to config/agent_quickstart_reading.yaml index 076daf13..e0161624 100644 --- a/config/agent_quickstart_1.yaml +++ b/config/agent_quickstart_reading.yaml @@ -22,7 +22,8 @@ main_agent: keep_tool_result: -1 oai_tool_thinking: false - tool_config: [] + tool_config: + - tool-reading max_turns: -1 # Maximum number of turns for main agent execution max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn @@ -40,30 +41,7 @@ main_agent: chinese_context: "${oc.env:CHINESE_CONTEXT,false}" -sub_agents: - agent-worker: - prompt_class: SubAgentWorkerPrompt - llm: - provider_class: "ClaudeOpenRouterClient" - model_name: "anthropic/claude-3.7-sonnet" - async_client: true - temperature: 0.3 - top_p: 0.95 - min_p: 0.0 - top_k: -1 - max_tokens: 32000 - openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}" - openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}" - openrouter_provider: "anthropic" - disable_cache_control: false - keep_tool_result: -1 - oai_tool_thinking: false - - tool_config: - - tool-reading - - max_turns: -1 # Maximum number of turns for main agent execution - max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn +sub_agents: null # Can define some top-level or default parameters here diff --git a/config/agent_quickstart_search.yaml b/config/agent_quickstart_search.yaml new file mode 100644 index 00000000..96df9e5f --- /dev/null +++ b/config/agent_quickstart_search.yaml @@ -0,0 +1,50 @@ +defaults: + - benchmark: 
gaia-validation + - override hydra/job_logging: none + - _self_ # Allow defining variables at the top of this file + + +main_agent: + prompt_class: MainAgentPromptBoxedAnswer + llm: + provider_class: "ClaudeOpenRouterClient" + model_name: "anthropic/claude-3.7-sonnet" + async_client: true + temperature: 0.3 + top_p: 0.95 + min_p: 0.0 + top_k: -1 + max_tokens: 32000 + openrouter_api_key: "${oc.env:OPENROUTER_API_KEY,???}" + openrouter_base_url: "${oc.env:OPENROUTER_BASE_URL,https://openrouter.ai/api/v1}" + openrouter_provider: "anthropic" + disable_cache_control: false + keep_tool_result: -1 + oai_tool_thinking: false + + tool_config: + - tool-searching-serper + + max_turns: -1 # Maximum number of turns for main agent execution + max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn + + input_process: + hint_generation: false + hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}" + output_process: + final_answer_extraction: false + final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}" + + openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction + add_message_id: true + keep_tool_result: -1 + chinese_context: "${oc.env:CHINESE_CONTEXT,false}" + + +sub_agents: null + + +# Can define some top-level or default parameters here +output_dir: logs/ +data_dir: "${oc.env:DATA_DIR,data}" # Points to where data is stored + diff --git a/config/agent_quickstart_single.yaml b/config/agent_quickstart_single_agent.yaml similarity index 94% rename from config/agent_quickstart_single.yaml rename to config/agent_quickstart_single_agent.yaml index 8bff69a8..068da69d 100644 --- a/config/agent_quickstart_single.yaml +++ b/config/agent_quickstart_single_agent.yaml @@ -23,12 +23,8 @@ main_agent: oai_tool_thinking: false tool_config: - - tool-reasoning-os - - tool-searching - - tool-image-video-os - tool-reading - - tool-code - - tool-audio-os + - tool-searching max_turns: -1 
# Maximum number of turns for main agent execution max_tool_calls_per_turn: 10 # Maximum number of tool calls per turn diff --git a/config/tool/tool-serper-search.yaml b/config/tool/tool-searching-serper.yaml similarity index 67% rename from config/tool/tool-serper-search.yaml rename to config/tool/tool-searching-serper.yaml index b044896f..6fa4c221 100644 --- a/config/tool/tool-serper-search.yaml +++ b/config/tool/tool-searching-serper.yaml @@ -1,8 +1,8 @@ -name: "tool-serper-search" +name: "tool-searching-serper" tool_command: "npx" args: - "-y" - "serper-search-scrape-mcp-server" env: # Search API key - this value will be loaded from the .env file at runtime - SERPER_API_KEY: "${oc.env:SERPER_API_KEY}" \ No newline at end of file + SERPER_API_KEY: "${oc.env:SERPER_API_KEY}" diff --git a/docs/mkdocs/docs/contribute_benchmarks.md b/docs/mkdocs/docs/contribute_benchmarks.md index 64152a66..2f09dc21 100644 --- a/docs/mkdocs/docs/contribute_benchmarks.md +++ b/docs/mkdocs/docs/contribute_benchmarks.md @@ -148,7 +148,7 @@ Start with a small subset to verify everything works correctly: ```bash title="Test Benchmark Integration" uv run main.py common-benchmark \ - --config_file_name=agent_quickstart_1 \ + --config_file_name=agent_quickstart_reading \ benchmark=your-benchmark \ benchmark.execution.max_tasks=3 \ output_dir="logs/test-your-benchmark/$(date +"%Y%m%d_%H%M")" @@ -160,7 +160,7 @@ Once testing passes, run the complete benchmark: ```bash title="Run Full Benchmark" uv run main.py common-benchmark \ - --config_file_name=agent_quickstart_1 \ + --config_file_name=agent_quickstart_reading \ benchmark=your-benchmark \ output_dir="logs/your-benchmark/$(date +"%Y%m%d_%H%M")" ``` diff --git a/docs/mkdocs/docs/futurex.md b/docs/mkdocs/docs/futurex.md index 080ec6f5..eec78603 100644 --- a/docs/mkdocs/docs/futurex.md +++ b/docs/mkdocs/docs/futurex.md @@ -72,10 +72,10 @@ OPENAI_BASE_URL="https://api.openai.com/v1" ### Step 3: Run the Evaluation !!! 
example "Evaluation Execution" - Execute the following command to run evaluation on the Futurex-Online dataset. This uses the basic `agent_quickstart_1` configuration for quick start purposes. + Execute the following command to run evaluation on the Futurex-Online dataset. This uses the basic `agent_quickstart_reading` configuration for quick start purposes. ```bash title="Run Futurex-Online Evaluation" -uv run main.py common-benchmark --config_file_name=agent_quickstart_1 benchmark=futurex output_dir="logs/futurex/$(date +"%Y%m%d_%H%M")" +uv run main.py common-benchmark --config_file_name=agent_quickstart_reading benchmark=futurex output_dir="logs/futurex/$(date +"%Y%m%d_%H%M")" ``` !!! tip "Progress Monitoring and Resume" @@ -88,7 +88,7 @@ uv run main.py common-benchmark --config_file_name=agent_quickstart_1 benchmark= If you need to resume an interrupted evaluation, specify the same output directory to continue from where you left off. ```bash title="Resume Evaluation, e.g." - uv run main.py common-benchmark --config_file_name=agent_quickstart_1 benchmark=futurex output_dir="logs/futurex/20250918_1010" + uv run main.py common-benchmark --config_file_name=agent_quickstart_reading benchmark=futurex output_dir="logs/futurex/20250918_1010" ``` ### Step 4: Extract Results @@ -184,13 +184,13 @@ Check the generated files for voting analysis: ```bash title="Check Voting Results" # View submission file with voting results -cat logs/futurex/agent_quickstart_1_*/futurex_submission.jsonl +cat logs/futurex/agent_quickstart_reading_*/futurex_submission.jsonl # Check individual run results -ls logs/futurex/agent_quickstart_1_*/run_*/ +ls logs/futurex/agent_quickstart_reading_*/run_*/ # Check progress and voting statistics -uv run python utils/progress_check/check_futurex_progress.py logs/futurex/agent_quickstart_1_* +uv run python utils/progress_check/check_futurex_progress.py logs/futurex/agent_quickstart_reading_* ``` ### Manual Voting Aggregation @@ -199,13 +199,13 @@ You 
can also manually run the voting aggregation: ```bash title="Manual Voting Aggregation" # Aggregate multiple runs with majority voting -uv run python utils/extract_futurex_results.py logs/futurex/agent_quickstart_1_* --aggregate +uv run python utils/extract_futurex_results.py logs/futurex/agent_quickstart_reading_* --aggregate # Force single run mode (if needed) -uv run python utils/extract_futurex_results.py logs/futurex/agent_quickstart_1_*/run_1 --single +uv run python utils/extract_futurex_results.py logs/futurex/agent_quickstart_reading_*/run_1 --single # Specify custom output file -uv run python utils/extract_futurex_results.py logs/futurex/agent_quickstart_1_* -o my_voted_predictions.jsonl +uv run python utils/extract_futurex_results.py logs/futurex/agent_quickstart_reading_* -o my_voted_predictions.jsonl ``` ### Voting Output Format @@ -249,7 +249,7 @@ For example, `"vote_counts": {"No": 2}` means 2 out of 2 runs predicted "No", in After running multiple evaluations, you'll find the following structure: ``` -logs/futurex/agent_quickstart_1_YYYYMMDD_HHMM/ +logs/futurex/agent_quickstart_reading_YYYYMMDD_HHMM/ ├── futurex_submission.jsonl # Final voted predictions ├── run_1/ # First run results │ ├── benchmark_results.jsonl # Individual task results diff --git a/docs/mkdocs/docs/quickstart.md b/docs/mkdocs/docs/quickstart.md index cdd55000..8f502b70 100644 --- a/docs/mkdocs/docs/quickstart.md +++ b/docs/mkdocs/docs/quickstart.md @@ -1,7 +1,6 @@ - # 🚀 Get Started in Under 5 Minutes -Clone the repository, configure your API key, and run your first intelligent agent. You'll just need one `OPENROUTER_API_KEY`. +Clone the repository, configure your API keys, and run your first intelligent agent. MiroFlow provides multiple pre-configured agents for different use cases. 
--- @@ -14,52 +13,139 @@ Clone the repository, configure your API key, and run your first intelligent age --- -## ⚡ Quick Setup - -### Example 1: Intelligent document analysis with file processing capabilities +## 🎯 Example 1: Document Analysis !!! example "File Processing Demo" - This example demonstrates MiroFlow's document analysis capabilities. + Analyze structured data files (Excel, CSV, PDF, etc.) with intelligent document processing. + + **Required:** [OPENROUTER_API_KEY](https://openrouter.ai/): to access Claude 3.7 Sonnet -```bash title="Setup Commands" +```bash title="Setup and Run Document Analysis" # 1. Clone and setup git clone https://github.com/MiroMindAI/MiroFlow && cd MiroFlow uv sync -# 2. Configure API key +# 2. Configure API key (REQUIRED for LLM access) cp .env.template .env # Edit .env and add your OPENROUTER_API_KEY +# This key is necessary to access Claude 3.7 Sonnet for document analysis -# 3. Run your first agent -uv run main.py trace --config_file_name=agent_quickstart_1 --task="What is the first country listed in the XLSX file that have names starting with Co?" --task_file_name="data/FSI-2023-DOWNLOAD.xlsx" +# 3. Run document analysis +uv run main.py trace --config_file_name=agent_quickstart_reading --task="What is the first country listed in the XLSX file that have names starting with Co?" --task_file_name="data/FSI-2023-DOWNLOAD.xlsx" ``` +**What this does:** + +- Uses the `tool-reading` capability to process Excel files +- Leverages Claude 3.7 Sonnet (via OpenRouter API) for intelligent analysis +- Finds countries starting with "Co" and returns the first one + !!! success "Expected Output" - 🎉 **Expected Output:** Your agent should return **\boxed{Congo Democratic Republic}** 😊 + 🎉 **Expected Output:** Your agent should return **\boxed{Congo Democratic Republic}** + +--- + +## 🎯 Example 2: Web Search Analysis + +!!! 
example "Real-time Web Research" + Search the web for current information and get intelligent analysis of the results. + + **Required:** [OPENROUTER_API_KEY](https://openrouter.ai/) and [SERPER_API_KEY](https://serper.dev/) -!!! tip "Troubleshooting" - **💡 Tip:** If you encounter issues, check that your API key is correctly set in the `.env` file and that all dependencies are installed. +```bash title="Setup and Run Web Search" +# 1. Clone and setup (if not done already) +git clone https://github.com/MiroMindAI/MiroFlow && cd MiroFlow +uv sync -!!! note "Coming Soon" - **Coming Soon:** We will add a video demo for this example +# 2. Configure API keys (if not done already) +cp .env.template .env +# Edit .env and add your OPENROUTER_API_KEY and SERPER_API_KEY +# These keys are necessary to access Claude 3.7 Sonnet and web search capabilities + +# 3. Run web search analysis +uv run main.py trace --config_file_name=agent_quickstart_search --task="What is the current NASDAQ index price and what are the main factors affecting it today?" +``` + +**What this does:** + +- Uses the `tool-searching-serper` capability to search the web +- Leverages Claude 3.7 Sonnet (via OpenRouter API) for intelligent analysis +- Searches for current NASDAQ index information and market factors +- Provides real-time financial data analysis + +!!! success "Expected Output" + 🎉 **Expected Output:** Current NASDAQ index price with analysis of key market factors affecting it --- -### Example 2: Web research and multi-agent orchestration +## 🔧 Configuration Options -!!! warning "Work in Progress" - The example is not complete yet, to be completed +### Available Agent Configurations -```bash title="Web Research Command" -uv run main.py trace --config_file_name=agent_quickstart_2 --task="What is the Nasdaq Composite Index at today?" 
+| Agent | Tools | Use Case | +|-------|-------|----------| +| `agent_quickstart_reading` | Document reading | File analysis, data extraction, document summarization | +| `agent_quickstart_search` | Web search | Real-time information, market data, current events | + +### Customizing Tasks + +You can customize any task by modifying the `--task` parameter: + +```bash +# Analyze different files +uv run main.py trace --config_file_name=agent_quickstart_reading \ + --task="Summarize the main findings in this document" \ + --task_file_name="path/to/your/document.pdf" + +# Search for different information +uv run main.py trace --config_file_name=agent_quickstart_search \ + --task="What are the latest developments in AI technology?" ``` -!!! note "Coming Soon" - **Coming Soon:** Web research and multi-agent orchestration example +--- + +## 🐛 Troubleshooting + +### Common Issues + +!!! warning "API Key Issues" + **Problem:** Agent fails to start or returns errors + **Solution:** Ensure your API keys are correctly set in the `.env` file: + ```bash + OPENROUTER_API_KEY=your_key_here + SERPER_API_KEY=your_key_here # For web search examples + ``` + + +!!! warning "Tool Execution Errors" + **Problem:** Tools fail to execute + **Solution:** Check that all dependencies are installed: + ```bash + uv sync # Reinstall dependencies + ``` + +### Getting Help + +- Check the [FAQ section](faqs.md) for common questions +- Review the [YAML Configuration Guide](yaml_config.md) for advanced setup +- Explore [Tool Documentation](tool_overview.md) for available capabilities --- -!!! info "Documentation Info" - **Last Updated:** Sep 2025 · **Doc Contributor:** Team @ MiroMind AI +## 🚀 Next Steps + +Once you've tried the examples above, explore more advanced features: +1. **Custom Agent Configuration**: Create your own agent setups + ```bash + # Copy and modify existing configs + cp config/agent_quickstart_reading.yaml config/my_custom_agent.yaml + ``` +2. 
**Tool Development**: Add custom tools for your specific needs + - See [Contributing Tools](contribute_tools.md) guide + +--- + +!!! info "Documentation Info" + **Last Updated:** October 2025 · **Doc Contributor:** Team @ MiroMind AI \ No newline at end of file diff --git a/docs/mkdocs/docs/yaml_config.md b/docs/mkdocs/docs/yaml_config.md index 1ce8917d..693cde80 100644 --- a/docs/mkdocs/docs/yaml_config.md +++ b/docs/mkdocs/docs/yaml_config.md @@ -30,7 +30,7 @@ uv run main.py common-benchmark \ **Single Task** ```bash uv run main.py trace \ - --config_file_name=agent_quickstart_1 \ + --config_file_name=agent_quickstart_reading \ --task="Your task here" \ --task_file_name="data/file.xlsx" ``` @@ -174,7 +174,7 @@ CHINESE_CONTEXT="false" ## Best Practices !!! success "Quick Tips" - - **Start simple**: Use `agent_quickstart_1.yaml` as a base + - **Start simple**: Use `agent_quickstart_reading.yaml` as a base - **Tool selection**: Choose tools based on your task requirements - **API keys**: Always use environment variables, never hardcode - **Resource limits**: Set `max_concurrent` and `max_tokens` appropriately diff --git a/scripts/run_evaluate_multiple_runs_futurex.sh b/scripts/run_evaluate_multiple_runs_futurex.sh index c441696d..ce54e1bf 100755 --- a/scripts/run_evaluate_multiple_runs_futurex.sh +++ b/scripts/run_evaluate_multiple_runs_futurex.sh @@ -5,16 +5,16 @@ # SPDX-License-Identifier: Apache-2.0 # Multiple runs FutureX evaluation script -# Based on the working command: uv run main.py common-benchmark --config_file_name=agent_quickstart_1 benchmark=futurex output_dir=logs/futurex-test +# Based on the working command: uv run main.py common-benchmark --config_file_name=agent_quickstart_reading benchmark=futurex output_dir=logs/futurex-test # Configuration parameters NUM_RUNS=${NUM_RUNS:-3} MAX_TASKS=${MAX_TASKS:-null} MAX_CONCURRENT=${MAX_CONCURRENT:-5} BENCHMARK_NAME="futurex" -AGENT_SET=${AGENT_SET:-"agent_quickstart_1"} 
+AGENT_SET=${AGENT_SET:-"agent_quickstart_reading"} -# TODO: Add more settings like message ID and max turns, currently not supported using agent_quickstart_1 +# TODO: Add more settings like message ID and max turns, currently not supported using agent_quickstart_reading # ADD_MESSAGE_ID=${ADD_MESSAGE_ID:-"false"} # MAX_TURNS=${MAX_TURNS:-1} diff --git a/utils/trace_single_task.py b/utils/trace_single_task.py index 33d90837..8626e868 100644 --- a/utils/trace_single_task.py +++ b/utils/trace_single_task.py @@ -17,10 +17,6 @@ ) from omegaconf import DictConfig -import os - -LOGGER_LEVEL = os.getenv("LOGGER_LEVEL", "INFO") - async def single_task( cfg: DictConfig, @@ -75,6 +71,10 @@ def main( dotenv.load_dotenv() with hydra.initialize_config_dir(config_dir=config_path(), version_base=None): cfg = hydra.compose(config_name=chosen_config_name, overrides=list(args)) - logger = bootstrap_logger(level=LOGGER_LEVEL) + logger = bootstrap_logger(level="DEBUG", to_console=True) + + # Test if logger is working + logger.info("Logger initialized successfully") + # Tracing functionality removed - miroflow-contrib deleted asyncio.run(single_task(cfg, logger, str(task_id), task, task_file_name))