MiroMindAI
diff --git a/‎.env.template‎
Lines changed: 72 additions & 16 deletions b/‎.env.template‎
Lines changed: 72 additions & 16 deletions
diff --git a/‎.github/workflows/run-ruff.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/run-ruff.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 1 deletion b/‎.gitignore‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎README.md‎
Lines changed: 69 additions & 11 deletions b/‎README.md‎
Lines changed: 69 additions & 11 deletions
diff --git a/‎config/agent_quickstart.yaml‎
Lines changed: 4 additions & 4 deletions b/‎config/agent_quickstart.yaml‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎config/agent_quickstart_graph.yaml‎
Lines changed: 6 additions & 6 deletions b/‎config/agent_quickstart_graph.yaml‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎config/agent_quickstart_skill.yaml‎
Lines changed: 3 additions & 3 deletions b/‎config/agent_quickstart_skill.yaml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎config/agent_single-test.yaml‎
Lines changed: 2 additions & 2 deletions b/‎config/agent_single-test.yaml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎…ndard_browsecomp-en-200_mirothinker.yaml‎ ‎…hmark_browsecomp-en-200_mirothinker.yaml‎config/standard_browsecomp-en-200_mirothinker.yaml renamed to config/benchmark_browsecomp-en-200_mirothinker.yaml
Lines changed: 2 additions & 2 deletions b/‎…ndard_browsecomp-en-200_mirothinker.yaml‎ ‎…hmark_browsecomp-en-200_mirothinker.yaml‎config/standard_browsecomp-en-200_mirothinker.yaml renamed to config/benchmark_browsecomp-en-200_mirothinker.yaml
Lines changed: 2 additions & 2 deletions
diff --git a/‎…/standard_browsecomp-en_mirothinker.yaml‎ ‎…benchmark_browsecomp-en_mirothinker.yaml‎config/standard_browsecomp-en_mirothinker.yaml renamed to config/benchmark_browsecomp-en_mirothinker.yaml
Lines changed: 2 additions & 2 deletions b/‎…/standard_browsecomp-en_mirothinker.yaml‎ ‎…benchmark_browsecomp-en_mirothinker.yaml‎config/standard_browsecomp-en_mirothinker.yaml renamed to config/benchmark_browsecomp-en_mirothinker.yaml
Lines changed: 2 additions & 2 deletions
@@ -1,23 +1,79 @@
 
-# Must have for minimal agent get started
-OPENROUTER_API_KEY=xxxx
+# ============================================================
+# MiroFlow Environment Configuration Template
+# ============================================================
+# Copy this file to .env and fill in the values.
+# Commented-out variables (lines of the form "# NAME=...") are optional or
+# have defaults; uncomment a line and set its value to enable or override it.
+# ============================================================
 
-SERPER_API_KEY=xxxx
-JINA_API_KEY=xxxx
-E2B_API_KEY=xxxx
 
-OPENAI_API_KEY=xxxx
-OPENAI_BASE_URL=xxxx
+# ------ Core LLM (OpenAI-compatible, required) ------
+OPENAI_API_KEY=
+OPENAI_BASE_URL=
 
-OAI_MIROTHINKER_BASE_URL=xxxx
-OAI_MIROTHINKER_API_KEY=xxxx
+# ------ MiroThinker ------
+OAI_MIROTHINKER_BASE_URL=
+OAI_MIROTHINKER_API_KEY=
 
-SUMMARY_LLM_BASE_URL=xxxx
-SUMMARY_LLM_API_KEY=xxxx
-SUMMARY_LLM_MODEL_NAME=xxxx
+# ------ Summary LLM (used by jina_scrape) ------
+SUMMARY_LLM_BASE_URL=
+SUMMARY_LLM_API_KEY=
+SUMMARY_LLM_MODEL_NAME=
 
-HF_TOKEN=xxxx
+# ------ Search: Serper (used by searching/reading MCP servers) ------
+SERPER_API_KEY=
+# SERPER_BASE_URL=              # Optional: override default Serper endpoint
 
-# TencentCloud credentials for Sogou search (used by serper_sogou_search tool)
-TENCENTCLOUD_SECRET_ID=xxxx
-TENCENTCLOUD_SECRET_KEY=xxxx
+# ------ Search: Jina (used by searching/reading MCP servers) ------
+JINA_API_KEY=
+# JINA_BASE_URL=                # Optional: override default Jina endpoint
+
+# ------ Search: TencentCloud Sogou (used by serper_sogou_search) ------
+# TENCENTCLOUD_SECRET_ID=
+# TENCENTCLOUD_SECRET_KEY=
+
+# ------ Code Sandbox: E2B ------
+E2B_API_KEY=
+
+# ------ Vision MCP Server ------
+# Supports multiple providers; enable the ones you need.
+# ENABLE_OPENAI_VISION=true     # Uses OPENAI_API_KEY / OPENAI_BASE_URL above
+# OPENAI_MODEL_NAME=            # Model name for OpenAI vision
+# ENABLE_CLAUDE_VISION=true
+# ANTHROPIC_API_KEY=
+# ANTHROPIC_BASE_URL=
+# ANTHROPIC_MODEL_NAME=
+# GEMINI_API_KEY=
+# GEMINI_MODEL_NAME=
+
+# ------ Vision MCP Server (Open-Source alternative) ------
+# VISION_API_KEY=
+# VISION_BASE_URL=
+# VISION_MODEL_NAME=
+
+# ------ Reasoning MCP Server ------
+# Uses OPENAI / ANTHROPIC keys above by default.
+# OPENAI_MODEL_NAME is shared with vision.
+
+# ------ Reasoning MCP Server (Open-Source alternative) ------
+# REASONING_API_KEY=
+# REASONING_BASE_URL=
+# REASONING_MODEL_NAME=
+
+# ------ Audio MCP Server ------
+# Uses OPENAI_API_KEY / OPENAI_BASE_URL above by default.
+# OPENAI_AUDIO_MODEL_NAME=
+# OPENAI_TRANSCRIPTION_MODEL_NAME=
+
+# ------ Audio MCP Server (Open-Source alternative) ------
+# WHISPER_API_KEY=
+# WHISPER_BASE_URL=
+# WHISPER_MODEL_NAME=
+
+# ------ Hugging Face ------
+# HF_TOKEN=                     # Optional: for downloading benchmark datasets
+
+# ------ Web App ------
+# MIROFLOW_HOST=0.0.0.0
+# MIROFLOW_PORT=8000
+# MIROFLOW_DEBUG=false
@@ -26,7 +26,7 @@ jobs:
       uses: astral-sh/setup-uv@v5
 
     - name: Install dependencies
-      run: uv sync
+      run: uv sync --extra dev
 
     - name: Check static error
       run: |
 
@@ -136,6 +136,7 @@ celerybeat.pid
 
 # Environments
 .env
+.env.*
 .envrc
 .venv
 env/
@@ -229,4 +230,4 @@ web_app/uploads/
 .vscode/
 .ruff_cache/
 .env
-.env.local
+.env.*
@@ -1,7 +1,7 @@
 <div align="center">
   <img src="docs/mkdocs/docs/assets/miroflow_logo.png" width="45%" alt="MiroFlow" />
 
-  <h3>Open-Source Research Agent Framework with State-of-the-Art Performance</h3>
+  <h3>Performance-First Agent Framework That Makes Any Model Better</h3>
 
 [![DEMO](https://img.shields.io/badge/Demo-FFB300?style=for-the-badge&logo=airplayvideo&logoColor=white)](https://dr.miromind.ai/)
 [![MODELS](https://img.shields.io/badge/Models-5EDDD2?style=for-the-badge&logo=huggingface&logoColor=ffffff&labelColor)](https://huggingface.co/miromind-ai)
@@ -13,8 +13,8 @@
 </div>
 
 <div align="center">
-<strong>MiroFlow</strong> is an open-source research agent framework that achieves <strong>#1 ranking</strong> across representative benchmarks (FutureX, GAIA, HLE, xBench-DeepSearch, BrowseComp).<br>
-It powers <a href="https://github.com/MiroMindAI/mirothinker">MiroThinker</a>, our open-source agent foundation model with native tool-assisted reasoning.
+<strong>MiroFlow</strong> is the open-source agent framework that maximizes any model's agent performance — and proves it across 9+ benchmarks with reproducible results.<br>
+Plug in GPT-5, Claude, <a href="https://github.com/MiroMindAI/mirothinker">MiroThinker</a>, Kimi, DeepSeek, or any OpenAI-compatible model. Same tools. Same environment. Better results.
 </div>
 
 <br>
@@ -34,22 +34,61 @@ It powers <a href="https://github.com/MiroMindAI/mirothinker">MiroThinker</a>, o
 
 - **[2025-09-15]**: **MiroFlow v0.3**: Enhanced codebase architecture and significantly improved benchmark performance, boosting GPT-5's prediction accuracy for future events by 11%. MiroFlow now ranks #1 in the future prediction benchmark. See [FutureX](https://futurex-ai.github.io/).
 - **[2025-08-27]**: **MiroFlow v0.2**: Achieves state-of-the-art performance across [multiple agentic benchmarks](https://miromind.ai), including HLE (27.2%), HLE-Text-Only (29.5%), BrowseComp-EN (33.2%), BrowseComp-ZH (47.1%), and xBench-DeepSearch (72.0%).
-- **[2025-08-26]**: Released [GAIA Validation Trace](docs/public_trace.md) (73.94% pass@1) and [Gradio Demo](https://github.com/MiroMindAI/MiroThinker/tree/main/apps/gradio-demo) for local deployment.
+- **[2025-08-26]**: Released GAIA Validation Trace (73.94% pass@1) and [Gradio Demo](https://github.com/MiroMindAI/MiroThinker/tree/main/apps/gradio-demo) for local deployment.
 - **[2025-08-08]**: **MiroFlow v0.1**: Complete open-source release of the research agent framework.
 
 </details>
 
 ---
 
-## Highlights
+## Architecture
 
-- **Reproducible State-of-the-Art Performance**: #1 ranking across [multiple representative agentic benchmarks](https://miromindai.github.io/miroflow/evaluation_overview/), including FutureX, GAIA, HLE, xBench-DeepSearch, and BrowseComp.
-- **High Concurrency & Reliability**: Robust concurrency management and fault-tolerant design for handling rate-limited APIs and unstable networks.
-- **Cost-Effective Deployment**: Run a research agent service on a single RTX 4090 with the open-source [MiroThinker](https://github.com/MiroMindAI/mirothinker) model and free tools.
+<div align="center">
+  <img src="docs/mkdocs/docs/assets/miroflow_architecture_v1.6.png" width="100%" alt="MiroFlow Architecture" />
+</div>
 
 ---
 
-## Performance on Benchmarks
+## Why MiroFlow
+
+### Make Any Model Better
+- **Model-Agnostic Performance**: Plug in any LLM — GPT-5, Claude, MiroThinker, Kimi K2.5, DeepSeek — and get better agent performance through smart rollback, iterative reasoning, and optimized tool orchestration.
+- **#1 Across 9+ Benchmarks**: Reproducible state-of-the-art on FutureX, GAIA, HLE, xBench-DeepSearch, BrowseComp, and more.
+- **One-Line Model Switching**: Change `provider_class` and `model_name` in YAML. Same tools, same prompts, same environment.
+
+### Prove It
+- **Standardized Evaluation**: Fair model comparison with identical infrastructure. The framework is the constant; the model is the variable.
+- **Automated Multi-Run Evaluation**: Parallel runs with statistical aggregation (mean, std dev, min/max). Every result reproducible from config to score.
+
+### Build With It
+- **Skill System**: Define agent skills via `SKILL.md` — no code changes needed.
+- **Agent Graph**: Compose multi-agent workflows with hierarchical graphs.
+- **Web Application**: FastAPI + React interface out of the box.
+- **Plugin Architecture**: `@register` decorator — extend without touching core code.
+- **Zero-Code Prompts**: YAML + Jinja2 templates.
+- **Cost-Effective**: Run a research agent service on a single RTX 4090 with the open-source [MiroThinker](https://github.com/MiroMindAI/mirothinker) model.
+
+---
+
+## Any Model, Better Results
+
+### Cross-Model Performance (MiroFlow Framework)
+
+| Benchmark | MiroThinker | Claude 3.7 Sonnet | Kimi K2.5 |
+|-----------|-------------|-------------------|-----------|
+| GAIA Validation (165) | **82.4%** | 73.9% | — |
+| GAIA Text-Only (103) | **79.6%** | — | 52.4% |
+| HLE | **27.2%** | — | — |
+| HLE Text-Only | **29.5%** | — | — |
+| BrowseComp-EN | 33.2% | — | — |
+| BrowseComp-ZH | **47.1%** | — | — |
+| xBench-DeepSearch | **72.0%** | — | — |
+| FutureX | **#1** | — | — |
+
+> All results use the same MiroFlow tools, prompts, and infrastructure. The only variable is the model.
+> See the full [Model Comparison](https://miromindai.github.io/miroflow/model_comparison/) for details.
+
+### Featured Results: MiroThinker
 
 <div align="center">
   <img width="100%" alt="MiroThinker Performance" src="docs/mkdocs/docs/assets/mirothinker.png" />
@@ -59,7 +98,7 @@ It powers <a href="https://github.com/MiroMindAI/mirothinker">MiroThinker</a>, o
   <img width="100%" alt="BrowseComp MiroThinker Performance" src="docs/mkdocs/docs/assets/bc-mirothinker.png" />
 </div>
 
-Follow our detailed guides to reproduce benchmark results in our [Benchmarks Documentation](https://miromindai.github.io/miroflow/evaluation_overview/).
+Follow our detailed guides to reproduce any result in our [Benchmarks Documentation](https://miromindai.github.io/miroflow/evaluation_overview/).
 
 ---
 
@@ -83,6 +122,25 @@ bash scripts/test_single_task.sh \
 
 Expected output: `\boxed{Congo Democratic Republic}`
 
+**Switch models in one line** — same tools, same prompts, different LLM. The blocks below are alternatives; keep exactly one `llm:` block in your config:
+
+```yaml
+# GPT-5
+llm:
+  provider_class: GPT5OpenAIClient
+  model_name: gpt-5
+
+# Claude 3.7 Sonnet
+llm:
+  provider_class: ClaudeAnthropicClient
+  model_name: claude-3-7-sonnet-20250219
+
+# MiroThinker (open-source, self-hosted)
+llm:
+  provider_class: MiroThinkerSGLangClient
+  model_name: mirothinker-v1.5
+```
+
 See [full documentation](https://miromindai.github.io/miroflow/quickstart/) for web app setup, more examples, and configuration options.
 
 ---
@@ -104,7 +162,7 @@ If you find our work helpful, please consider citing:
 **MiroFlow** (Framework)
 ```bibtex
 @misc{2026miroflow,
-  title={MiroFlow: A High-Performance Open-Source Research Agent Framework},
+  title={MiroFlow: A Performance-First Agent Framework for Any Model},
   author={MiroMind AI Team},
   howpublished={\url{https://github.com/MiroMindAI/miroflow}},
   year={2026}
 
@@ -21,13 +21,13 @@ main_agent:
     max_tokens: 128000
     reasoning_effort: medium
 
-  prompt: config/prompts/standard_prompt_main_agent.yaml
+  prompt: config/prompts/prompt_main_agent_benchmark.yaml
 
   tools:
     - config/tool/tool-reading.yaml
-    # - config/tool/tool-python.yaml
-    # - config/tool/tool-search-and-scrape-webpage.yaml
-    # - config/tool/tool-jina-scrape-llm-summary.yaml
+    # - config/tool/tool-code-sandbox.yaml
+    # - config/tool/tool-serper-search.yaml
+    # - config/tool/tool-jina-scrape.yaml
     #- config/tool/tool-code.yaml
     #- config/tool/tool-image-video.yaml
     # - config/tool/tool-audio.yaml     # Uncomment for audio processing
 
@@ -21,7 +21,7 @@ main_agent:
     max_tokens: 128000
     reasoning_effort: medium
 
-  prompt: config/prompts/standard_prompt_main_agent.yaml
+  prompt: config/prompts/prompt_main_agent_benchmark.yaml
 
   tools: null
 
@@ -76,13 +76,13 @@ agent-subagent-3:
     _base_: config/llm/base_mirothinker.yaml
   prompt: config/prompts/prompt_sub_agent.yaml
   tools:
-    - config/tool/tool-python.yaml
-    - config/tool/tool-search-and-scrape-webpage.yaml
-    - config/tool/tool-jina-scrape-llm-summary.yaml
+    - config/tool/tool-code-sandbox.yaml
+    - config/tool/tool-serper-search.yaml
+    - config/tool/tool-jina-scrape.yaml
   tool_blacklist:
-    - server: "tool-search-and-scrape-webpage"
+    - server: "tool-serper-search"
       tool: "sogou_search"
-    - server: "tool-python"
+    - server: "tool-code-sandbox"
       tool: "download_file_from_sandbox_to_local"
   input_processor:
     - ${input-message-generator}
 
@@ -26,13 +26,13 @@ main_agent:
     max_tokens: 128000
     reasoning_effort: medium
 
-  prompt: config/prompts/standard_prompt_main_agent.yaml
+  prompt: config/prompts/prompt_main_agent_benchmark.yaml
 
   tools: 
-    - config/tool/tool-python.yaml
+    - config/tool/tool-code-sandbox.yaml
 
   skills:
-    - src/skill/skills/simple_file_understanding
+    - miroflow/skill/skills/simple_file_understanding
 
   input_processor:
     - ${input-message-generator}
 
@@ -21,8 +21,8 @@ main_agent:
     - config/tool/tool-audio.yaml
     # - config/tool/tool-reasoning.yaml
   skills:
-    - src/skill/skills/Today_feeling
-    - src/skill/skills/Afternoon_feeling
+    - miroflow/skill/skills/Today_feeling
+    - miroflow/skill/skills/Afternoon_feeling
   input_processor:
     - ${input-hint-generator} 
     - ${input-message-generator}
 
@@ -11,7 +11,7 @@ main_agent:
   max_turns: 400
   llm:
     _base_: config/llm/base_mirothinker.yaml
-  prompt: config/prompts/standard_prompt_main_agent.yaml
+  prompt: config/prompts/prompt_main_agent_benchmark.yaml
   tools:
     - config/tool/tool-code-sandbox.yaml
     - config/tool/tool-serper-search.yaml
@@ -36,7 +36,7 @@ output-boxed-extractor:
   type: RegexBoxedExtractor
 output-exceed-max-turn-summary:
   type: ExceedMaxTurnSummaryGenerator
-  prompt: config/prompts/standard_prompt_main_agent.yaml
+  prompt: config/prompts/prompt_main_agent_benchmark.yaml
   llm:
     _base_: config/llm/base_mirothinker.yaml
 
 
@@ -11,7 +11,7 @@ main_agent:
   max_turns: 400
   llm:
     _base_: config/llm/base_mirothinker.yaml
-  prompt: config/prompts/standard_prompt_main_agent.yaml
+  prompt: config/prompts/prompt_main_agent_benchmark.yaml
   tools:
     - config/tool/tool-code-sandbox.yaml
     - config/tool/tool-serper-search.yaml
@@ -36,7 +36,7 @@ output-boxed-extractor:
   type: RegexBoxedExtractor
 output-exceed-max-turn-summary:
   type: ExceedMaxTurnSummaryGenerator
-  prompt: config/prompts/standard_prompt_main_agent.yaml
+  prompt: config/prompts/prompt_main_agent_benchmark.yaml
   llm:
     _base_: config/llm/base_mirothinker.yaml