Commit 70d934f

Merge pull request #29 from MiroMindAI/patch_pengxiang

feat(agent): add initial support for agent graph and agent skills

2 parents 64b43bf + 5ad2327

6 files changed: +217 −17 lines

.gitignore

Lines changed: 1 addition & 0 deletions

```diff
@@ -209,6 +209,7 @@ __marimo__/

 logs/
 tmp/
+thirdparty/

 data/*
 !data/README.md
```

config/agent_quickstart_graph.yaml

Lines changed: 111 additions & 0 deletions (new file)

```yaml
# MiroFlow Gradio Demo Configuration
# A simplified configuration for the Gradio web interface (using MiroThinker)

# No benchmark defaults - this is a standalone config
defaults:
  - benchmark: example_dataset
  - override hydra/job_logging: none
  - _self_

entrypoint: main_agent

main_agent:
  name: main_agent
  type: IterativeAgentWithTool
  max_turns: 30

  llm:
    _base_: config/llm/base_openai.yaml
    provider_class: GPT5OpenAIClient
    model_name: gpt-5
    max_tokens: 128000
    reasoning_effort: medium

  prompt: config/prompts/standard_prompt_main_agent.yaml

  tools: null

  input_processor:
    - ${input-message-generator}

  output_processor:
    - ${output-summary}
    - ${output-boxed-extractor}

  sub_agents:
    agent-worker: ${agent-subagent-1}

agent-subagent-1:
  type: IterativeAgentWithTool
  name: agent-subagent-1
  max_consecutive_rollbacks: 3
  max_turns: 200
  llm:
    _base_: config/llm/base_mirothinker.yaml
  prompt: config/prompts/prompt_sub_agent.yaml
  tools: null
  input_processor:
    - ${input-message-generator}
  output_processor:
    - ${output-summary}
  sub_agents:
    agent-worker: ${agent-subagent-3}

agent-subagent-2:
  type: IterativeAgentWithTool
  name: agent-subagent-2
  max_consecutive_rollbacks: 3
  max_turns: 200
  llm:
    _base_: config/llm/base_mirothinker.yaml
  prompt: config/prompts/prompt_sub_agent.yaml
  tools: null
  input_processor:
    - ${input-message-generator}
  output_processor:
    - ${output-summary}
  sub_agents:
    agent-worker: ${agent-subagent-3}

agent-subagent-3:
  type: IterativeAgentWithTool
  name: agent-subagent-3
  max_consecutive_rollbacks: 3
  max_turns: 200
  llm:
    _base_: config/llm/base_mirothinker.yaml
  prompt: config/prompts/prompt_sub_agent.yaml
  tools:
    - config/tool/tool-python.yaml
    - config/tool/tool-search-and-scrape-webpage.yaml
    - config/tool/tool-jina-scrape-llm-summary.yaml
  tool_blacklist:
    - server: "tool-search-and-scrape-webpage"
      tool: "sogou_search"
    - server: "tool-python"
      tool: "download_file_from_sandbox_to_local"
  input_processor:
    - ${input-message-generator}
  output_processor:
    - ${output-summary}


# Input processor: generates initial message from task description
input-message-generator:
  type: InputMessageGenerator

# Output processor: summarizes conversation
output-summary:
  type: SummaryGenerator

# Output processor: extracts final answer in a boxed format
output-boxed-extractor:
  type: RegexBoxedExtractor

# Output directory for logs
output_dir: logs
data_dir: "${oc.env:DATA_DIR,data}"

benchmark:
  exceed_max_turn_summary: false
```
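The `${agent-subagent-1}` and `${agent-subagent-3}` entries are OmegaConf-style interpolations: each sub-agent is defined once at the top level and then wired into a graph by reference, which is how `agent-subagent-1` and `agent-subagent-2` can share `agent-subagent-3` as a common worker. A minimal sketch of that resolution idea, using plain dicts (an assumption for illustration — the actual resolution in MiroFlow is presumably handled by Hydra/OmegaConf, and `resolve_refs` is a hypothetical helper, not project code):

```python
# Resolve "${node-name}" strings in a config dict into nested structures,
# mimicking how top-level agent definitions are wired into a graph.
from typing import Any, Dict


def resolve_refs(config: Dict[str, Any], value: Any) -> Any:
    """Recursively replace "${key}" strings with the resolved top-level entry."""
    if isinstance(value, str) and value.startswith("${") and value.endswith("}"):
        key = value[2:-1]
        return resolve_refs(config, config[key])
    if isinstance(value, dict):
        return {k: resolve_refs(config, v) for k, v in value.items()}
    if isinstance(value, list):
        return [resolve_refs(config, v) for v in value]
    return value


# Toy config mirroring the shape of agent_quickstart_graph.yaml.
config = {
    "main_agent": {"name": "main_agent",
                   "sub_agents": {"agent-worker": "${agent-subagent-1}"}},
    "agent-subagent-1": {"name": "agent-subagent-1",
                         "sub_agents": {"agent-worker": "${agent-subagent-3}"}},
    "agent-subagent-3": {"name": "agent-subagent-3", "sub_agents": {}},
}

graph = resolve_refs(config, config["main_agent"])
worker = graph["sub_agents"]["agent-worker"]
print(worker["name"])                                # agent-subagent-1
print(worker["sub_agents"]["agent-worker"]["name"])  # agent-subagent-3
```

Note that references resolve transitively: the main agent's worker is subagent-1, whose own worker is subagent-3, matching the two-level graph in the config above.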

config/agent_quickstart_skill.yaml

Lines changed: 61 additions & 0 deletions (new file)

```yaml
# MiroFlow Quickstart with Skill Configuration
# A simple single-agent setup with the reading tool and simple_file_understanding skill.
#
# Usage:
#   bash scripts/test_single_task.sh \
#     --config config/agent_quickstart_skill.yaml \
#     --task-question "What is the first country listed in the XLSX file that have names starting with Co?" \
#     --file-path data/FSI-2023-DOWNLOAD.xlsx

defaults:
  - benchmark: example_dataset
  - override hydra/job_logging: none
  - _self_

entrypoint: main_agent

main_agent:
  name: main_agent
  type: IterativeAgentWithTool
  max_turns: 30

  llm:
    _base_: config/llm/base_openai.yaml
    provider_class: GPT5OpenAIClient
    model_name: gpt-5
    max_tokens: 128000
    reasoning_effort: medium

  prompt: config/prompts/standard_prompt_main_agent.yaml

  tools:
    - config/tool/tool-python.yaml

  skills:
    - src/skill/skills/simple_file_understanding

  input_processor:
    - ${input-message-generator}

  output_processor:
    - ${output-summary}
    - ${output-boxed-extractor}

# Input processor: generates initial message from task description
input-message-generator:
  type: InputMessageGenerator

# Output processor: summarizes conversation
output-summary:
  type: SummaryGenerator

# Output processor: extracts final answer in a boxed format
output-boxed-extractor:
  type: RegexBoxedExtractor

# Output directory for logs
output_dir: logs
data_dir: "${oc.env:DATA_DIR,data}"

benchmark:
  exceed_max_turn_summary: false
```

scripts/run_single_task.py

Lines changed: 1 addition & 4 deletions

```diff
@@ -166,10 +166,7 @@ def main():

     # Load configuration
     print(f"Loading configuration from: {args.config_path}")
-    cfg = load_config(args.config_path)
-
-    # Override output directory
-    cfg.output_dir = args.output_dir
+    cfg = load_config(args.config_path, f"output_dir={args.output_dir}")

     # Determine which task to run
     task = None
```
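This change moves the output-directory override out of a post-load attribute mutation and into `load_config` itself, passing a Hydra-style `"output_dir=..."` override string so the override participates in config composition. A minimal sketch of how such a `"key=value"` override can be applied, using plain dicts (an assumption for illustration — the real `load_config` presumably delegates to Hydra/OmegaConf, and `apply_override` is a hypothetical helper):

```python
# Apply one dotted "a.b.c=value" override string onto a config dict,
# creating intermediate nodes as needed. Values stay as strings in this
# sketch; a real loader would also coerce types.
from typing import Any, Dict


def apply_override(cfg: Dict[str, Any], override: str) -> Dict[str, Any]:
    """Apply one 'a.b.c=value' override in place and return the config."""
    dotted, _, value = override.partition("=")
    keys = dotted.split(".")
    node = cfg
    for key in keys[:-1]:
        node = node.setdefault(key, {})
    node[keys[-1]] = value
    return cfg


cfg = {"output_dir": "logs", "main_agent": {"max_turns": 30}}
apply_override(cfg, "output_dir=logs/run_001")       # top-level override
apply_override(cfg, "benchmark.pass_at_k=1")         # nested key, created on demand
print(cfg["output_dir"])  # logs/run_001
```

Routing the override through the loader keeps a single source of truth for the final config, instead of scattering mutations across the script after loading.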

src/skill/manager.py

Lines changed: 11 additions & 13 deletions

```diff
@@ -1,11 +1,13 @@
 from __future__ import annotations

+import logging
 import re
-import sys
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple

+logger = logging.getLogger(__name__)
+

 @dataclass
 class SkillMeta:
@@ -26,7 +28,7 @@ class SkillError(Exception):
 def _parse_frontmatter(md_text: str) -> Tuple[Dict[str, Any], str]:
     m = _FRONTMATTER_RE.match(md_text)
     if not m:
-        raise SkillError("SKILL.md 缺少 frontmatter(必须以 --- 开头并闭合 ---")
+        raise SkillError("SKILL.md is missing frontmatter (must start and end with ---)")

     fm_raw, body = m.group(1), m.group(2)
     meta: Dict[str, Any] = {}
@@ -90,8 +92,8 @@ def __init__(
         allowed_skill_ids: Optional[List[str]] = None,
     ):
         """
-        allow_python_skills: 是否允许加载执行 python skill(建议默认 True 但配合白名单)
-        allowed_skill_ids: 若提供,则只有这些 skill_id 能被执行(强烈建议生产环境启用)
+        allow_python_skills: Whether to allow loading and executing python skills (recommended to keep True but use with whitelist)
+        allowed_skill_ids: If provided, only these skill_ids can be executed (strongly recommended for production)
         """
         self.skill_dirs = skill_dirs
         self.allow_python_skills = allow_python_skills
@@ -102,7 +104,7 @@ def __init__(
     def get_all_skills_definitions(self) -> List[SkillMeta]:
         skills_server_params = []
         index = self.discover()
-        print("index:", index)
+        logger.info("Discovered skills index: %s", index)
         schema = {
             "type": "object",
             "properties": {"subtask": {"title": "Subtask", "type": "string"}},
@@ -128,7 +130,7 @@ def get_all_skills_definitions(self) -> List[SkillMeta]:

     def discover(self) -> Dict[str, SkillMeta]:
         """
-        扫描目录,解析每个 SKILL.md 的 frontmatter(只加载元数据,不加载正文/资源)
+        Scan directories and parse the frontmatter of each SKILL.md (loads metadata only, not body/resources)
         """
         index: Dict[str, SkillMeta] = {}

@@ -147,7 +149,7 @@ def discover(self) -> Dict[str, SkillMeta]:
             name = str(fm.get("name", "")).strip()
             desc = str(fm.get("description", "")).strip()
             if not name or not desc:
-                raise SkillError("frontmatter 必须包含 name description")
+                raise SkillError("frontmatter must contain name and description")

             meta = SkillMeta(
                 skill_id=skill_dir.name,
@@ -158,11 +160,7 @@ def discover(self) -> Dict[str, SkillMeta]:
             )
             index[meta.skill_id] = meta
         except Exception as e:
-            # 生产环境建议记录日志,不要直接炸
-            print(
-                f"[warn] Failed to load skill meta from {skill_md}: {e}",
-                file=sys.stderr,
-            )
+            logger.warning("Failed to load skill meta from %s: %s", skill_md, e)

         self._index = index
         return index
@@ -184,7 +182,7 @@ def load(self, skill_id: str) -> str:
             and meta.skill_id not in self.allowed_skill_ids
         ):
             raise SkillError(
-                f"Skill '{meta.skill_id}' 不在 allowed_skill_ids 白名单内,拒绝加载执行。"
+                f"Skill '{meta.skill_id}' is not in the allowed_skill_ids whitelist, loading denied."
             )

         text = meta.skill_md.read_text(encoding="utf-8")
```
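The `_parse_frontmatter` contract shown in this diff — split a `SKILL.md` into a frontmatter metadata dict and a markdown body, raising `SkillError` when the `---` fences are missing — can be sketched as follows. The regex and the line-by-line `key: value` parsing here are assumptions for illustration; the diff only reveals the function's signature and error message, not its body:

```python
# Sketch of SKILL.md frontmatter parsing along the lines of
# _parse_frontmatter in src/skill/manager.py (regex and key:value
# handling are assumed, not taken from the actual source).
import re
from typing import Any, Dict, Tuple

# Frontmatter: a leading "---" fence, metadata lines, a closing "---".
_FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n(.*)$", re.DOTALL)


def parse_frontmatter(md_text: str) -> Tuple[Dict[str, Any], str]:
    m = _FRONTMATTER_RE.match(md_text)
    if not m:
        raise ValueError("SKILL.md is missing frontmatter (must start and end with ---)")
    fm_raw, body = m.group(1), m.group(2)
    meta: Dict[str, Any] = {}
    for line in fm_raw.splitlines():
        key, _, value = line.partition(":")
        if key.strip():
            meta[key.strip()] = value.strip()
    return meta, body


md = "---\nname: simple_file_understanding\ndescription: Understand CSV files.\n---\n# body\n"
meta, body = parse_frontmatter(md)
print(meta["name"])  # simple_file_understanding
```

The `discover` method in the diff then applies exactly this kind of check: it rejects any skill whose frontmatter lacks `name` or `description`, and logs (rather than raising) when a single skill fails to load, so one bad skill does not break discovery of the others.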
Lines changed: 32 additions & 0 deletions (new file)

```markdown
---
name: simple_file_understanding
description: Understand and analyze CSV files. Use when the task involves reading, parsing, or answering questions about data in a CSV file.
---

# simple_file_understanding

## Instructions

When a task involves a CSV file, follow this workflow:

### Step 1: Read the File
Use the `read_file` tool from the `tool-reading` MCP server to load the file content. Provide the full local file path as the `uri` argument.

### Step 2: Understand the Structure
After reading the file, identify:
- **Column headers**: The first row typically contains column names.
- **Data types**: Determine whether each column contains numbers, text, dates, or mixed types.
- **Row count**: Note the approximate number of data rows.
- **Delimiter**: CSV files use commas by default, but the content returned will already be converted to markdown table format.

### Step 3: Answer the Question
When answering questions about the CSV data:
- **Filtering**: To find rows matching a condition (e.g., "names starting with Co"), scan the relevant column and apply the filter.
- **Sorting**: If the question asks for "first", "last", "highest", or "lowest", identify the ordering criterion. Unless otherwise specified, "first" means the first matching row in the file's original order (top to bottom).
- **Aggregation**: For questions involving counts, sums, averages, or other aggregations, compute them from the relevant column values.
- **Exact matching**: Pay close attention to exact string matching vs. prefix/substring matching. "Starting with Co" means the value begins with "Co", not just contains "Co".

### Important Notes
- Always read the file before attempting to answer. Do not guess the content.
- If the file is large and the markdown output is truncated, focus on the portions relevant to the question.
- Provide the final answer clearly and concisely, wrapped in \boxed{}.
```
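The filtering and ordering rules the skill describes can be sketched with the stdlib `csv` module. The sample data below is invented for illustration (it is not the actual FSI-2023 dataset referenced in the quickstart config):

```python
# Prefix filtering over CSV rows, per the skill's rules: "starting with Co"
# means a prefix match (not a substring match), and "first" means the first
# matching row in the file's original top-to-bottom order.
import csv
import io

csv_text = "Country,Rank\nColombia,67\nChad,6\nComoros,49\nCanada,172\n"
rows = list(csv.DictReader(io.StringIO(csv_text)))

# Prefix match on the Country column, preserving file order.
matches = [r["Country"] for r in rows if r["Country"].startswith("Co")]
print(matches)     # ['Colombia', 'Comoros'] -- Canada contains "C" but not the "Co" prefix? It does start with "Ca", so it is excluded.
print(matches[0])  # Colombia: the first match in file order
```

Note how `str.startswith` encodes the skill's "exact prefix, not substring" rule: a row whose value merely contains "Co" elsewhere in the string would not match.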
