Commit 3e78a6c (parent 4e53971)

feat(progressive_discovery): dynamic tool discovery agent

Add agent and utilities for dynamic tool selection to minimize LLM context size.

- Introduce progressive_discovery agent for discovering and filtering tools
- Add search tool and filter service modules
- Provide config and usage examples
- Include new tests for progressive discovery workflow

File tree: 9 files changed, +623 −0 lines
Lines changed: 51 additions & 0 deletions

# Progressive Tool Discovery

Example agent demonstrating dynamic tool discovery for SGR Agent Core.

## Problem

When using multiple MCP servers (Jira, Confluence, GitHub, GDrive), each adds dozens of tools. With ~60 tools the LLM context becomes bloated — local models can't handle it, and paid APIs waste tokens on irrelevant tool descriptions.

## Solution

The agent starts with a minimal set of **system tools** (reasoning, planning, clarification, final answer) and dynamically discovers additional tools via `SearchToolsTool`.

```
User query → Agent reasons → needs web search → calls SearchToolsTool("search the web")
           → WebSearchTool discovered and added to active toolkit → Agent uses WebSearchTool
```

### How it works

1. **Init**: Toolkit is split into system tools (`isSystemTool=True`) and discoverable tools
2. **Runtime**: Only system tools + already discovered tools are sent to the LLM
3. **Discovery**: Agent calls `SearchToolsTool` with a natural language query
4. **Matching**: `ToolFilterService` uses BM25 ranking + regex keyword overlap to find relevant tools
5. **Activation**: Matched tools are added to the active toolkit for subsequent calls

### Key components

| Component | Description |
| --- | --- |
| `ProgressiveDiscoveryAgent` | Agent subclass that manages the system/discovered tool split |
| `SearchToolsTool` | Meta-tool for discovering new tools by capability description |
| `ToolFilterService` | Stateless BM25 + regex matching service |

## Usage

```bash
cp config.yaml.example config.yaml
# Edit config.yaml with your API key and MCP servers
sgr --config-file config.yaml
```

## Architecture

```
ProgressiveDiscoveryAgent
├── self.toolkit = [ReasoningTool, SearchToolsTool, ...]        (system tools)
├── context.custom_context["all_tools"] = [WebSearchTool, ...]  (discoverable)
└── context.custom_context["discovered_tools"] = []             (accumulates at runtime)
```

`_get_active_tools()` returns `system_tools + discovered_tools` — used by both `_prepare_tools()` and `_prepare_context()`.
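The runtime flow above can be reduced to a small pure-Python sketch. All names here (`search_tools`, `active_tools`, the plain keyword-overlap matcher) are illustrative stand-ins, not the real SGR Agent Core API:

```python
# Toy model of progressive discovery: system tools are always active,
# other tools only join the toolkit after a successful search.

SYSTEM_TOOLS = ["ReasoningTool", "SearchToolsTool", "FinalAnswerTool"]

# Discoverable tools with short capability descriptions.
ALL_TOOLS = {
    "WebSearchTool": "search the web for pages",
    "JiraCreateIssueTool": "create an issue in jira",
}

discovered: list[str] = []


def search_tools(query: str) -> list[str]:
    """Keyword-overlap match (stand-in for the BM25 + regex service)."""
    query_words = set(query.lower().split())
    matched = [
        name
        for name, desc in ALL_TOOLS.items()
        if query_words & set(desc.split()) and name not in discovered
    ]
    discovered.extend(matched)
    return matched


def active_tools() -> list[str]:
    """What would be sent to the LLM on the next iteration."""
    return SYSTEM_TOOLS + discovered


# Agent needs web search, discovers WebSearchTool; Jira tools stay out of context.
search_tools("search the web")
print(active_tools())
```

Only the matched tool enters the active set; repeating the same query discovers nothing new, which mirrors the dedup in `SearchToolsTool`.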

examples/progressive_discovery/__init__.py

Whitespace-only changes.
Lines changed: 53 additions & 0 deletions

```yaml
# Progressive Discovery Agent Configuration
#
# This agent starts with minimal system tools and dynamically discovers
# additional tools as needed via SearchToolsTool (BM25 + regex matching).
#
# Useful when you have many MCP servers with dozens of tools — keeps
# the LLM context small and focused.

llm:
  model: "gpt-4o"
  base_url: "https://api.openai.com/v1"
  api_key: "sk-..."
  temperature: 0.1
  max_tokens: 16000

execution:
  max_iterations: 15
  max_clarifications: 2

prompts:
  system_prompt_path: "examples/progressive_discovery/prompts/system_prompt.txt"

# MCP servers provide additional tools that will be discoverable
# (not loaded into context until agent searches for them)
#
# mcp:
#   servers:
#     - name: "jira"
#       command: "npx"
#       args: ["-y", "@anthropic/jira-mcp-server"]
#       env:
#         JIRA_URL: "https://your-org.atlassian.net"
#         JIRA_TOKEN: "your-token"
#
#     - name: "github"
#       command: "npx"
#       args: ["-y", "@anthropic/github-mcp-server"]
#       env:
#         GITHUB_TOKEN: "your-token"

agents:
  progressive_discovery:
    base_class: "examples.progressive_discovery.progressive_discovery_agent.ProgressiveDiscoveryAgent"
    tools:
      - "sgr_agent_core.tools.reasoning_tool.ReasoningTool"
      - "sgr_agent_core.tools.clarification_tool.ClarificationTool"
      - "sgr_agent_core.tools.generate_plan_tool.GeneratePlanTool"
      - "sgr_agent_core.tools.adapt_plan_tool.AdaptPlanTool"
      - "sgr_agent_core.tools.create_report_tool.CreateReportTool"
      - "sgr_agent_core.tools.final_answer_tool.FinalAnswerTool"
      # Add any non-system tools here — they will be discoverable, not loaded by default
      # - "sgr_agent_core.tools.web_search_tool.WebSearchTool"
      # - "sgr_agent_core.tools.extract_page_content_tool.ExtractPageContentTool"
```
Lines changed: 80 additions & 0 deletions

```python
from __future__ import annotations

from typing import Type

from openai import AsyncOpenAI, pydantic_function_tool

from sgr_agent_core.agent_definition import AgentConfig
from sgr_agent_core.agents.sgr_tool_calling_agent import SGRToolCallingAgent
from sgr_agent_core.base_tool import BaseTool
from sgr_agent_core.services.prompt_loader import PromptLoader

from .tools.search_tools_tool import SearchToolsTool


class ProgressiveDiscoveryAgent(SGRToolCallingAgent):
    """Agent that starts with minimal system tools and dynamically discovers
    additional tools via SearchToolsTool.

    On init, splits the toolkit into:
    - system tools (isSystemTool=True) -> self.toolkit (always available)
    - non-system tools -> stored in context.custom_context["all_tools"]

    SearchToolsTool is automatically added if not already present.
    Discovered tools accumulate in context.custom_context["discovered_tools"].
    """

    name: str = "progressive_discovery_agent"

    def __init__(
        self,
        task_messages: list,
        openai_client: AsyncOpenAI,
        agent_config: AgentConfig,
        toolkit: list[Type[BaseTool]],
        def_name: str | None = None,
        **kwargs: dict,
    ):
        system_tools = [t for t in toolkit if getattr(t, "isSystemTool", False)]
        non_system_tools = [t for t in toolkit if not getattr(t, "isSystemTool", False)]

        if SearchToolsTool not in system_tools:
            system_tools.append(SearchToolsTool)

        super().__init__(
            task_messages=task_messages,
            openai_client=openai_client,
            agent_config=agent_config,
            toolkit=system_tools,
            def_name=def_name,
            **kwargs,
        )

        if self._context.custom_context is None:
            self._context.custom_context = {}
        self._context.custom_context["all_tools"] = non_system_tools
        self._context.custom_context["discovered_tools"] = []

    def _get_active_tools(self) -> list[Type[BaseTool]]:
        """Return system tools + discovered tools."""
        discovered = []
        if isinstance(self._context.custom_context, dict):
            discovered = self._context.custom_context.get("discovered_tools", [])
        return list(self.toolkit) + list(discovered)

    async def _prepare_tools(self) -> list[dict]:
        """Override to return only active tools (system + discovered)."""
        active_tools = self._get_active_tools()
        if self._context.iteration >= self.config.execution.max_iterations:
            raise RuntimeError("Max iterations reached")
        return [pydantic_function_tool(tool, name=tool.tool_name) for tool in active_tools]

    async def _prepare_context(self) -> list[dict]:
        """Override to pass only active tools to system prompt."""
        active_tools = self._get_active_tools()
        return [
            {"role": "system", "content": PromptLoader.get_system_prompt(active_tools, self.config.prompts)},
            *self.task_messages,
            {"role": "user", "content": PromptLoader.get_initial_user_request(self.task_messages, self.config.prompts)},
            *self.conversation,
        ]
```
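The toolkit split done in `__init__` hinges entirely on the optional `isSystemTool` class attribute. A minimal sketch with stand-in classes (the real `BaseTool` subclasses carry more state):

```python
# Tools without isSystemTool (or with it False) become discoverable;
# getattr with a default keeps the check safe for classes that omit it.

class ReasoningTool:
    isSystemTool = True


class WebSearchTool:
    pass  # no isSystemTool attribute -> discoverable


toolkit = [ReasoningTool, WebSearchTool]
system_tools = [t for t in toolkit if getattr(t, "isSystemTool", False)]
non_system_tools = [t for t in toolkit if not getattr(t, "isSystemTool", False)]

print([t.__name__ for t in system_tools])      # ['ReasoningTool']
print([t.__name__ for t in non_system_tools])  # ['WebSearchTool']
```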

examples/progressive_discovery/services/__init__.py

Whitespace-only changes.
Lines changed: 84 additions & 0 deletions

```python
from __future__ import annotations

import re
from typing import TYPE_CHECKING

from rank_bm25 import BM25Okapi

if TYPE_CHECKING:
    from sgr_agent_core.base_tool import BaseTool


class ToolFilterService:
    """Stateless service for filtering tools by relevance to a query.

    Uses BM25 ranking + regex keyword overlap to find tools matching a query.
    """

    @classmethod
    def filter_tools(
        cls,
        query: str,
        tools: list[type[BaseTool]],
        bm25_threshold: float = 0.1,
    ) -> list[type[BaseTool]]:
        """Filter tools by relevance to query using BM25 + regex.

        Args:
            query: Natural language description of needed capability.
            tools: Full list of available tool classes.
            bm25_threshold: Minimum BM25 score to consider a tool relevant.

        Returns:
            List of tool classes matching the query.
        """
        if not query or not query.strip() or not tools:
            return list(tools)

        query_lower = query.strip().lower()

        tool_documents = []
        for tool in tools:
            tool_name = (tool.tool_name or tool.__name__).lower()
            tool_description = (tool.description or "").lower()
            tool_documents.append(f"{tool_name} {tool_description}")

        tokenized_docs = [doc.split() for doc in tool_documents]
        bm25 = BM25Okapi(tokenized_docs)

        query_tokens = query_lower.split()
        scores = bm25.get_scores(query_tokens)

        query_words = set(re.findall(r"\b\w+\b", query_lower))

        filtered = []
        for i, tool in enumerate(tools):
            bm25_score = scores[i]

            tool_name = (tool.tool_name or tool.__name__).lower()
            tool_description = (tool.description or "").lower()
            tool_words = set(re.findall(r"\b\w+\b", f"{tool_name} {tool_description}"))
            has_regex_match = bool(query_words & tool_words)

            if bm25_score > bm25_threshold or has_regex_match:
                filtered.append(tool)

        return filtered

    @classmethod
    def get_tool_summaries(cls, tools: list[type[BaseTool]]) -> str:
        """Format tool list for LLM output.

        Args:
            tools: List of tool classes to summarize.

        Returns:
            Formatted string with tool names and descriptions.
        """
        lines = []
        for i, tool in enumerate(tools, start=1):
            name = tool.tool_name or tool.__name__
            desc = tool.description or ""
            lines.append(f"{i}. {name}: {desc}")
        return "\n".join(lines)
```
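The regex branch of `filter_tools` can be exercised in isolation with the standard library alone (`keyword_overlap` is an illustrative helper, not part of the service):

```python
import re


def keyword_overlap(query: str, doc: str) -> bool:
    """True when the query shares at least one word with a tool's name+description."""
    query_words = set(re.findall(r"\b\w+\b", query.lower()))
    doc_words = set(re.findall(r"\b\w+\b", doc.lower()))
    return bool(query_words & doc_words)


print(keyword_overlap("search the web", "websearchtool searches web pages"))  # True
print(keyword_overlap("search the web", "create jira issue"))                 # False
```

Note the matching is deliberately permissive: a single shared word (even a common one like "the") passes the regex branch, and the BM25 threshold of 0.1 is low. False positives only cost a few extra tool descriptions in context, whereas a false negative would leave the agent without a needed capability.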

examples/progressive_discovery/tools/__init__.py

Whitespace-only changes.
Lines changed: 53 additions & 0 deletions

```python
from __future__ import annotations

from typing import TYPE_CHECKING

from pydantic import Field

from sgr_agent_core.base_tool import BaseTool

from ..services.tool_filter_service import ToolFilterService

if TYPE_CHECKING:
    from sgr_agent_core.agent_definition import AgentConfig
    from sgr_agent_core.models import AgentContext


class SearchToolsTool(BaseTool):
    """Search for available tools by capability description.

    Use this tool when you need a capability that is not in your current
    toolkit. Describe what you need in natural language and matching
    tools will be added to your active toolkit for subsequent use.
    """

    isSystemTool = True

    query: str = Field(description="Natural language description of the capability you need (e.g. 'search the web')")

    async def __call__(self, context: AgentContext, config: AgentConfig, **kwargs) -> str:
        custom = context.custom_context
        if not isinstance(custom, dict):
            return "Error: custom_context is not initialized as dict"

        all_tools = custom.get("all_tools", [])
        if not all_tools:
            return "No additional tools available for discovery."

        discovered = custom.setdefault("discovered_tools", [])

        matched = ToolFilterService.filter_tools(self.query, all_tools)

        already_discovered_names = {t.tool_name for t in discovered}
        new_tools = [t for t in matched if t.tool_name not in already_discovered_names]

        if not new_tools:
            return f"No new tools found for query '{self.query}'. Already discovered: {already_discovered_names}"

        discovered.extend(new_tools)

        summary = ToolFilterService.get_tool_summaries(new_tools)
        return (
            f"Found {len(new_tools)} new tool(s) for '{self.query}':\n{summary}\n\n"
            "These tools are now available in your toolkit. You can use them in subsequent steps."
        )
```
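The discovered-tools bookkeeping in `__call__` (dedup by `tool_name`, then extend the shared list in place) can be sketched with plain dicts standing in for tool classes (`discover` is a hypothetical helper, not part of the module):

```python
# Mutating the shared list in place matters: it is the same object the agent's
# _get_active_tools() reads from custom_context["discovered_tools"].

def discover(matched: list[dict], discovered: list[dict]) -> list[dict]:
    """Append matched tools not already discovered; return only the new ones."""
    seen = {t["tool_name"] for t in discovered}
    new_tools = [t for t in matched if t["tool_name"] not in seen]
    discovered.extend(new_tools)
    return new_tools


discovered_tools: list[dict] = []
web = {"tool_name": "web_search"}

print(discover([web], discovered_tools))  # first call: the new tool is returned
print(discover([web], discovered_tools))  # repeated query: nothing new
```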
