vamplabAI
diff --git a/‎examples/progressive_discovery/README.md‎
Lines changed: 53 additions & 0 deletions b/‎examples/progressive_discovery/README.md‎
Lines changed: 53 additions & 0 deletions
diff --git a/‎examples/progressive_discovery/__init__.py‎ b/‎examples/progressive_discovery/__init__.py‎
diff --git a/‎examples/progressive_discovery/config.yaml.example‎
Lines changed: 42 additions & 0 deletions b/‎examples/progressive_discovery/config.yaml.example‎
Lines changed: 42 additions & 0 deletions
diff --git a/‎examples/progressive_discovery/models.py‎
Lines changed: 19 additions & 0 deletions b/‎examples/progressive_discovery/models.py‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎examples/progressive_discovery/progressive_discovery_agent.py‎
Lines changed: 77 additions & 0 deletions b/‎examples/progressive_discovery/progressive_discovery_agent.py‎
Lines changed: 77 additions & 0 deletions
diff --git a/‎examples/progressive_discovery/services/__init__.py‎ b/‎examples/progressive_discovery/services/__init__.py‎
diff --git a/‎examples/progressive_discovery/services/tool_filter_service.py‎
Lines changed: 82 additions & 0 deletions b/‎examples/progressive_discovery/services/tool_filter_service.py‎
Lines changed: 82 additions & 0 deletions
diff --git a/‎examples/progressive_discovery/tools/__init__.py‎ b/‎examples/progressive_discovery/tools/__init__.py‎
diff --git a/‎examples/progressive_discovery/tools/search_tools_tool.py‎
Lines changed: 48 additions & 0 deletions b/‎examples/progressive_discovery/tools/search_tools_tool.py‎
Lines changed: 48 additions & 0 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 2 additions & 0 deletions b/‎pyproject.toml‎
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,53 @@
+# Progressive Tool Discovery
+
+Example agent demonstrating dynamic tool discovery for SGR Agent Core.
+
+## Problem
+
+When using multiple MCP servers (Jira, Confluence, GitHub, GDrive), each adds dozens of tools. With ~60 tools the LLM context becomes bloated — local models can't handle it, and paid APIs waste tokens on irrelevant tool descriptions.
+
+## Solution
+
+The agent starts with a minimal set of **system tools** (reasoning, planning, clarification, final answer) and dynamically discovers additional tools via `SearchToolsTool`.
+
+```
+User query → Agent reasons → needs web search → calls SearchToolsTool("search the web")
+→ WebSearchTool discovered and added to active toolkit → Agent uses WebSearchTool
+```
+
+### How it works
+
+1. **Init**: Toolkit is split into system tools (subclasses of `SystemBaseTool`) and discoverable tools
+2. **Runtime**: Only system tools and already-discovered tools are sent to the LLM
+3. **Discovery**: Agent calls `SearchToolsTool` with a natural language query
+4. **Matching**: `ToolFilterService` uses BM25 ranking + regex keyword overlap to find relevant tools
+5. **Activation**: Matched tools are added to the active toolkit for subsequent calls
+
+### Key components
+
+| Component                   | Description                                                   |
+| --------------------------- | ------------------------------------------------------------- |
+| `ProgressiveDiscoveryAgent` | Agent subclass that manages system/discovered tool split      |
+| `SearchToolsTool`           | Meta-tool for discovering new tools by capability description |
+| `ToolFilterService`         | Stateless BM25 + regex matching service                       |
+
+## Usage
+
+```bash
+cp config.yaml.example config.yaml
+# Edit config.yaml with your API key and MCP servers
+sgr --config-file config.yaml
+```
+
+## Architecture
+
+```
+ProgressiveDiscoveryAgent
+├── self.toolkit = [ReasoningTool, SearchToolsTool, ...]  (system tools)
+├── context.all_tools = [WebSearchTool, ...]  (discoverable)
+└── context.discovered_tools = []  (accumulates at runtime)
+```
+
+`context` is a `ProgressiveDiscoveryContext(AgentContext)` — extends the base context with discovery-specific fields.
+
+`_get_active_tools()` returns `system_tools + discovered_tools` — used by both `_prepare_tools()` and `_prepare_context()`.
@@ -0,0 +1,42 @@
+# Progressive Discovery Agent Configuration
+#
+# This agent starts with minimal system tools and dynamically discovers
+# additional tools as needed via SearchToolsTool (BM25 + regex matching).
+#
+# Useful when you have many MCP servers with dozens of tools — keeps
+# the LLM context small and focused.
+
+llm:
+  model: "gpt-4o"
+  base_url: "https://api.openai.com/v1"
+  api_key: "sk-..."
+  temperature: 0.1
+  max_tokens: 16000
+
+execution:
+  max_iterations: 15
+  max_clarifications: 2
+
+# MCP servers provide additional tools that will be discoverable
+# (not loaded into the LLM context until the agent searches for them)
+#
+# mcp:
+#   mcpServers:
+#     jira:
+#       url: "https://your-jira-mcp-server.com/mcp"
+#     github:
+#       url: "https://your-github-mcp-server.com/mcp"
+
+agents:
+  progressive_discovery:
+    base_class: "examples.progressive_discovery.progressive_discovery_agent.ProgressiveDiscoveryAgent"
+    tools:
+      - "reasoning_tool"
+      - "clarification_tool"
+      - "generate_plan_tool"
+      - "adapt_plan_tool"
+      - "create_report_tool"
+      - "final_answer_tool"
+      # Non-system tools — discoverable via SearchToolsTool
+      - "web_search_tool"
+      - "extract_page_content_tool"
@@ -0,0 +1,19 @@
+from pydantic import Field
+
+from sgr_agent_core.base_tool import BaseTool
+from sgr_agent_core.models import AgentContext
+
+
+class ProgressiveDiscoveryContext(AgentContext):
+    """Extended agent context for progressive discovery.
+
+    Inherits all standard AgentContext fields (iteration, state,
+    searches, etc.) and adds tool lists used by the discovery mechanism.
+    """
+
+    all_tools: list[type[BaseTool]] = Field(
+        default_factory=list, description="Full list of non-system tools available for discovery"
+    )
+    discovered_tools: list[type[BaseTool]] = Field(
+        default_factory=list, description="Tools discovered so far via SearchToolsTool"
+    )
@@ -0,0 +1,77 @@
+from __future__ import annotations
+
+from typing import Type
+
+from openai import AsyncOpenAI, pydantic_function_tool
+
+from sgr_agent_core.agent_definition import AgentConfig
+from sgr_agent_core.agents.sgr_tool_calling_agent import SGRToolCallingAgent
+from sgr_agent_core.base_tool import BaseTool, SystemBaseTool
+from sgr_agent_core.services.prompt_loader import PromptLoader
+
+from .models import ProgressiveDiscoveryContext
+from .tools.search_tools_tool import SearchToolsTool
+
+
+class ProgressiveDiscoveryAgent(SGRToolCallingAgent):
+    """Agent that starts with minimal system tools and dynamically discovers
+    additional tools via SearchToolsTool.
+
+    On init, splits the toolkit into:
+    - system tools (subclasses of SystemBaseTool) -> self.toolkit (always available)
+    - non-system tools -> stored in context.all_tools
+
+    SearchToolsTool is automatically added if not already present.
+    Discovered tools accumulate in context.discovered_tools.
+    """
+
+    name: str = "progressive_discovery_agent"
+
+    def __init__(
+        self,
+        task_messages: list,
+        openai_client: AsyncOpenAI,
+        agent_config: AgentConfig,
+        toolkit: list[Type[BaseTool]],
+        def_name: str | None = None,
+        **kwargs: dict,
+    ):
+        system_tools = [t for t in toolkit if issubclass(t, SystemBaseTool)]
+        non_system_tools = [t for t in toolkit if not issubclass(t, SystemBaseTool)]
+
+        if SearchToolsTool not in system_tools:
+            system_tools.append(SearchToolsTool)
+
+        super().__init__(
+            task_messages=task_messages,
+            openai_client=openai_client,
+            agent_config=agent_config,
+            toolkit=system_tools,
+            def_name=def_name,
+            **kwargs,
+        )
+
+        self._context = ProgressiveDiscoveryContext(
+            all_tools=non_system_tools,
+        )
+
+    def _get_active_tools(self) -> list[Type[BaseTool]]:
+        """Return system tools + discovered tools."""
+        return list(self.toolkit) + list(self._context.discovered_tools)
+
+    async def _prepare_tools(self) -> list[dict]:
+        """Override to return only active tools (system + discovered)."""
+        active_tools = self._get_active_tools()
+        if self._context.iteration >= self.config.execution.max_iterations:
+            raise RuntimeError("Max iterations reached")
+        return [pydantic_function_tool(tool, name=tool.tool_name) for tool in active_tools]
+
+    async def _prepare_context(self) -> list[dict]:
+        """Override to pass only active tools to system prompt."""
+        active_tools = self._get_active_tools()
+        return [
+            {"role": "system", "content": PromptLoader.get_system_prompt(active_tools, self.config.prompts)},
+            *self.task_messages,
+            {"role": "user", "content": PromptLoader.get_initial_user_request(self.task_messages, self.config.prompts)},
+            *self.conversation,
+        ]
@@ -0,0 +1,82 @@
+from __future__ import annotations
+
+import re
+from typing import TYPE_CHECKING
+
+from rank_bm25 import BM25Okapi
+
+if TYPE_CHECKING:
+    from sgr_agent_core.base_tool import BaseTool
+
+
+class ToolFilterService:
+    """Stateless service for filtering tools by relevance to a query.
+
+    Uses BM25 ranking + regex keyword overlap to find tools matching a
+    query.
+    """
+
+    @classmethod
+    def filter_tools(
+        cls,
+        query: str,
+        tools: list[type[BaseTool]],
+        bm25_threshold: float = 0.1,
+    ) -> list[type[BaseTool]]:
+        """Filter tools by relevance to query using BM25 + regex.
+
+        Args:
+            query: Natural language description of needed capability.
+            tools: Full list of available tool classes.
+            bm25_threshold: Minimum BM25 score to consider a tool relevant.
+
+        Returns:
+            List of tool classes matching the query.
+        """
+        if not query or not query.strip() or not tools:
+            return list(tools)
+
+        query_lower = query.strip().lower()
+
+        tool_documents = []
+        for tool in tools:
+            tool_name = (tool.tool_name or tool.__name__).lower()
+            tool_description = (tool.description or "").lower()
+            tool_documents.append(f"{tool_name} {tool_description}")
+
+        tokenized_docs = [doc.split() for doc in tool_documents]
+        bm25 = BM25Okapi(tokenized_docs)
+
+        query_tokens = query_lower.split()
+        scores = bm25.get_scores(query_tokens)
+
+        query_words = set(re.findall(r"\b\w+\b", query_lower))
+
+        filtered = []
+        for i, tool in enumerate(tools):
+            bm25_score = scores[i]
+
+            tool_words = set(re.findall(r"\b\w+\b", tool_documents[i]))
+            has_regex_match = bool(query_words & tool_words)
+
+            if bm25_score > bm25_threshold or has_regex_match:
+                filtered.append(tool)
+
+        return filtered
+
+    @classmethod
+    def get_tool_summaries(cls, tools: list[type[BaseTool]]) -> str:
+        """Format tool list for LLM output.
+
+        Args:
+            tools: List of tool classes to summarize.
+
+        Returns:
+            Formatted string with tool names and descriptions.
+        """
+        lines = []
+        for i, tool in enumerate(tools, start=1):
+            name = tool.tool_name or tool.__name__
+            desc = tool.description or ""
+            lines.append(f"{i}. {name}: {desc}")
+        return "\n".join(lines)
@@ -0,0 +1,48 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from pydantic import Field
+
+from sgr_agent_core.base_tool import SystemBaseTool
+
+from ..models import ProgressiveDiscoveryContext
+from ..services.tool_filter_service import ToolFilterService
+
+if TYPE_CHECKING:
+    from sgr_agent_core.agent_definition import AgentConfig
+    from sgr_agent_core.models import AgentContext
+
+
+class SearchToolsTool(SystemBaseTool):
+    """Search for available tools by capability description.
+
+    Use this tool when you need a capability that is not in your current
+    toolkit. Describe what you need in natural language and matching
+    tools will be added to your active toolkit for subsequent use.
+    """
+
+    query: str = Field(description="Natural language description of the capability you need (e.g. 'search the web')")
+
+    async def __call__(self, context: AgentContext, config: AgentConfig, **kwargs) -> str:
+        if not isinstance(context, ProgressiveDiscoveryContext):
+            return "Error: context is not initialized as ProgressiveDiscoveryContext"
+
+        if not context.all_tools:
+            return "No additional tools available for discovery."
+
+        matched = ToolFilterService.filter_tools(self.query, context.all_tools)
+
+        already_discovered_names = {t.tool_name for t in context.discovered_tools}
+        new_tools = [t for t in matched if t.tool_name not in already_discovered_names]
+
+        if not new_tools:
+            return f"No new tools found for query '{self.query}'. Already discovered: {already_discovered_names}"
+
+        context.discovered_tools.extend(new_tools)
+
+        summary = ToolFilterService.get_tool_summaries(new_tools)
+        return (
+            f"Found {len(new_tools)} new tool(s) for '{self.query}':\n{summary}\n\n"
+            "These tools are now available in your toolkit. You can use them in subsequent steps."
+        )
@@ -46,6 +46,8 @@ dependencies = [
     "uvicorn>=0.35.0",
     "fastmcp>=2.12.4",
     "jambo>=0.1.3.post2",
+    # Tools filtering
+    "rank-bm25>=0.2.2",
 ]
 
 [project.urls]
Original file line number	Diff line number	Diff line change
`@@ -46,6 +46,8 @@ dependencies = [`
`46`	`46`	`"uvicorn>=0.35.0",`
`47`	`47`	`"fastmcp>=2.12.4",`
`48`	`48`	`"jambo>=0.1.3.post2",`
	`49`	`+ # Tools filtering`
	`50`	`+ "rank-bm25>=0.2.2",`
`49`	`51`	`]`
`50`	`52`
`51`	`53`	`[project.urls]`