Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions examples/progressive_discovery/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Progressive Tool Discovery

Example agent demonstrating dynamic tool discovery for SGR Agent Core.

## Problem

When using multiple MCP servers (Jira, Confluence, GitHub, GDrive), each adds dozens of tools. With ~60 tools the LLM context becomes bloated — local models can't handle it, and paid APIs waste tokens on irrelevant tool descriptions.

## Solution

The agent starts with a minimal set of **system tools** (reasoning, planning, clarification, final answer) and dynamically discovers additional tools via `SearchToolsTool`.

```
User query → Agent reasons → needs web search → calls SearchToolsTool("search the web")
→ WebSearchTool discovered and added to active toolkit → Agent uses WebSearchTool
```

### How it works

1. **Init**: Toolkit is split into system tools (subclasses of `SystemBaseTool`) and discoverable tools
2. **Runtime**: Only system tools + already discovered tools are sent to LLM
3. **Discovery**: Agent calls `SearchToolsTool` with a natural language query
4. **Matching**: `ToolFilterService` uses BM25 ranking + regex keyword overlap to find relevant tools
5. **Activation**: Matched tools are added to the active toolkit for subsequent calls

### Key components

| Component | Description |
| --------------------------- | ------------------------------------------------------------- |
| `ProgressiveDiscoveryAgent` | Agent subclass that manages system/discovered tool split |
| `SearchToolsTool` | Meta-tool for discovering new tools by capability description |
| `ToolFilterService` | Stateless BM25 + regex matching service |

## Usage

```bash
cp config.yaml.example config.yaml
# Edit config.yaml with your API key and MCP servers
sgr --config-file config.yaml
```

## Architecture

```
ProgressiveDiscoveryAgent
├── self.toolkit = [ReasoningTool, SearchToolsTool, ...] (system tools)
├── context.all_tools = [WebSearchTool, ...] (discoverable)
└── context.discovered_tools = [] (accumulates at runtime)
```

`context` is a `ProgressiveDiscoveryContext(AgentContext)` — extends the base context with discovery-specific fields.

`_get_active_tools()` returns `system_tools + discovered_tools` — used by both `_prepare_tools()` and `_prepare_context()`.
Empty file.
42 changes: 42 additions & 0 deletions examples/progressive_discovery/config.yaml.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Progressive Discovery Agent Configuration
#
# This agent starts with minimal system tools and dynamically discovers
# additional tools as needed via SearchToolsTool (BM25 + regex matching).
#
# Useful when you have many MCP servers with dozens of tools — keeps
# the LLM context small and focused.

llm:
model: "gpt-4o"
base_url: "https://api.openai.com/v1"
api_key: "sk-..."
temperature: 0.1
max_tokens: 16000

execution:
max_iterations: 15
max_clarifications: 2

# MCP servers provide additional tools that will be discoverable
# (not loaded into context until agent searches for them)
#
# mcp:
# mcpServers:
# jira:
# url: "https://your-jira-mcp-server.com/mcp"
# github:
# url: "https://your-github-mcp-server.com/mcp"

agents:
progressive_discovery:
base_class: "examples.progressive_discovery.progressive_discovery_agent.ProgressiveDiscoveryAgent"
tools:
- "reasoning_tool"
- "clarification_tool"
- "generate_plan_tool"
- "adapt_plan_tool"
- "create_report_tool"
- "final_answer_tool"
# Non-system tools — discoverable via SearchToolsTool
- "web_search_tool"
- "extract_page_content_tool"
19 changes: 19 additions & 0 deletions examples/progressive_discovery/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from pydantic import Field

from sgr_agent_core.base_tool import BaseTool
from sgr_agent_core.models import AgentContext


class ProgressiveDiscoveryContext(AgentContext):
    """Extended agent context for progressive discovery.

    Inherits all standard AgentContext fields (iteration, state,
    searches, etc.) and adds tool lists used by the discovery mechanism.
    SearchToolsTool matches queries against ``all_tools`` and moves hits
    into ``discovered_tools``; the agent then exposes system tools plus
    ``discovered_tools`` to the LLM.
    """

    # Non-system tool classes registered at agent init. These are the
    # discovery candidates; they are not exposed to the LLM until matched
    # by SearchToolsTool.
    all_tools: list[type[BaseTool]] = Field(
        default_factory=list, description="Full list of non-system tools available for discovery"
    )
    # Tools surfaced so far via SearchToolsTool. Appended to at runtime;
    # merged with the system toolkit when preparing each LLM call.
    discovered_tools: list[type[BaseTool]] = Field(
        default_factory=list, description="Tools discovered so far via SearchToolsTool"
    )
77 changes: 77 additions & 0 deletions examples/progressive_discovery/progressive_discovery_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from __future__ import annotations

from typing import Type

from openai import AsyncOpenAI, pydantic_function_tool

from sgr_agent_core.agent_definition import AgentConfig
from sgr_agent_core.agents.sgr_tool_calling_agent import SGRToolCallingAgent
from sgr_agent_core.base_tool import BaseTool, SystemBaseTool
from sgr_agent_core.services.prompt_loader import PromptLoader

from .models import ProgressiveDiscoveryContext
from .tools.search_tools_tool import SearchToolsTool


class ProgressiveDiscoveryAgent(SGRToolCallingAgent):
    """Agent that starts with minimal system tools and dynamically discovers
    additional tools via SearchToolsTool.

    On init, splits the toolkit into:
    - system tools (subclasses of SystemBaseTool) -> self.toolkit (always available)
    - non-system tools -> stored in context.all_tools

    SearchToolsTool is automatically added if not already present.
    Discovered tools accumulate in context.discovered_tools.
    """

    name: str = "progressive_discovery_agent"

    def __init__(
        self,
        task_messages: list,
        openai_client: AsyncOpenAI,
        agent_config: AgentConfig,
        toolkit: list[Type[BaseTool]],
        def_name: str | None = None,
        **kwargs,
    ):
        """Split ``toolkit`` into always-on system tools and discoverable tools.

        Args:
            task_messages: Initial conversation messages for the task.
            openai_client: Async OpenAI-compatible client used for completions.
            agent_config: Agent configuration (LLM, execution limits, prompts).
            toolkit: Full tool list; partitioned by SystemBaseTool subclassing.
            def_name: Optional agent definition name forwarded to the base class.
            **kwargs: Extra keyword arguments forwarded to the base class.
        """
        # Partition in a single pass; relative order within each group is kept.
        system_tools: list[Type[BaseTool]] = []
        non_system_tools: list[Type[BaseTool]] = []
        for tool in toolkit:
            (system_tools if issubclass(tool, SystemBaseTool) else non_system_tools).append(tool)

        # The discovery meta-tool must always be available to the LLM.
        if SearchToolsTool not in system_tools:
            system_tools.append(SearchToolsTool)

        super().__init__(
            task_messages=task_messages,
            openai_client=openai_client,
            agent_config=agent_config,
            toolkit=system_tools,
            def_name=def_name,
            **kwargs,
        )

        # NOTE(review): this replaces whatever context the base __init__ set up;
        # assumes SGRToolCallingAgent stores no needed state in its context at
        # this point — confirm against the base class.
        self._context = ProgressiveDiscoveryContext(
            all_tools=non_system_tools,
        )

    def _get_active_tools(self) -> list[Type[BaseTool]]:
        """Return a fresh list of system tools + tools discovered so far."""
        return list(self.toolkit) + list(self._context.discovered_tools)

    async def _prepare_tools(self) -> list[dict]:
        """Override to expose only active tools (system + discovered) to the LLM.

        Returns:
            OpenAI function-tool schemas for the active tools.

        Raises:
            RuntimeError: When the configured iteration budget is exhausted.
        """
        # Check the budget before doing any per-iteration work.
        if self._context.iteration >= self.config.execution.max_iterations:
            raise RuntimeError("Max iterations reached")
        return [pydantic_function_tool(tool, name=tool.tool_name) for tool in self._get_active_tools()]

    async def _prepare_context(self) -> list[dict]:
        """Override so the system prompt describes only the active tools."""
        active_tools = self._get_active_tools()
        return [
            {"role": "system", "content": PromptLoader.get_system_prompt(active_tools, self.config.prompts)},
            *self.task_messages,
            {"role": "user", "content": PromptLoader.get_initial_user_request(self.task_messages, self.config.prompts)},
            *self.conversation,
        ]
Empty file.
82 changes: 82 additions & 0 deletions examples/progressive_discovery/services/tool_filter_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from __future__ import annotations

import re
from typing import TYPE_CHECKING

from rank_bm25 import BM25Okapi

if TYPE_CHECKING:
from sgr_agent_core.base_tool import BaseTool


class ToolFilterService:
"""Stateless service for filtering tools by relevance to a query.

Uses BM25 ranking + regex keyword overlap to find tools matching a
query.
"""

@classmethod
def filter_tools(
cls,
query: str,
tools: list[type[BaseTool]],
bm25_threshold: float = 0.1,
) -> list[type[BaseTool]]:
"""Filter tools by relevance to query using BM25 + regex.

Args:
query: Natural language description of needed capability.
tools: Full list of available tool classes.
bm25_threshold: Minimum BM25 score to consider a tool relevant.

Returns:
List of tool classes matching the query.
"""
if not query or not query.strip() or not tools:
return list(tools)

query_lower = query.strip().lower()

tool_documents = []
for tool in tools:
tool_name = (tool.tool_name or tool.__name__).lower()
tool_description = (tool.description or "").lower()
tool_documents.append(f"{tool_name} {tool_description}")

tokenized_docs = [doc.split() for doc in tool_documents]
bm25 = BM25Okapi(tokenized_docs)

query_tokens = query_lower.split()
scores = bm25.get_scores(query_tokens)

query_words = set(re.findall(r"\b\w+\b", query_lower))

filtered = []
for i, tool in enumerate(tools):
bm25_score = scores[i]

tool_words = set(re.findall(r"\b\w+\b", tool_documents[i]))
has_regex_match = bool(query_words & tool_words)

if bm25_score > bm25_threshold or has_regex_match:
filtered.append(tool)

return filtered

@classmethod
def get_tool_summaries(cls, tools: list[type[BaseTool]]) -> str:
"""Format tool list for LLM output.

Args:
tools: List of tool classes to summarize.

Returns:
Formatted string with tool names and descriptions.
"""
lines = []
for i, tool in enumerate(tools, start=1):
name = tool.tool_name or tool.__name__
desc = tool.description or ""
lines.append(f"{i}. {name}: {desc}")
return "\n".join(lines)
Empty file.
48 changes: 48 additions & 0 deletions examples/progressive_discovery/tools/search_tools_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from pydantic import Field

from sgr_agent_core.base_tool import SystemBaseTool

from ..models import ProgressiveDiscoveryContext
from ..services.tool_filter_service import ToolFilterService

if TYPE_CHECKING:
from sgr_agent_core.agent_definition import AgentConfig
from sgr_agent_core.models import AgentContext


class SearchToolsTool(SystemBaseTool):
    """Search for available tools by capability description.

    Use this tool when you need a capability that is not in your current
    toolkit. Describe what you need in natural language and matching
    tools will be added to your active toolkit for subsequent use.
    """

    query: str = Field(description="Natural language description of the capability you need (e.g. 'search the web')")

    async def __call__(self, context: AgentContext, config: AgentConfig, **kwargs) -> str:
        # Discovery only works against the extended context created by
        # ProgressiveDiscoveryAgent; bail out gracefully otherwise.
        if not isinstance(context, ProgressiveDiscoveryContext):
            return "Error: context is not initialized as ProgressiveDiscoveryContext"

        if not context.all_tools:
            return "No additional tools available for discovery."

        matched = ToolFilterService.filter_tools(self.query, context.all_tools)

        # Drop anything already discovered so we only report genuinely new tools.
        known_names = {tool.tool_name for tool in context.discovered_tools}
        fresh = [tool for tool in matched if tool.tool_name not in known_names]

        if not fresh:
            return f"No new tools found for query '{self.query}'. Already discovered: {known_names}"

        # Activate: the agent merges discovered_tools into its toolkit on
        # every subsequent LLM call.
        context.discovered_tools.extend(fresh)

        summary = ToolFilterService.get_tool_summaries(fresh)
        return (
            f"Found {len(fresh)} new tool(s) for '{self.query}':\n{summary}\n\n"
            "These tools are now available in your toolkit. You can use them in subsequent steps."
        )
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ dependencies = [
"uvicorn>=0.35.0",
"fastmcp>=2.12.4",
"jambo>=0.1.3.post2",
# Tools filtering
"rank-bm25>=0.2.2",
]

[project.urls]
Expand Down
3 changes: 2 additions & 1 deletion sgr_agent_core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from sgr_agent_core.agent_factory import AgentFactory
from sgr_agent_core.agents import * # noqa: F403
from sgr_agent_core.base_agent import BaseAgent
from sgr_agent_core.base_tool import BaseTool, MCPBaseTool
from sgr_agent_core.base_tool import BaseTool, MCPBaseTool, SystemBaseTool
from sgr_agent_core.models import (
AgentContext,
AgentStatesEnum,
Expand All @@ -40,6 +40,7 @@
# Base classes
"BaseAgent",
"BaseTool",
"SystemBaseTool",
"MCPBaseTool",
# Models
"AgentStatesEnum",
Expand Down
4 changes: 2 additions & 2 deletions sgr_agent_core/agent_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def _definitions_from_dict(cls, data: dict) -> Self:
# Check for agents that will be overridden
overridden = set(cls._instance.agents.keys()) & set(custom_agents.keys())
if overridden:
logger.warning(f"Loaded agents will override existing agents: " f"{', '.join(sorted(overridden))}")
logger.warning(f"Loaded agents will override existing agents: {', '.join(sorted(overridden))}")

cls._instance.agents.update(custom_agents)

Expand All @@ -90,7 +90,7 @@ def _definitions_from_dict(cls, data: dict) -> Self:
# Check for tools that will be overridden
overridden_tools = set(cls._instance.tools.keys()) & set(custom_tools.keys())
if overridden_tools:
logger.warning(f"Loaded tools will override existing tools: " f"{', '.join(sorted(overridden_tools))}")
logger.warning(f"Loaded tools will override existing tools: {', '.join(sorted(overridden_tools))}")

cls._instance.tools.update(custom_tools)
return cls._instance
Expand Down
Loading