#143 tools filtering logic

EvilFreelancer · EvilFreelancer · commit 1ee8bf06982d · 2026-01-27T20:21:46.000+03:00
diff --git a/docs/en/framework/agents.md b/docs/en/framework/agents.md
@@ -319,6 +319,44 @@ class ResearchSGRAgent(SGRAgent):
 !!! Tip "State Machine for Tool Management... Or Something More"
     For more complex tool management logic, you can use a more serious state engine. This will allow you to explicitly define agent states and transition rules, simplifying the management of available tools at each stage of work.
 
+### Tools Filtering
+
+The framework provides an intelligent tools filtering mechanism that automatically selects only relevant tools based on the user prompt. This helps reduce context size and save tokens when working with large toolkits.
+
+**How it works:**
+
+- **System tools** (tools with `isSystemTool=True`) are always included regardless of the prompt
+- **Other tools** are filtered using a combination of:
+  - **BM25 ranking**: Lexical relevance scoring (term overlap, not semantic similarity) based on tool names and descriptions
+  - **Regex matching**: Whole-word overlap between the prompt and each tool's name and description
+
+**Enabling tools filtering:**
+
+```yaml
+agents:
+  my_agent:
+    base_class: "SGRAgent"
+    execution:
+      tools_filtering: true  # Enable intelligent tools filtering
+    tools:
+      - "web_search_tool"
+      - "extract_page_content_tool"
+      - "create_report_tool"
+      # ... many more tools
+```
+
+**Example:**
+
+When a user asks "Search for information about Python", the filtering system will:
+1. Always include system tools (e.g., `ReasoningTool`, `ClarificationTool`)
+2. Include `WebSearchTool` because the prompt contains the keyword "search"
+3. Exclude irrelevant tools that don't match the prompt
+
+This reduces the number of tools passed to the LLM, saving tokens and improving performance.
+
+!!! Note "Default behavior"
+    Tools filtering is disabled by default (`tools_filtering: false`). Enable it when you have many tools (>20) and want to optimize context usage.
+
 ### Example 2: Data Analysis Agent
 
 ```python
diff --git a/docs/ru/framework/agents.md b/docs/ru/framework/agents.md
@@ -319,6 +319,44 @@ class ResearchSGRAgent(SGRAgent):
 !!! Tip "Стейтмашина для управления инструментами... Или что-то большее"
     Для более сложной логики управления инструментами можно использовать более серьёзный движок состояний. Это позволит явно определить состояния агента и правила перехода между ними, что упростит управление доступными инструментами на каждом этапе работы.
 
+### Фильтрация инструментов
+
+Фреймворк предоставляет механизм интеллектуальной фильтрации инструментов, который автоматически выбирает только релевантные инструменты на основе промпта пользователя. Это помогает уменьшить размер контекста и сэкономить токены при работе с большими наборами инструментов.
+
+**Как это работает:**
+
+- **Системные инструменты** (инструменты с `isSystemTool=True`) всегда включаются независимо от промпта
+- **Остальные инструменты** фильтруются с использованием комбинации:
+  - **BM25 ранжирование**: Лексическая оценка релевантности (по совпадению слов, без учёта смысла) на основе названий и описаний инструментов
+  - **Regex сопоставление**: Совпадение целых слов между промптом и названием и описанием инструмента
+
+**Включение фильтрации инструментов:**
+
+```yaml
+agents:
+  my_agent:
+    base_class: "SGRAgent"
+    execution:
+      tools_filtering: true  # Включить интеллектуальную фильтрацию инструментов
+    tools:
+      - "web_search_tool"
+      - "extract_page_content_tool"
+      - "create_report_tool"
+      # ... много других инструментов
+```
+
+**Пример:**
+
+Когда пользователь спрашивает "Найди информацию о Python", система фильтрации:
+1. Всегда включает системные инструменты (например, `ReasoningTool`, `ClarificationTool`)
+2. Включает `WebSearchTool`, если слова промпта (например, "найди") встречаются в его названии или описании — сопоставление выполняется по точному совпадению слов
+3. Исключает нерелевантные инструменты, которые не соответствуют промпту
+
+Это уменьшает количество инструментов, передаваемых в LLM, экономя токены и улучшая производительность.
+
+!!! Note "Поведение по умолчанию"
+    Фильтрация инструментов отключена по умолчанию (`tools_filtering: false`). Включите её, когда у вас много инструментов (>20) и вы хотите оптимизировать использование контекста.
+
 ### Пример 2: Агент для анализа данных
 
 ```python
diff --git a/pyproject.toml b/pyproject.toml
@@ -46,6 +46,8 @@ dependencies = [
     "uvicorn>=0.35.0",
     "fastmcp>=2.12.4",
     "jambo>=0.1.3.post2",
+    # Tools filtering
+    "rank-bm25>=0.2.2",
 ]
 
 [project.urls]
diff --git a/sgr_agent_core/agent_definition.py b/sgr_agent_core/agent_definition.py
@@ -146,6 +146,9 @@ class ExecutionConfig(BaseModel, extra="allow"):
         default="logs", description="Directory for saving bot logs. Set to None or empty string to disable logging."
     )
     reports_dir: str = Field(default="reports", description="Directory for saving reports")
+    tools_filtering: bool = Field(
+        default=False, description="Enable intelligent tools filtering based on user prompt using Regex and BM25"
+    )
 
 
 class AgentConfig(BaseModel, extra="allow"):
diff --git a/sgr_agent_core/base_agent.py b/sgr_agent_core/base_agent.py
@@ -2,13 +2,15 @@
 import json
 import logging
 import os
+import re
 import traceback
 import uuid
 from datetime import datetime
 from typing import Type
 
 from openai import AsyncOpenAI, pydantic_function_tool
 from openai.types.chat import ChatCompletionFunctionToolParam, ChatCompletionMessageParam
+from rank_bm25 import BM25Okapi
 
 from sgr_agent_core.agent_definition import AgentConfig
 from sgr_agent_core.models import AgentContext, AgentStatesEnum
@@ -159,6 +161,78 @@ async def _prepare_context(self) -> list[dict]:
             *self.conversation,
         ]
 
+    async def _filter_tools_by_prompt(self) -> list[Type[BaseTool]]:
+        """Filter tools based on user prompt using Regex and BM25 search.
+
+        System tools (isSystemTool=True) are always included.
+        Other tools are filtered based on relevance to the user prompt.
+
+        Returns:
+            List of filtered tool classes
+        """
+        # Feature flag off: hand back the toolkit as-is, in its original order.
+        if not self.config.execution.tools_filtering:
+            return list(self.toolkit)
+
+        # Gather text from every entry of task_messages (any role): plain string
+        # content, or the "text" field of list items whose "type" is "text".
+        prompt_text = ""
+        for message in self.task_messages:
+            if isinstance(message.get("content"), str):
+                prompt_text += " " + message["content"]
+            elif isinstance(message.get("content"), list):
+                for content_item in message.get("content", []):
+                    if isinstance(content_item, dict) and content_item.get("type") == "text":
+                        prompt_text += " " + content_item.get("text", "")
+
+        # Lowercased so that matching against the lowercased tool text is case-insensitive.
+        prompt_text = prompt_text.strip().lower()
+
+        # Split the toolkit: system tools bypass filtering entirely.
+        system_tools = [tool for tool in self.toolkit if getattr(tool, "isSystemTool", False)]
+        non_system_tools = [tool for tool in self.toolkit if not getattr(tool, "isSystemTool", False)]
+
+        # Nothing to match against, or nothing to filter: return every tool (system tools first).
+        if not prompt_text or not non_system_tools:
+            return system_tools + non_system_tools
+
+        # One BM25 document per non-system tool: lowercased "<tool name> <description>".
+        tool_documents = []
+        for tool in non_system_tools:
+            tool_name = getattr(tool, "tool_name", tool.__name__).lower()
+            tool_description = getattr(tool, "description", "").lower()
+            doc_text = f"{tool_name} {tool_description}"
+            tool_documents.append(doc_text)
+
+        # Whitespace tokenization only, so punctuation stays attached to tokens.
+        tokenized_docs = [doc.split() for doc in tool_documents]
+        bm25 = BM25Okapi(tokenized_docs)
+
+        # Same whitespace split for the query; scores[i] is read below for non_system_tools[i].
+        query_tokens = prompt_text.split()
+        scores = bm25.get_scores(query_tokens)
+
+        # Keyword overlap: \w+ words of the prompt vs. \w+ words of each tool's name and description.
+        regex_matches = []
+        prompt_words = set(re.findall(r"\b\w+\b", prompt_text))
+        for tool in non_system_tools:
+            tool_name = getattr(tool, "tool_name", tool.__name__).lower()
+            tool_description = getattr(tool, "description", "").lower()
+            tool_words = set(re.findall(r"\b\w+\b", f"{tool_name} {tool_description}"))
+
+            # Any single shared word counts; there is no stop-word list, so common
+            # words such as "for" or "the" also produce a match.
+            matches = prompt_words.intersection(tool_words)
+            regex_matches.append(len(matches) > 0)
+
+        # Combine both signals with OR; the relative order of non_system_tools is preserved.
+        filtered_non_system_tools = []
+        for i, tool in enumerate(non_system_tools):
+            bm25_score = scores[i]
+            has_regex_match = regex_matches[i]
+
+            # 0.1 is a hard-coded BM25 cut-off.
+            # NOTE(review): not configurable; confirm it behaves sensibly for very small toolkits.
+            if bm25_score > 0.1 or has_regex_match:
+                filtered_non_system_tools.append(tool)
+
+        return system_tools + filtered_non_system_tools
+
     async def _prepare_tools(self) -> list[ChatCompletionFunctionToolParam]:
         """Prepare available tools for the current agent state and progress.
 
@@ -168,9 +242,11 @@ async def _prepare_tools(self) -> list[ChatCompletionFunctionToolParam]:
         Returns a list of ChatCompletionFunctionToolParam based
         available tools.
         """
-        tools = set(self.toolkit)
         if self._context.iteration >= self.config.execution.max_iterations:
             raise RuntimeError("Max iterations reached")
+
+        # Apply tools filtering if enabled
+        tools = await self._filter_tools_by_prompt()
         return [pydantic_function_tool(tool, name=tool.tool_name) for tool in tools]
 
     async def _reasoning_phase(self) -> ReasoningTool:
diff --git a/tests/test_base_agent.py b/tests/test_base_agent.py
@@ -595,3 +595,151 @@ def test_save_agent_log_creates_file_when_logs_dir_is_set(self, tmp_path):
         log_files = list(os.listdir(logs_dir))
         assert len(log_files) == 1
         assert log_files[0].endswith("-log.json")
+
+
+class TestBaseAgentToolsFiltering:
+    """Tests for tools filtering functionality in BaseAgent."""
+
+    @pytest.mark.asyncio
+    async def test_filter_tools_disabled_by_default(self):
+        """Test that tools filtering is disabled by default."""
+        from sgr_agent_core.tools import (
+            ClarificationTool,
+            ReasoningTool,
+            WebSearchTool,
+        )
+
+        agent = create_test_agent(
+            BaseAgent,
+            task_messages=[{"role": "user", "content": "Search for information about Python"}],
+            toolkit=[ReasoningTool, WebSearchTool, ClarificationTool],
+        )
+
+        filtered_tools = await agent._filter_tools_by_prompt()
+        assert len(filtered_tools) == 3
+        assert ReasoningTool in filtered_tools
+        assert WebSearchTool in filtered_tools
+        assert ClarificationTool in filtered_tools
+
+    @pytest.mark.asyncio
+    async def test_filter_tools_always_includes_system_tools(self):
+        """Test that system tools (isSystemTool=True) are always included."""
+        from sgr_agent_core.agent_definition import ExecutionConfig
+        from sgr_agent_core.tools import (
+            ClarificationTool,
+            ReasoningTool,
+            WebSearchTool,
+        )
+
+        execution_config = ExecutionConfig(tools_filtering=True)
+        agent = create_test_agent(
+            BaseAgent,
+            task_messages=[{"role": "user", "content": "Random unrelated text"}],
+            execution_config=execution_config,
+            toolkit=[ReasoningTool, WebSearchTool, ClarificationTool],
+        )
+
+        filtered_tools = await agent._filter_tools_by_prompt()
+        # ReasoningTool and ClarificationTool have isSystemTool=True, so they should always be included
+        assert ReasoningTool in filtered_tools
+        assert ClarificationTool in filtered_tools
+
+    @pytest.mark.asyncio
+    async def test_filter_tools_filters_by_relevance(self):
+        """Test that tools are filtered based on prompt relevance using BM25
+        and regex."""
+        from sgr_agent_core.agent_definition import ExecutionConfig
+        from sgr_agent_core.tools import (
+            ClarificationTool,
+            ReasoningTool,
+            WebSearchTool,
+        )
+
+        execution_config = ExecutionConfig(tools_filtering=True)
+        agent = create_test_agent(
+            BaseAgent,
+            task_messages=[{"role": "user", "content": "Search the web for information about machine learning"}],
+            execution_config=execution_config,
+            toolkit=[ReasoningTool, WebSearchTool, ClarificationTool],
+        )
+
+        filtered_tools = await agent._filter_tools_by_prompt()
+        # System tools should always be included
+        assert ReasoningTool in filtered_tools
+        assert ClarificationTool in filtered_tools
+        # WebSearchTool should be included because prompt mentions "search"
+        assert WebSearchTool in filtered_tools
+
+    @pytest.mark.asyncio
+    async def test_filter_tools_with_multiple_non_system_tools(self):
+        """Test filtering when there are multiple non-system tools."""
+        from sgr_agent_core.agent_definition import ExecutionConfig
+        from sgr_agent_core.tools import (
+            ClarificationTool,
+            ExtractPageContentTool,
+            ReasoningTool,
+            WebSearchTool,
+        )
+
+        execution_config = ExecutionConfig(tools_filtering=True)
+        agent = create_test_agent(
+            BaseAgent,
+            task_messages=[{"role": "user", "content": "Extract content from web pages"}],
+            execution_config=execution_config,
+            toolkit=[ReasoningTool, WebSearchTool, ExtractPageContentTool, ClarificationTool],
+        )
+
+        filtered_tools = await agent._filter_tools_by_prompt()
+        # System tools should always be included
+        assert ReasoningTool in filtered_tools
+        assert ClarificationTool in filtered_tools
+        # ExtractPageContentTool should be included because prompt mentions "extract"
+        assert ExtractPageContentTool in filtered_tools
+
+    @pytest.mark.asyncio
+    async def test_filter_tools_empty_prompt(self):
+        """Test filtering with empty prompt."""
+        from sgr_agent_core.agent_definition import ExecutionConfig
+        from sgr_agent_core.tools import (
+            ClarificationTool,
+            ReasoningTool,
+            WebSearchTool,
+        )
+
+        execution_config = ExecutionConfig(tools_filtering=True)
+        agent = create_test_agent(
+            BaseAgent,
+            task_messages=[{"role": "user", "content": ""}],
+            execution_config=execution_config,
+            toolkit=[ReasoningTool, WebSearchTool, ClarificationTool],
+        )
+
+        filtered_tools = await agent._filter_tools_by_prompt()
+        # System tools must be present even when the prompt is empty
+        assert ReasoningTool in filtered_tools
+        assert ClarificationTool in filtered_tools
+        # NOTE: with an empty prompt _filter_tools_by_prompt skips filtering and returns every tool,
+        # so WebSearchTool is present as well; that part of the behavior is not asserted here
+
+    @pytest.mark.asyncio
+    async def test_prepare_tools_uses_filtering_when_enabled(self):
+        """Test that _prepare_tools uses filtering when tools_filtering is
+        enabled."""
+        from sgr_agent_core.agent_definition import ExecutionConfig
+        from sgr_agent_core.tools import (
+            ClarificationTool,
+            ReasoningTool,
+            WebSearchTool,
+        )
+
+        execution_config = ExecutionConfig(tools_filtering=True)
+        agent = create_test_agent(
+            BaseAgent,
+            task_messages=[{"role": "user", "content": "Search for Python tutorials"}],
+            execution_config=execution_config,
+            toolkit=[ReasoningTool, WebSearchTool, ClarificationTool],
+        )
+
+        tools = await agent._prepare_tools()
+        # Should return filtered tools converted to ChatCompletionFunctionToolParam
+        assert len(tools) >= 2  # At least system tools should be present

Original file line number	Diff line number	Diff line change
`@@ -46,6 +46,8 @@ dependencies = [`
`46`	`46`	`"uvicorn>=0.35.0",`
`47`	`47`	`"fastmcp>=2.12.4",`
`48`	`48`	`"jambo>=0.1.3.post2",`
	`49`	`+ # Tools filtering`
	`50`	`+ "rank-bm25>=0.2.2",`
`49`	`51`	`]`
`50`	`52`
`51`	`53`	`[project.urls]`
Original file line number	Diff line number	Diff line change
`@@ -146,6 +146,9 @@ class ExecutionConfig(BaseModel, extra="allow"):`
`146`	`146`	`default="logs", description="Directory for saving bot logs. Set to None or empty string to disable logging."`
`147`	`147`	`)`
`148`	`148`	`reports_dir: str = Field(default="reports", description="Directory for saving reports")`
	`149`	`+ tools_filtering: bool = Field(`
	`150`	`+ default=False, description="Enable intelligent tools filtering based on user prompt using Regex and BM25"`
	`151`	`+ )`
`149`	`152`
`150`	`153`
`151`	`154`	`class AgentConfig(BaseModel, extra="allow"):`