From 33eac4406659a3efad82bffe3ff6204d52b11609 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Mon, 21 Jul 2025 18:36:33 +0200 Subject: [PATCH 1/2] Add tools to config --- .gitignore | 5 +- examples/tool_filtering_demo/README.md | 55 ++++++++++++++++ examples/tool_filtering_demo/agent.json | 14 +++++ .../inference/_mcp/mcp_client.py | 63 ++++++++++++++++++- src/huggingface_hub/inference/_mcp/types.py | 8 +++ tiny_agents.md | 46 ++++++++++++++ 6 files changed, 188 insertions(+), 3 deletions(-) create mode 100644 examples/tool_filtering_demo/README.md create mode 100644 examples/tool_filtering_demo/agent.json create mode 100644 tiny_agents.md diff --git a/.gitignore b/.gitignore index 705fdcc38b..f989a75311 100644 --- a/.gitignore +++ b/.gitignore @@ -139,4 +139,7 @@ dmypy.json # Spell checker config cspell.json -tmp* \ No newline at end of file +tmp* + +# Claude Code +CLAUDE.md \ No newline at end of file diff --git a/examples/tool_filtering_demo/README.md b/examples/tool_filtering_demo/README.md new file mode 100644 index 0000000000..c4c15f9ccd --- /dev/null +++ b/examples/tool_filtering_demo/README.md @@ -0,0 +1,55 @@ +# Tool Filtering Demo + +This example demonstrates the new tool filtering feature for tiny agents. + +## Configuration + +The `agent.json` shows how to filter tools from MCP servers: + +```json +{ + "servers": [ + { + "type": "stdio", + "command": "npx", + "args": ["@playwright/mcp@latest"], + "tools": { + "include": ["browser_click", "browser_close"] + } + } + ] +} +``` + +## Tool Filtering Options + +### Include only specific tools +```json +"tools": { + "include": ["tool1", "tool2", "tool3"] +} +``` + +### Exclude specific tools +```json +"tools": { + "exclude": ["unwanted_tool1", "unwanted_tool2"] +} +``` + +### Combine both (exclude takes precedence) +```json +"tools": { + "include": ["tool1", "tool2", "tool3"], + "exclude": ["tool2"] +} +``` +Result: Only `tool1` and `tool3` will be available. + +## Running the Example + +```bash +tiny-agents run examples/tool_filtering_demo +``` + +This agent will have access to only the `browser_click` and `browser_close` tools from Playwright, instead of all 30+ tools that Playwright provides by default. \ No newline at end of file diff --git a/examples/tool_filtering_demo/agent.json b/examples/tool_filtering_demo/agent.json new file mode 100644 index 0000000000..db4ba88cb2 --- /dev/null +++ b/examples/tool_filtering_demo/agent.json @@ -0,0 +1,14 @@ +{ + "model": "meta-llama/Meta-Llama-3-8B-Instruct", + "provider": "auto", + "servers": [ + { + "type": "stdio", + "command": "npx", + "args": ["@playwright/mcp@latest"], + "tools": { + "include": ["browser_click", "browser_close"] + } + } + ] +} \ No newline at end of file diff --git a/src/huggingface_hub/inference/_mcp/mcp_client.py b/src/huggingface_hub/inference/_mcp/mcp_client.py index 2712dea121..a5d22c97f6 100644 --- a/src/huggingface_hub/inference/_mcp/mcp_client.py +++ b/src/huggingface_hub/inference/_mcp/mcp_client.py @@ -139,21 +139,27 @@ async def add_mcp_server(self, type: ServerType, **params: Any): - args (List[str], optional): Arguments for the command - env (Dict[str, str], optional): Environment variables for the command - cwd (Union[str, Path, None], optional): Working directory for the command + - tools (Dict, optional): Tool filtering configuration with 'include' and/or 'exclude' lists - For SSE servers: - url (str): The URL of the SSE server - headers (Dict[str, Any], optional): Headers for the SSE connection - timeout (float, optional): Connection timeout - sse_read_timeout (float, optional): SSE read timeout + - tools (Dict, optional): Tool filtering configuration with 'include' and/or 'exclude' lists - For StreamableHTTP servers: - url (str): The URL of the StreamableHTTP server - headers (Dict[str, Any], optional): Headers for the StreamableHTTP connection - timeout (timedelta, optional): Connection timeout - sse_read_timeout (timedelta, optional): SSE read timeout - terminate_on_close (bool, optional): Whether to terminate on close + - tools (Dict, optional): Tool filtering configuration with 'include' and/or 'exclude' lists """ from mcp import ClientSession, StdioServerParameters from mcp import types as mcp_types + # Extract tools configuration if provided + tools_config = params.pop("tools", None) + # Determine server type and create appropriate parameters if type == "stdio": # Handle stdio server @@ -209,9 +215,18 @@ async def add_mcp_server(self, type: ServerType, **params: Any): # List available tools response = await session.list_tools() - logger.debug("Connected to server with tools:", [tool.name for tool in response.tools]) + all_tool_names = [tool.name for tool in response.tools] + logger.debug("Connected to server with tools:", all_tool_names) + + # Filter tools based on configuration + filtered_tools = self._filter_tools(response.tools, tools_config, all_tool_names) + + if tools_config: + logger.info( + f"Tool filtering applied. Using {len(filtered_tools)} of {len(response.tools)} available tools: {[tool.name for tool in filtered_tools]}" + ) - for tool in response.tools: + for tool in filtered_tools: if tool.name in self.sessions: logger.warning(f"Tool '{tool.name}' already defined by another server. Skipping.") continue @@ -233,6 +248,50 @@ async def add_mcp_server(self, type: ServerType, **params: Any): ) ) + def _filter_tools( + self, tools: List[Any], tools_config: Optional[Dict[str, Any]], all_tool_names: List[str] + ) -> List[Any]: + """Filter tools based on include/exclude configuration. + + Args: + tools: List of MCP tool objects + tools_config: Optional tools configuration dict with 'include' and/or 'exclude' keys + all_tool_names: List of all available tool names for validation + + Returns: + Filtered list of tools + """ + if not tools_config: + return tools + + include_list = tools_config.get("include") + exclude_list = tools_config.get("exclude") + + # Validate that specified tools exist + if include_list: + missing_tools = set(include_list) - set(all_tool_names) + if missing_tools: + logger.warning(f"Tools specified in 'include' list not found on server: {list(missing_tools)}") + + if exclude_list: + missing_tools = set(exclude_list) - set(all_tool_names) + if missing_tools: + logger.warning(f"Tools specified in 'exclude' list not found on server: {list(missing_tools)}") + + filtered_tools = [] + for tool in tools: + # If include list is specified, only include tools in that list + if include_list and tool.name not in include_list: + continue + + # If exclude list is specified, exclude tools in that list + if exclude_list and tool.name in exclude_list: + continue + + filtered_tools.append(tool) + + return filtered_tools + async def process_single_turn_with_tools( self, messages: List[Union[Dict, ChatCompletionInputMessage]], diff --git a/src/huggingface_hub/inference/_mcp/types.py b/src/huggingface_hub/inference/_mcp/types.py index cfb5e0eac9..7305b0a56b 100644 --- a/src/huggingface_hub/inference/_mcp/types.py +++ b/src/huggingface_hub/inference/_mcp/types.py @@ -10,24 +10,32 @@ class InputConfig(TypedDict, total=False): password: bool +class ToolsConfig(TypedDict, total=False): + include: NotRequired[List[str]] + exclude: NotRequired[List[str]] + + class StdioServerConfig(TypedDict): type: Literal["stdio"] command: str args: List[str] env: Dict[str, str] cwd: str + tools: NotRequired[ToolsConfig] class HTTPServerConfig(TypedDict): type: Literal["http"] url: str headers: Dict[str, str] + tools: NotRequired[ToolsConfig] class SSEServerConfig(TypedDict): type: Literal["sse"] url: str headers: Dict[str, str] + tools: NotRequired[ToolsConfig] ServerConfig = Union[StdioServerConfig, HTTPServerConfig, SSEServerConfig] diff --git a/tiny_agents.md b/tiny_agents.md new file mode 100644 index 0000000000..00fe9b9e84 --- /dev/null +++ b/tiny_agents.md @@ -0,0 +1,46 @@ +Tiny agents (https://huggingface.co/blog/python-tiny-agents) is a minimalistic framework for running AI agents. When running a tiny agent with `huggingface_hub` using the `tiny-agents run agent` command, the command will look for an `agent.json` file which defines the configuration of the agent. Each agent is defined by an LLM (powered by Hugging Face Inference Providers which is similar to the OpenAI API) as well as a set of MCP servers, whose tools will be provided to the LLM. Currently, one can just add certain MCP servers to the config, such as the one below: + +```json +{ + "model": "Qwen/Qwen2.5-72B-Instruct", + "provider": "nebius", + "inputs": [ + { + "type": "promptString", + "id": "github-personal-access-token", + "description": "Github Personal Access Token (read-only)", + "password": true + } + ], + "servers": [ + { + "type": "stdio", + "command": "docker", + "args": [ + "run", + "-i", + "--rm", + "-e", + "GITHUB_PERSONAL_ACCESS_TOKEN", + "-e", + "GITHUB_TOOLSETS=repos,issues,pull_requests", + "ghcr.io/github/github-mcp-server" + ], + "env": { + "GITHUB_PERSONAL_ACCESS_TOKEN": "${input:github-personal-access-token}" + } + }, + { + "type": "stdio", + "command": "npx", + "args": [ + "@playwright/mcp@latest" + ] + } + ] +} +``` + +However it would be nice to have a feature that allows users to define which tools to enable/disable in the config JSON file. For example, for the Playwright MCP server (which by default has more than 30 tools), I actually only need the `browser_click` and `browser_close` tools. Enabling only a handful of tools makes AI agents much more reliable. + +Would you be able to implement this feature? \ No newline at end of file From 19b5b9e7de1c1f4c7a63f13bcc44454583d52264 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Fri, 8 Aug 2025 21:24:47 +0200 Subject: [PATCH 2/2] Simplify MCP tool filtering to use allowed_tools parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace complex include/exclude structure with simple allowed_tools list - Follow OpenAI specs for tool filtering (allowlist approach only) - Simplify _filter_tools method to use list comprehension - Update type definitions in types.py - Remove example files as requested by maintainers - Add comprehensive unit tests for allowed_tools functionality This addresses PR feedback from @julien-c and @Wauplin: - Eliminates confusion about include/exclude precedence - Aligns with industry standards (OpenAI) - Provides cleaner, simpler API 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- examples/tool_filtering_demo/README.md | 55 --------- examples/tool_filtering_demo/agent.json | 14 --- .../inference/_mcp/mcp_client.py | 58 +++------- src/huggingface_hub/inference/_mcp/types.py | 11 +- tests/test_mcp_client.py | 104 ++++++++++++++++++ tiny_agents.md | 46 -------- 6 files changed, 125 insertions(+), 163 deletions(-) delete mode 100644 examples/tool_filtering_demo/README.md delete mode 100644 examples/tool_filtering_demo/agent.json create mode 100644 tests/test_mcp_client.py delete mode 100644 tiny_agents.md diff --git a/examples/tool_filtering_demo/README.md b/examples/tool_filtering_demo/README.md deleted file mode 100644 index c4c15f9ccd..0000000000 --- a/examples/tool_filtering_demo/README.md +++ /dev/null @@ -1,55 +0,0 @@ -# Tool Filtering Demo - -This example demonstrates the new tool filtering feature for tiny agents. - -## Configuration - -The `agent.json` shows how to filter tools from MCP servers: - -```json -{ - "servers": [ - { - "type": "stdio", - "command": "npx", - "args": ["@playwright/mcp@latest"], - "tools": { - "include": ["browser_click", "browser_close"] - } - } - ] -} -``` - -## Tool Filtering Options - -### Include only specific tools -```json -"tools": { - "include": ["tool1", "tool2", "tool3"] -} -``` - -### Exclude specific tools -```json -"tools": { - "exclude": ["unwanted_tool1", "unwanted_tool2"] -} -``` - -### Combine both (exclude takes precedence) -```json -"tools": { - "include": ["tool1", "tool2", "tool3"], - "exclude": ["tool2"] -} -``` -Result: Only `tool1` and `tool3` will be available. - -## Running the Example - -```bash -tiny-agents run examples/tool_filtering_demo -``` - -This agent will have access to only the `browser_click` and `browser_close` tools from Playwright, instead of all 30+ tools that Playwright provides by default. \ No newline at end of file diff --git a/examples/tool_filtering_demo/agent.json b/examples/tool_filtering_demo/agent.json deleted file mode 100644 index db4ba88cb2..0000000000 --- a/examples/tool_filtering_demo/agent.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "model": "meta-llama/Meta-Llama-3-8B-Instruct", - "provider": "auto", - "servers": [ - { - "type": "stdio", - "command": "npx", - "args": ["@playwright/mcp@latest"], - "tools": { - "include": ["browser_click", "browser_close"] - } - } - ] -} \ No newline at end of file diff --git a/src/huggingface_hub/inference/_mcp/mcp_client.py b/src/huggingface_hub/inference/_mcp/mcp_client.py index a5d22c97f6..ed410057a0 100644 --- a/src/huggingface_hub/inference/_mcp/mcp_client.py +++ b/src/huggingface_hub/inference/_mcp/mcp_client.py @@ -139,26 +139,26 @@ async def add_mcp_server(self, type: ServerType, **params: Any): - args (List[str], optional): Arguments for the command - env (Dict[str, str], optional): Environment variables for the command - cwd (Union[str, Path, None], optional): Working directory for the command - - tools (Dict, optional): Tool filtering configuration with 'include' and/or 'exclude' lists + - allowed_tools (List[str], optional): List of tool names to allow from this server - For SSE servers: - url (str): The URL of the SSE server - headers (Dict[str, Any], optional): Headers for the SSE connection - timeout (float, optional): Connection timeout - sse_read_timeout (float, optional): SSE read timeout - - tools (Dict, optional): Tool filtering configuration with 'include' and/or 'exclude' lists + - allowed_tools (List[str], optional): List of tool names to allow from this server - For StreamableHTTP servers: - url (str): The URL of the StreamableHTTP server - headers (Dict[str, Any], optional): Headers for the StreamableHTTP connection - timeout (timedelta, optional): Connection timeout - sse_read_timeout (timedelta, optional): SSE read timeout - terminate_on_close (bool, optional): Whether to terminate on close - - tools (Dict, optional): Tool filtering configuration with 'include' and/or 'exclude' lists + - allowed_tools (List[str], optional): List of tool names to allow from this server """ from mcp import ClientSession, StdioServerParameters from mcp import types as mcp_types - # Extract tools configuration if provided - tools_config = params.pop("tools", None) + # Extract allowed_tools configuration if provided + allowed_tools = params.pop("allowed_tools", None) # Determine server type and create appropriate parameters if type == "stdio": @@ -218,10 +218,10 @@ async def add_mcp_server(self, type: ServerType, **params: Any): all_tool_names = [tool.name for tool in response.tools] logger.debug("Connected to server with tools:", all_tool_names) - # Filter tools based on configuration - filtered_tools = self._filter_tools(response.tools, tools_config, all_tool_names) + # Filter tools based on allowed_tools configuration + filtered_tools = self._filter_tools(response.tools, allowed_tools) - if tools_config: + if allowed_tools: logger.info( f"Tool filtering applied. Using {len(filtered_tools)} of {len(response.tools)} available tools: {[tool.name for tool in filtered_tools]}" ) @@ -248,49 +248,27 @@ async def add_mcp_server(self, type: ServerType, **params: Any): ) ) - def _filter_tools( - self, tools: List[Any], tools_config: Optional[Dict[str, Any]], all_tool_names: List[str] - ) -> List[Any]: - """Filter tools based on include/exclude configuration. + def _filter_tools(self, tools: List[Any], allowed_tools: Optional[List[str]]) -> List[Any]: + """Filter tools based on allowed_tools list. Args: tools: List of MCP tool objects - tools_config: Optional tools configuration dict with 'include' and/or 'exclude' keys - all_tool_names: List of all available tool names for validation + allowed_tools: Optional list of tool names to allow Returns: Filtered list of tools """ - if not tools_config: + if allowed_tools is None: return tools - include_list = tools_config.get("include") - exclude_list = tools_config.get("exclude") - # Validate that specified tools exist - if include_list: - missing_tools = set(include_list) - set(all_tool_names) - if missing_tools: - logger.warning(f"Tools specified in 'include' list not found on server: {list(missing_tools)}") - - if exclude_list: - missing_tools = set(exclude_list) - set(all_tool_names) - if missing_tools: - logger.warning(f"Tools specified in 'exclude' list not found on server: {list(missing_tools)}") - - filtered_tools = [] - for tool in tools: - # If include list is specified, only include tools in that list - if include_list and tool.name not in include_list: - continue - - # If exclude list is specified, exclude tools in that list - if exclude_list and tool.name in exclude_list: - continue - - filtered_tools.append(tool) + all_tool_names = [tool.name for tool in tools] + missing_tools = set(allowed_tools) - set(all_tool_names) + if missing_tools: + logger.warning(f"Tools specified in 'allowed_tools' not found on server: {list(missing_tools)}") - return filtered_tools + # Filter tools using list comprehension + return [tool for tool in tools if tool.name in allowed_tools] async def process_single_turn_with_tools( self, diff --git a/src/huggingface_hub/inference/_mcp/types.py b/src/huggingface_hub/inference/_mcp/types.py index 7305b0a56b..100f67832e 100644 --- a/src/huggingface_hub/inference/_mcp/types.py +++ b/src/huggingface_hub/inference/_mcp/types.py @@ -10,32 +10,27 @@ class InputConfig(TypedDict, total=False): password: bool -class ToolsConfig(TypedDict, total=False): - include: NotRequired[List[str]] - exclude: NotRequired[List[str]] - - class StdioServerConfig(TypedDict): type: Literal["stdio"] command: str args: List[str] env: Dict[str, str] cwd: str - tools: NotRequired[ToolsConfig] + allowed_tools: NotRequired[List[str]] class HTTPServerConfig(TypedDict): type: Literal["http"] url: str headers: Dict[str, str] - tools: NotRequired[ToolsConfig] + allowed_tools: NotRequired[List[str]] class SSEServerConfig(TypedDict): type: Literal["sse"] url: str headers: Dict[str, str] - tools: NotRequired[ToolsConfig] + allowed_tools: NotRequired[List[str]] ServerConfig = Union[StdioServerConfig, HTTPServerConfig, SSEServerConfig] diff --git a/tests/test_mcp_client.py b/tests/test_mcp_client.py new file mode 100644 index 0000000000..7dd9798e0e --- /dev/null +++ b/tests/test_mcp_client.py @@ -0,0 +1,104 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import unittest +from unittest.mock import MagicMock + +from huggingface_hub.inference._mcp.mcp_client import MCPClient + + +class TestMCPClient(unittest.TestCase): + def setUp(self): + self.client = MCPClient(model="test-model", provider="test-provider") + + def test_filter_tools_no_allowed_tools(self): + """Test that _filter_tools returns all tools when no allowed_tools is specified.""" + # Create mock tools + mock_tools = [ + MagicMock(name="tool1"), + MagicMock(name="tool2"), + MagicMock(name="tool3"), + ] + + result = self.client._filter_tools(mock_tools, None) + + self.assertEqual(len(result), 3) + self.assertEqual(result, mock_tools) + + def test_filter_tools_with_allowed_tools(self): + """Test that _filter_tools correctly filters tools based on allowed_tools list.""" + # Create mock tools + mock_tool1 = MagicMock() + mock_tool1.name = "tool1" + mock_tool2 = MagicMock() + mock_tool2.name = "tool2" + mock_tool3 = MagicMock() + mock_tool3.name = "tool3" + + mock_tools = [mock_tool1, mock_tool2, mock_tool3] + allowed_tools = ["tool1", "tool3"] + + result = self.client._filter_tools(mock_tools, allowed_tools) + + self.assertEqual(len(result), 2) + self.assertIn(mock_tool1, result) + self.assertIn(mock_tool3, result) + self.assertNotIn(mock_tool2, result) + + def test_filter_tools_with_empty_allowed_tools(self): + """Test that _filter_tools returns empty list when allowed_tools is empty.""" + mock_tools = [ + MagicMock(name="tool1"), + MagicMock(name="tool2"), + ] + + result = self.client._filter_tools(mock_tools, []) + + self.assertEqual(len(result), 0) + + def test_filter_tools_with_nonexistent_tools(self): + """Test that _filter_tools handles non-existent tool names gracefully.""" + mock_tool1 = MagicMock() + mock_tool1.name = "tool1" + mock_tools = [mock_tool1] + + # Include a non-existent tool in allowed_tools + allowed_tools = ["tool1", "nonexistent_tool"] + + with self.assertLogs(level="WARNING") as log: + result = self.client._filter_tools(mock_tools, allowed_tools) + + # Should only return existing tools + self.assertEqual(len(result), 1) + self.assertEqual(result[0], mock_tool1) + + # Should log a warning about missing tools + self.assertIn("not found on server", log.output[0]) + + def test_filter_tools_all_nonexistent_tools(self): + """Test that _filter_tools returns empty list when all allowed_tools are non-existent.""" + mock_tool1 = MagicMock() + mock_tool1.name = "tool1" + mock_tools = [mock_tool1] + + allowed_tools = ["nonexistent_tool1", "nonexistent_tool2"] + + with self.assertLogs(level="WARNING") as log: + result = self.client._filter_tools(mock_tools, allowed_tools) + + self.assertEqual(len(result), 0) + self.assertIn("not found on server", log.output[0]) + + +if __name__ == "__main__": + unittest.main() diff --git a/tiny_agents.md b/tiny_agents.md deleted file mode 100644 index 00fe9b9e84..0000000000 --- a/tiny_agents.md +++ /dev/null @@ -1,46 +0,0 @@ -Tiny agents (https://huggingface.co/blog/python-tiny-agents) is a minimalistic framework for running AI agents. When running a tiny agent with `huggingface_hub` using the `tiny-agents run agent` command, the command will look for an `agent.json` file which defines the configuration of the agent. Each agent is defined by an LLM (powered by Hugging Face Inference Providers which is similar to the OpenAI API) as well as a set of MCP servers, whose tools will be provided to the LLM. Currently, one can just add certain MCP servers to the config, such as the one below: - -```json -{ - "model": "Qwen/Qwen2.5-72B-Instruct", - "provider": "nebius", - "inputs": [ - { - "type": "promptString", - "id": "github-personal-access-token", - "description": "Github Personal Access Token (read-only)", - "password": true - } - ], - "servers": [ - { - "type": "stdio", - "command": "docker", - "args": [ - "run", - "-i", - "--rm", - "-e", - "GITHUB_PERSONAL_ACCESS_TOKEN", - "-e", - "GITHUB_TOOLSETS=repos,issues,pull_requests", - "ghcr.io/github/github-mcp-server" - ], - "env": { - "GITHUB_PERSONAL_ACCESS_TOKEN": "${input:github-personal-access-token}" - } - }, - { - "type": "stdio", - "command": "npx", - "args": [ - "@playwright/mcp@latest" - ] - } - ] -} -``` - -However it would be nice to have a feature that allows users to define which tools to enable/disable in the config JSON file. For example, for the Playwright MCP server (which by default has more than 30 tools), I actually only need the `browser_click` and `browser_close` tools. Enabling only a handful of tools makes AI agents much more reliable. - -Would you be able to implement this feature? \ No newline at end of file