Skip to content

Commit 33e4fea

Browse files
committed
feat: add configurable max tool rounds and limit handling (#31)
- Add --max-tool-rounds CLI option and MAX_TOOL_ROUNDS env var support - Pass max_tool_rounds into app state and MCPManager - Enforce validation in validate_cli_inputs (must be >= 1) - Implement round limiting in streaming and non-streaming proxy flows (including final LLM call/stream when limit reached) - Add _make_final_llm_call and _stream_final_llm_call helpers - Update README with usage and docs for max tool rounds - Add unit test for max_tool_rounds validation
1 parent 13f0827 commit 33e4fea

File tree

6 files changed

+102
-12
lines changed

6 files changed

+102
-12
lines changed

README.md

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,9 @@
4545
- 🚀 **Pre-loaded Servers**: All MCP servers are connected at startup from JSON configuration
4646
- 📝 **JSON Configuration**: Configure multiple servers with complex commands and environments
4747
- 🔗 **Tool Integration**: Automatic tool call processing and response integration
48-
-**Multi-Round Tool Execution**: Automatically loops through multiple rounds of tool calls until completion
49-
- �🛠️ **All Tools Available**: Ollama can use any tool from any connected server simultaneously
48+
- 🔄 **Multi-Round Tool Execution**: Automatically loops through multiple rounds of tool calls until completion
49+
- 🛡️ **Configurable Tool Limits**: Set maximum tool execution rounds to prevent excessive tool calls
50+
- 🛠️ **All Tools Available**: Ollama can use any tool from any connected server simultaneously
5051
- 🔌 **Complete API Compatibility**: `/api/chat` adds tools while all other Ollama API endpoints are transparently proxied
5152
- 🔧 **Configurable Ollama**: Specify custom Ollama server URL via CLI (supports local and cloud models)
5253
- ☁️ **Cloud Model Support**: Works with Ollama cloud models
@@ -193,12 +194,16 @@ CORS_ORIGINS="http://localhost:3000,http://localhost:8080,https://app.example.co
193194
```
194195

195196
**Environment Variables:**
197+
- `CORS_ORIGINS`: Comma-separated list of allowed origins (default: `*`)
198+
- `*` allows all origins (shows warning in logs)
199+
- Example: `CORS_ORIGINS="http://localhost:3000,https://myapp.com" ollama-mcp-bridge`
200+
- `MAX_TOOL_ROUNDS`: Maximum number of tool execution rounds (default: unlimited)
201+
- Can be overridden with `--max-tool-rounds` CLI parameter (CLI takes precedence)
202+
- Example: `MAX_TOOL_ROUNDS=5 ollama-mcp-bridge`
196203
- `OLLAMA_URL`: URL of the Ollama server (default: `http://localhost:11434`)
197204
- Can be overridden with `--ollama-url` CLI parameter
198205
- Useful for Docker deployments and configuration management
199-
- `CORS_ORIGINS`: Comma-separated list of allowed origins (default: `*`)
200-
- `*` allows all origins (shows warning in logs)
201-
- Specific origins like `http://localhost:3000,https://myapp.com` for production
206+
- Example: `OLLAMA_URL=http://192.168.1.100:11434 ollama-mcp-bridge`
202207

203208
**CORS Logging:**
204209
- The bridge logs CORS configuration at startup
@@ -227,8 +232,11 @@ ollama-mcp-bridge --host 0.0.0.0 --port 8080
227232
# Custom Ollama server URL (local or cloud)
228233
ollama-mcp-bridge --ollama-url http://192.168.1.100:11434
229234

235+
# Limit tool execution rounds (prevents excessive tool calls)
236+
ollama-mcp-bridge --max-tool-rounds 5
237+
230238
# Combine options
231-
ollama-mcp-bridge --config custom.json --host 0.0.0.0 --port 8080 --ollama-url http://remote-ollama:11434
239+
ollama-mcp-bridge --config custom.json --host 0.0.0.0 --port 8080 --ollama-url http://remote-ollama:11434 --max-tool-rounds 10
232240

233241
# Check version and available updates
234242
ollama-mcp-bridge --version
@@ -245,6 +253,8 @@ ollama-mcp-bridge --version
245253
- `--host`: Host to bind the server (default: `0.0.0.0`)
246254
- `--port`: Port to bind the server (default: `8000`)
247255
- `--ollama-url`: Ollama server URL (default: `http://localhost:11434`)
256+
- `--max-tool-rounds`: Maximum tool execution rounds (default: unlimited, can also be set via `MAX_TOOL_ROUNDS` environment variable)
257+
- `--reload`: Enable auto-reload during development
248258
- `--version`: Show version information, check for updates and exit
249259

250260
### API Usage

src/ollama_mcp_bridge/lifecycle.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,13 @@ async def lifespan(fastapi_app: FastAPI):
2323
# Get config from app state with explicit defaults
2424
config_file = getattr(fastapi_app.state, 'config_file', 'mcp-config.json')
2525
ollama_url = getattr(fastapi_app.state, 'ollama_url', 'http://localhost:11434')
26+
max_tool_rounds = getattr(fastapi_app.state, 'max_tool_rounds', None)
2627

27-
logger.info(f"Starting with config file: {config_file}, Ollama URL: {ollama_url}")
28+
logger.info(f"Starting with config file: {config_file}, Ollama URL: {ollama_url}, Max tool rounds: {max_tool_rounds if max_tool_rounds else 'unlimited'}")
2829

2930
# Initialize manager and load servers
3031
mcp_manager = MCPManager(ollama_url=ollama_url)
32+
mcp_manager.max_tool_rounds = max_tool_rounds
3133
await mcp_manager.load_servers(config_file)
3234

3335
# Initialize services

src/ollama_mcp_bridge/main.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
"""Simple CLI entry point for MCP Proxy"""
2-
import asyncio
32
import os
3+
import asyncio
44
import typer
55
import uvicorn
66
from loguru import logger
7+
from typing import Optional
78

89
from .api import app
910
from .utils import check_ollama_health, check_for_updates, validate_cli_inputs
@@ -14,6 +15,7 @@ def cli_app(
1415
host: str = typer.Option("0.0.0.0", "--host", help="Host to bind to"),
1516
port: int = typer.Option(8000, "--port", help="Port to bind to"),
1617
ollama_url: str = typer.Option(os.getenv("OLLAMA_URL", "http://localhost:11434"), "--ollama-url", help="Ollama server URL"),
18+
max_tool_rounds: Optional[int] = typer.Option(os.getenv("MAX_TOOL_ROUNDS", None), "--max-tool-rounds", help="Maximum tool execution rounds (default: unlimited)"),
1719
reload: bool = typer.Option(False, "--reload", help="Enable auto-reload"),
1820
version: bool = typer.Option(False, "--version", help="Show version information, check for updates and exit"),
1921
):
@@ -23,10 +25,12 @@ def cli_app(
2325
# Check for updates and print if available
2426
asyncio.run(check_for_updates(__version__, print_message=True))
2527
raise typer.Exit(0)
26-
validate_cli_inputs(config, host, port, ollama_url)
28+
validate_cli_inputs(config, host, port, ollama_url, max_tool_rounds)
29+
2730
# Store config in app state so lifespan can access it
2831
app.state.config_file = config
2932
app.state.ollama_url = ollama_url
33+
app.state.max_tool_rounds = max_tool_rounds
3034

3135
logger.info(f"Starting MCP proxy server on {host}:{port}")
3236
logger.info(f"Using Ollama server: {ollama_url}")

src/ollama_mcp_bridge/proxy_service.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,36 @@ async def proxy_chat_with_tools(self, payload: Dict[str, Any], stream: bool = Fa
5858
logger.error(f"Chat proxy failed: {e}")
5959
raise
6060

61+
async def _make_final_llm_call(self, endpoint: str, payload: Dict[str, Any], messages: list) -> Dict[str, Any]:
62+
"""Make a final LLM call without tools to get final answer after tool execution"""
63+
final_payload = dict(payload)
64+
final_payload["messages"] = messages
65+
final_payload["tools"] = None # Don't allow more tool calls
66+
resp = await self.http_client.post(f"{self.mcp_manager.ollama_url}{endpoint}", json=final_payload)
67+
resp.raise_for_status()
68+
return resp.json()
69+
70+
async def _stream_final_llm_call(self, stream_ollama, payload: Dict[str, Any], messages: list) -> AsyncGenerator[bytes, None]:
71+
"""Stream a final LLM call without tools to get final answer after tool execution"""
72+
final_payload = dict(payload)
73+
final_payload["messages"] = messages
74+
final_payload["tools"] = None # Don't allow more tool calls
75+
76+
ndjson_iter = iter_ndjson_chunks(stream_ollama(final_payload))
77+
async for json_obj in ndjson_iter:
78+
buffer_chunk = json.dumps(json_obj).encode() + b"\n"
79+
yield buffer_chunk
80+
6181
async def _proxy_with_tools_non_streaming(self, endpoint: str, payload: Dict[str, Any]) -> Dict[str, Any]:
6282
"""Handle non-streaming chat requests with tools"""
6383
payload = dict(payload)
6484
payload["tools"] = self.mcp_manager.all_tools if self.mcp_manager.all_tools else None
6585
messages = payload.get("messages") or []
6686

87+
# Get max tool rounds from app state (None means unlimited)
88+
max_rounds = getattr(self.mcp_manager, 'max_tool_rounds', None)
89+
current_round = 0
90+
6791
# Loop to handle potentially multiple rounds of tool calls
6892
while True:
6993
# Call Ollama
@@ -85,6 +109,13 @@ async def _proxy_with_tools_non_streaming(self, endpoint: str, payload: Dict[str
85109

86110
# Execute tool calls and add results to messages
87111
messages = await self._handle_tool_calls(messages, tool_calls)
112+
113+
# Check if we've reached the maximum number of rounds
114+
current_round += 1
115+
if max_rounds is not None and current_round >= max_rounds:
116+
logger.warning(f"Reached maximum tool execution rounds ({max_rounds}), making final LLM call with tool results")
117+
return await self._make_final_llm_call(endpoint, payload, messages)
118+
88119
# Continue loop to get next response
89120

90121
async def _proxy_with_tools_streaming(self, endpoint: str, payload: Dict[str, Any]) -> AsyncGenerator[bytes, None]:
@@ -100,6 +131,10 @@ async def stream_ollama(payload_to_send):
100131
async for chunk in resp.aiter_bytes():
101132
yield chunk
102133

134+
# Get max tool rounds from app state (None means unlimited)
135+
max_rounds = getattr(self.mcp_manager, 'max_tool_rounds', None)
136+
current_round = 0
137+
103138
# Loop to handle potentially multiple rounds of tool calls
104139
while True:
105140
current_payload = dict(payload)
@@ -128,14 +163,23 @@ async def stream_ollama(payload_to_send):
128163
# No tool calls required, streaming complete
129164
break
130165

131-
# Tool calls detected; execute them and loop for the follow-up response
166+
# Tool calls detected; execute them
132167
messages.append({
133168
"role": "assistant",
134169
"content": response_text,
135170
"tool_calls": tool_calls
136171
})
137172
messages = await self._handle_tool_calls(messages, tool_calls)
138173

174+
# Check if we've reached the maximum number of rounds
175+
current_round += 1
176+
if max_rounds is not None and current_round >= max_rounds:
177+
logger.warning(f"Reached maximum tool execution rounds ({max_rounds}), making final LLM call with tool results")
178+
# Stream the final LLM response with tool results (no more tools allowed)
179+
async for chunk in self._stream_final_llm_call(stream_ollama, payload, messages):
180+
yield chunk
181+
break
182+
139183
def _extract_tool_calls(self, result: Dict[str, Any]) -> list:
140184
"""Extract tool calls from response"""
141185
tool_calls = result.get("message", {}).get("tool_calls", [])

src/ollama_mcp_bridge/utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,8 @@ async def iter_ndjson_chunks(chunk_iterator):
8080
except json.JSONDecodeError as e:
8181
logger.debug(f"Error parsing trailing NDJSON: {e}")
8282

83-
def validate_cli_inputs(config: str, host: str, port: int, ollama_url: str):
84-
"""Validate CLI inputs for config file, host, port, and ollama_url."""
83+
def validate_cli_inputs(config: str, host: str, port: int, ollama_url: str, max_tool_rounds: int = None):
84+
"""Validate CLI inputs for config file, host, port, ollama_url, and max_tool_rounds."""
8585
# Validate config file exists
8686
if not os.path.isfile(config):
8787
raise BadParameter(f"Config file not found: {config}")
@@ -99,6 +99,10 @@ def validate_cli_inputs(config: str, host: str, port: int, ollama_url: str):
9999
if not url_pattern.match(ollama_url):
100100
raise BadParameter(f"Invalid Ollama URL: {ollama_url}")
101101

102+
# Validate max_tool_rounds
103+
if max_tool_rounds is not None and max_tool_rounds < 1:
104+
raise BadParameter(f"max_tool_rounds must be at least 1, got {max_tool_rounds}")
105+
102106
async def check_for_updates(current_version: str, print_message: bool = False) -> str:
103107
"""
104108
Check if a newer version of ollama-mcp-bridge is available on PyPI.

tests/test_unit.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,32 @@ def test_example_config_structure():
131131
assert "args" in server_config
132132
assert isinstance(server_config["args"], list)
133133

134+
def test_validate_cli_max_tool_rounds():
135+
"""Test that validate_cli_inputs enforces max_tool_rounds validation."""
136+
try:
137+
from ollama_mcp_bridge.utils import validate_cli_inputs
138+
except ImportError:
139+
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
140+
from ollama_mcp_bridge.utils import validate_cli_inputs
141+
142+
# Valid case: None
143+
validate_cli_inputs("mcp-config.json", "0.0.0.0", 8000, "http://localhost:11434", None)
144+
145+
# Invalid max_tool_rounds (zero)
146+
from typer import BadParameter
147+
try:
148+
validate_cli_inputs("mcp-config.json", "0.0.0.0", 8000, "http://localhost:11434", 0)
149+
assert False, "Expected BadParameter for max_tool_rounds=0"
150+
except BadParameter:
151+
pass
152+
153+
# Invalid max_tool_rounds (negative)
154+
try:
155+
validate_cli_inputs("mcp-config.json", "0.0.0.0", 8000, "http://localhost:11434", -1)
156+
assert False, "Expected BadParameter for max_tool_rounds=-1"
157+
except BadParameter:
158+
pass
159+
134160
def test_script_installed():
135161
try:
136162
result = subprocess.run(["ollama-mcp-bridge", "--help"], check=False)

0 commit comments

Comments (0)