vllm-project
diff --git a/‎config/config-mcp-classifier-example.yaml‎
Lines changed: 168 additions & 0 deletions b/‎config/config-mcp-classifier-example.yaml‎
Lines changed: 168 additions & 0 deletions
diff --git a/‎examples/mcp-classifier-server/README.md‎
Lines changed: 134 additions & 0 deletions b/‎examples/mcp-classifier-server/README.md‎
Lines changed: 134 additions & 0 deletions
diff --git a/‎examples/mcp-classifier-server/requirements.txt‎
Lines changed: 2 additions & 0 deletions b/‎examples/mcp-classifier-server/requirements.txt‎
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,168 @@
+# Example Configuration for MCP-Based Category Classifier (HTTP Transport)
+#
+# This configuration demonstrates how to use an external MCP (Model Context Protocol)
+# service via HTTP for category classification instead of the built-in Candle/ModernBERT models.
+#
+# Use cases:
+# - Offload classification to a remote HTTP service
+# - Use custom classification models not supported in-tree
+# - Scale classification independently from the router
+# - Integrate with existing ML infrastructure via REST API
+#
+# Note: This example uses HTTP transport. The MCP server should expose an HTTP endpoint
+# that implements the MCP protocol (e.g., http://localhost:8090/mcp, as configured below)
+
+# BERT model for semantic caching and tool selection
+bert_model:
+  model_id: "sentence-transformers/all-MiniLM-L6-v2"
+  threshold: 0.85  # separate setting from classifier.mcp_category_model.threshold (0.6)
+  use_cpu: true
+
+# Classifier configuration
+classifier:
+  # Disable in-tree category classifier (leave model_id empty)
+  category_model:
+    model_id: ""  # Empty = in-tree classifier disabled
+
+  # Enable MCP-based category classifier (HTTP transport only)
+  mcp_category_model:
+    enabled: true                    # Enable MCP classifier
+    transport_type: "http"           # HTTP transport
+    url: "http://localhost:8090/mcp" # MCP endpoint; port matches the example server's --port 8090
+    
+    # tool_name: Optional - auto-discovers classification tool if not specified
+    # Will search for tools like: classify_text, classify, categorize, etc.
+    # Uncomment to explicitly specify:
+    # tool_name: "classify_text"
+    
+    threshold: 0.6                   # Confidence threshold
+    timeout_seconds: 30              # Request timeout, in seconds
+
+# Categories for routing queries
+# 
+# Categories are automatically loaded from MCP server via 'list_categories' tool.
+# The MCP server controls BOTH classification AND routing decisions.
+#
+# How it works:
+#   1. Router connects to MCP server at startup
+#   2. Calls 'list_categories' tool: MCP returns {"categories": ["business", "law", ...]}
+#   3. For each request, calls 'classify_text' tool which returns:
+#      {
+#        "class": 3,
+#        "confidence": 0.85,
+#        "model": "openai/gpt-oss-20b",        # MCP decides which model to use
+#        "use_reasoning": true                  # MCP decides whether to use reasoning
+#      }
+#   4. Router uses the model and reasoning settings from MCP response
+#
+# BENEFITS:
+#   - MCP server makes intelligent routing decisions per query
+#   - No hardcoded routing rules needed in config
+#   - MCP can adapt routing based on query complexity, content, etc.
+#   - Centralized routing logic in MCP server
+#
+# FALLBACK:
+#   - If the MCP response omits model/use_reasoning, the router uses default_model below
+#   - Category-specific overrides can also be added here if needed
+#
+categories: []  # intentionally empty: loaded from the MCP server's 'list_categories' tool
+
+# Default model to use when category can't be determined
+default_model: openai/gpt-oss-20b  # also listed under vllm_endpoints[].models and keyed in model_config
+
+# vLLM endpoints configuration
+vllm_endpoints:
+  - name: endpoint1  # referenced by model_config.*.preferred_endpoints
+    address: 127.0.0.1
+    port: 8000
+    models:
+      - openai/gpt-oss-20b  # same name as default_model and the model_config key
+    weight: 1
+    health_check_path: /health
+
+# Model-specific configuration
+model_config:
+  openai/gpt-oss-20b:
+    reasoning_family: gpt-oss  # matches the gpt-oss key under reasoning_families below
+    preferred_endpoints:
+      - endpoint1  # name of the entry defined under vllm_endpoints above
+    pii_policy:
+      allow_by_default: true
+
+# Reasoning family configurations
+reasoning_families:
+  deepseek:  # not referenced by model_config in this example
+    type: chat_template_kwargs
+    parameter: thinking
+  qwen3:  # not referenced by model_config in this example
+    type: chat_template_kwargs
+    parameter: enable_thinking
+  gpt-oss:  # the family selected by model_config.openai/gpt-oss-20b.reasoning_family
+    type: reasoning_effort
+    parameter: reasoning_effort
+  gpt:  # not referenced by model_config in this example
+    type: reasoning_effort
+    parameter: reasoning_effort
+
+# Default reasoning effort level
+default_reasoning_effort: high  # NOTE(review): presumably applied via the reasoning_effort parameter above - confirm
+
+# Tools configuration (optional)
+tools:
+  enabled: false  # off in this example; NOTE(review): remaining keys presumably apply only when enabled - confirm
+  top_k: 5
+  similarity_threshold: 0.7
+  tools_db_path: "config/tools_db.json"
+  fallback_to_empty: true
+
+# API configuration
+api:
+  batch_classification:
+    max_batch_size: 100
+    concurrency_threshold: 5
+    max_concurrency: 8
+    metrics:
+      enabled: true
+      detailed_goroutine_tracking: true
+      high_resolution_timing: false
+      sample_rate: 1.0
+      duration_buckets:  # NOTE(review): presumably histogram bounds in seconds - confirm
+        - 0.001
+        - 0.005
+        - 0.01
+        - 0.025
+        - 0.05
+        - 0.1
+        - 0.25
+        - 0.5
+        - 1
+        - 2.5
+        - 5
+        - 10
+        - 30
+      size_buckets:  # NOTE(review): presumably batch-size histogram bounds - confirm
+        - 1
+        - 2
+        - 5
+        - 10
+        - 20
+        - 50
+        - 100
+        - 200  # NOTE(review): exceeds max_batch_size (100) above - confirm this bucket is intended
+
+# Observability configuration
+observability:
+  tracing:
+    enabled: false  # tracing is off in this example
+    provider: "opentelemetry"
+    exporter:
+      type: "otlp"
+      endpoint: "localhost:4317"
+      insecure: true  # NOTE(review): presumably disables TLS to the exporter endpoint - confirm before non-local use
+    sampling:
+      type: "always_on"
+    resource:
+      service_name: "semantic-router"
+      service_version: "1.0.0"
+      deployment_environment: "production"  # NOTE(review): every endpoint in this example is localhost - confirm this label is intended
+
@@ -0,0 +1,134 @@
+# MCP Classification Server
+
+Example MCP server that provides text classification with intelligent routing for the semantic router.
+
+## Features
+
+- **Dynamic Categories**: Loaded from MCP server at runtime via `list_categories`
+- **Intelligent Routing**: Returns `model` and `use_reasoning` in the classification response
+- **Regex-Based**: Simple pattern matching (replace with ML models for production)
+- **Dual Transport**: Supports both HTTP and stdio
+
+## Categories
+
+| Index | Category | Example Keywords |
+|-------|----------|------------------|
+| 0 | math | calculate, equation, formula, integral |
+| 1 | science | physics, chemistry, biology, atom, DNA |
+| 2 | technology | computer, programming, AI, cloud |
+| 3 | history | ancient, war, empire, civilization |
+| 4 | general | Catch-all for other queries |
+
+## Quick Start
+
+```bash
+# Install dependencies
+pip install -r requirements.txt
+
+# HTTP mode (for semantic router)
+python server.py --http --port 8090
+
+# Stdio mode (for MCP clients)
+python server.py
+```
+
+**Test the server:**
+
+```bash
+curl http://localhost:8090/health
+# → {"status": "ok", "categories": ["math", "science", "technology", "history", "general"]}
+```
+
+## Configuration
+
+**Router config (`config-mcp-classifier-example.yaml`):**
+
+```yaml
+classifier:
+  category_model:
+    model_id: ""  # Empty = in-tree classifier disabled; MCP is used instead
+  
+  mcp_category_model:
+    enabled: true
+    transport_type: "http"
+    url: "http://localhost:8090/mcp"
+    # tool_name: optional - auto-discovers classification tool if not specified
+    threshold: 0.6
+    timeout_seconds: 30
+
+categories: []  # Loaded dynamically from MCP
+default_model: openai/gpt-oss-20b
+```
+
+**Tool Auto-Discovery:**
+When `tool_name` is not set, the router discovers the classification tool from the MCP server as follows:
+
+1. Listing available tools on connection
+2. Looking for common names: `classify_text`, `classify`, `categorize`, `categorize_text`
+3. Pattern matching for tools containing "classif" in name/description
+4. If `tool_name` is set in the config, that tool is used instead of auto-discovery
+
+## Protocol API
+
+This server implements the MCP classification protocol defined in:
+
+```text
+github.com/vllm-project/semantic-router/src/semantic-router/pkg/connectivity/mcp/api
+```
+
+**Required Tools:**
+
+1. **`list_categories`** - Returns `ListCategoriesResponse`:
+
+   ```json
+   {"categories": ["math", "science", "technology", ...]}
+   ```
+
+2. **`classify_text`** - Returns `ClassifyResponse`:
+
+   ```json
+   {
+     "class": 1,
+     "confidence": 0.85,
+     "model": "openai/gpt-oss-20b",
+     "use_reasoning": true
+   }
+   ```
+
+See the `api` package for full type definitions and documentation.
+
+## How It Works
+
+**Intelligent Routing Rules:**
+
+- Long query (>20 words) + explanatory keywords (`why`, `how`, `explain`) → `use_reasoning: true`
+- Math + short query → `use_reasoning: false`
+- High confidence (>0.9) → `use_reasoning: false`
+- Low confidence (<0.6) → `use_reasoning: true`
+- Default → `use_reasoning: true`
+
+## Customization
+
+Edit `CATEGORIES` to add categories:
+
+```python
+CATEGORIES = {
+    "your_category": {
+        "patterns": [r"\b(keyword1|keyword2)\b"],
+        "description": "Your description"
+    }
+}
+```
+
+Edit `decide_routing()` for custom routing logic:
+
+```python
+def decide_routing(text, category, confidence):
+    if category == "math":
+        return "deepseek/deepseek-math", False
+    return "openai/gpt-oss-20b", True
+```
+
+## License
+
+MIT
@@ -0,0 +1,2 @@
+mcp>=1.0.0
+aiohttp>=3.9.0