
Commit bb6ceb4

add example regex based classification mcp server
Signed-off-by: Huamin Chen <[email protected]>
1 parent 75cd389 commit bb6ceb4

9 files changed: +1052 -160 lines changed

config/config-mcp-classifier-example.yaml

Lines changed: 89 additions & 129 deletions
```diff
@@ -1,17 +1,20 @@
-# Example Configuration for MCP-Based Category Classifier
+# Example Configuration for MCP-Based Category Classifier (HTTP Transport)
 #
 # This configuration demonstrates how to use an external MCP (Model Context Protocol)
-# service for category classification instead of the built-in Candle/ModernBERT models.
+# service via HTTP for category classification instead of the built-in Candle/ModernBERT models.
 #
 # Use cases:
-# - Offload classification to a remote service
+# - Offload classification to a remote HTTP service
 # - Use custom classification models not supported in-tree
 # - Scale classification independently from the router
-# - Integrate with existing ML infrastructure
+# - Integrate with existing ML infrastructure via REST API
+#
+# Note: This example uses HTTP transport. The MCP server should expose an HTTP endpoint
+# that implements the MCP protocol (e.g., http://localhost:8080/mcp)
 
 # BERT model for semantic caching and tool selection
 bert_model:
-  model_id: "models/all-MiniLM-L6-v2"
+  model_id: "sentence-transformers/all-MiniLM-L6-v2"
   threshold: 0.85
   use_cpu: true
 
@@ -20,156 +23,85 @@ classifier:
   # Disable in-tree category classifier (leave model_id empty)
   category_model:
     model_id: ""  # Empty = disabled
-    threshold: 0.6
-    use_cpu: true
-    use_modernbert: false
-    category_mapping_path: ""
 
-  # Enable MCP-based category classifier
+  # Enable MCP-based category classifier (HTTP transport only)
   mcp_category_model:
     enabled: true  # Enable MCP classifier
-    transport_type: "stdio"  # "stdio" or "http"
-
-    # For stdio transport: run a local Python MCP server
-    command: "python"
-    args: ["-m", "mcp_category_classifier"]
-    env:
-      PYTHONPATH: "/opt/ml/models"
-      MODEL_PATH: "/opt/ml/models/category_classifier"
-      LOG_LEVEL: "INFO"
+    transport_type: "http"  # HTTP transport
+    url: "http://localhost:8090/mcp"  # MCP server endpoint
 
-    # For http transport: use this instead
-    # transport_type: "http"
-    # url: "http://localhost:8080/mcp"
-
-    tool_name: "classify_text"  # MCP tool name to call
+    tool_name: "classify_text"  # MCP tool name to call
     threshold: 0.6  # Confidence threshold
     timeout_seconds: 30  # Request timeout
 
-  # PII model configuration (unchanged)
-  pii_model:
-    model_id: "models/pii_classifier"
-    threshold: 0.7
-    use_cpu: true
-    pii_mapping_path: "models/pii_classifier/pii_type_mapping.json"
-
-  # Prompt guard configuration (unchanged)
-  prompt_guard:
-    enabled: true
-    model_id: "models/jailbreak_classifier"
-    threshold: 0.8
-    use_cpu: true
-    use_modernbert: true
-    jailbreak_mapping_path: "models/jailbreak_classifier/jailbreak_mapping.json"
-
 # Categories for routing queries
-categories:
-  - name: "math"
-    description: "Mathematical problems, equations, calculus, algebra, statistics"
-    model_scores:
-      - model: "deepseek/deepseek-r1:70b"
-        score: 0.95
-        use_reasoning: true
-      - model: "qwen/qwen3-235b"
-        score: 0.90
-        use_reasoning: true
-    mmlu_categories:
-      - "mathematics"
-      - "statistics"
-
-  - name: "coding"
-    description: "Programming, software development, debugging, algorithms"
-    model_scores:
-      - model: "deepseek/deepseek-r1-coder:33b"
-        score: 0.95
-        use_reasoning: true
-      - model: "meta/llama3.1-70b"
-        score: 0.85
-        use_reasoning: false
-    mmlu_categories:
-      - "computer_science"
-      - "engineering"
-
-  - name: "general"
-    description: "General knowledge, conversation, misc queries"
-    model_scores:
-      - model: "meta/llama3.1-70b"
-        score: 0.90
-        use_reasoning: false
-      - model: "qwen/qwen3-235b"
-        score: 0.85
-        use_reasoning: false
+#
+# Categories are automatically loaded from MCP server via 'list_categories' tool.
+# The MCP server controls BOTH classification AND routing decisions.
+#
+# How it works:
+# 1. Router connects to MCP server at startup
+# 2. Calls 'list_categories' tool: MCP returns {"categories": ["business", "law", ...]}
+# 3. For each request, calls 'classify_text' tool which returns:
+#    {
+#      "class": 3,
+#      "confidence": 0.85,
+#      "model": "openai/gpt-oss-20b",  # MCP decides which model to use
+#      "use_reasoning": true           # MCP decides whether to use reasoning
+#    }
+# 4. Router uses the model and reasoning settings from MCP response
+#
+# BENEFITS:
+# - MCP server makes intelligent routing decisions per query
+# - No hardcoded routing rules needed in config
+# - MCP can adapt routing based on query complexity, content, etc.
+# - Centralized routing logic in MCP server
+#
+# FALLBACK:
+# - If MCP doesn't return model/use_reasoning, uses default_model below
+# - Can also add category-specific overrides here if needed
+#
+categories: []
 
 # Default model to use when category can't be determined
-default_model: "meta/llama3.1-70b"
+default_model: openai/gpt-oss-20b
 
 # vLLM endpoints configuration
 vllm_endpoints:
-  - name: "deepseek-endpoint"
-    address: "10.0.1.10"
+  - name: endpoint1
+    address: 127.0.0.1
     port: 8000
     models:
-      - "deepseek/deepseek-r1:70b"
-      - "deepseek/deepseek-r1-coder:33b"
-    weight: 100
-
-  - name: "qwen-endpoint"
-    address: "10.0.1.11"
-    port: 8000
-    models:
-      - "qwen/qwen3-235b"
-    weight: 100
-
-  - name: "llama-endpoint"
-    address: "10.0.1.12"
-    port: 8000
-    models:
-      - "meta/llama3.1-70b"
-    weight: 100
-
-# Semantic cache configuration (optional)
-semantic_cache:
-  enabled: true
-  backend_type: "in-memory"
-  similarity_threshold: 0.90
-  max_entries: 1000
-  ttl_seconds: 3600
-  eviction_policy: "lru"
+      - openai/gpt-oss-20b
+    weight: 1
+    health_check_path: /health
 
 # Model-specific configuration
 model_config:
-  "deepseek/deepseek-r1:70b":
-    reasoning_family: "deepseek"
-    pii_policy:
-      allow_by_default: false
-      pii_types_allowed: []
-
-  "deepseek/deepseek-r1-coder:33b":
-    reasoning_family: "deepseek"
-    pii_policy:
-      allow_by_default: false
-      pii_types_allowed: []
-
-  "qwen/qwen3-235b":
-    reasoning_family: "qwen3"
-    pii_policy:
-      allow_by_default: true
-
-  "meta/llama3.1-70b":
+  openai/gpt-oss-20b:
+    reasoning_family: gpt-oss
+    preferred_endpoints:
+      - endpoint1
     pii_policy:
       allow_by_default: true
 
 # Reasoning family configurations
 reasoning_families:
   deepseek:
-    type: "chat_template_kwargs"
-    parameter: "thinking"
+    type: chat_template_kwargs
+    parameter: thinking
   qwen3:
-    type: "reasoning_effort"
-    parameter: "reasoning_effort"
+    type: chat_template_kwargs
+    parameter: enable_thinking
   gpt-oss:
-    type: "chat_template_kwargs"
-    parameter: "enable_thinking"
+    type: reasoning_effort
+    parameter: reasoning_effort
+  gpt:
+    type: reasoning_effort
+    parameter: reasoning_effort
+
+# Default reasoning effort level
+default_reasoning_effort: high
 
 # Tools configuration (optional)
 tools:
@@ -182,9 +114,37 @@ tools:
 # API configuration
 api:
   batch_classification:
+    max_batch_size: 100
+    concurrency_threshold: 5
+    max_concurrency: 8
     metrics:
       enabled: true
+      detailed_goroutine_tracking: true
+      high_resolution_timing: false
       sample_rate: 1.0
+      duration_buckets:
+        - 0.001
+        - 0.005
+        - 0.01
+        - 0.025
+        - 0.05
+        - 0.1
+        - 0.25
+        - 0.5
+        - 1
+        - 2.5
+        - 5
+        - 10
+        - 30
+      size_buckets:
+        - 1
+        - 2
+        - 5
+        - 10
+        - 20
+        - 50
+        - 100
+        - 200
 
 # Observability configuration
 observability:
```
Lines changed: 108 additions & 0 deletions
# MCP Classification Server

Example MCP server that provides text classification with intelligent routing for the semantic router.

## Features

- **Dynamic Categories**: Loaded from MCP server at runtime via `list_categories`
- **Intelligent Routing**: Returns `model` and `use_reasoning` in classification response
- **Regex-Based**: Simple pattern matching (replace with ML models for production)
- **Dual Transport**: Supports both HTTP and stdio

## Categories

| Index | Category | Example Keywords |
|-------|----------|------------------|
| 0 | math | calculate, equation, formula, integral |
| 1 | science | physics, chemistry, biology, atom, DNA |
| 2 | technology | computer, programming, AI, cloud |
| 3 | history | ancient, war, empire, civilization |
| 4 | general | Catch-all for other queries |

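The classifier core behind this table can be pictured as a minimal sketch like the one below. It is illustrative only, not the shipped `server.py`: the `classify` function name and the hit-count confidence formula are assumptions, and the pattern dict follows the shape shown under "Customization".

```python
import re

# Pattern table mirroring the categories above (illustrative keywords).
CATEGORIES = {
    "math":       {"patterns": [r"\b(calculate|equation|formula|integral)\b"]},
    "science":    {"patterns": [r"\b(physics|chemistry|biology|atom|dna)\b"]},
    "technology": {"patterns": [r"\b(computer|programming|ai|cloud)\b"]},
    "history":    {"patterns": [r"\b(ancient|war|empire|civilization)\b"]},
    "general":    {"patterns": []},  # index 4: catch-all, matched only as fallback
}

def classify(text: str) -> tuple[int, float]:
    """Return (class index, confidence) for the best-matching category."""
    lowered = text.lower()
    scores = [
        sum(len(re.findall(p, lowered)) for p in spec["patterns"])
        for spec in CATEGORIES.values()
    ]
    best = max(range(len(scores)), key=scores.__getitem__)
    if scores[best] == 0:
        return list(CATEGORIES).index("general"), 0.5   # no keyword hit: catch-all
    return best, min(0.5 + 0.2 * scores[best], 0.95)    # naive hit-count confidence

print(classify("calculate the integral of x^2"))  # -> (0, 0.9)
```
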
## Quick Start

```bash
# Install dependencies
pip install -r requirements.txt

# HTTP mode (for semantic router)
python server.py --http --port 8090

# Stdio mode (for MCP clients)
python server.py
```

**Test the server:**

```bash
curl http://localhost:8090/health
# → {"status": "ok", "categories": ["math", "science", "technology", "history", "general"]}
```

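Beyond the health check, you can exercise the MCP tools themselves. A minimal client sketch, assuming the server speaks MCP's Streamable HTTP transport and a recent `mcp` Python SDK; if this example server wires HTTP differently, adapt accordingly:

```python
import asyncio

from mcp import ClientSession
from mcp.client.streamable_http import streamablehttp_client

async def main() -> None:
    async with streamablehttp_client("http://localhost:8090/mcp") as (read, write, _):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # Same calls the router makes: categories at startup, then per-request classify
            cats = await session.call_tool("list_categories", {})
            print(cats.content[0].text)
            result = await session.call_tool(
                "classify_text", {"text": "Explain how DNA replication works"}
            )
            print(result.content[0].text)  # JSON with class/confidence/model/use_reasoning

asyncio.run(main())
```
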
## Configuration

**Router config (`config-mcp-classifier-example.yaml`):**

```yaml
classifier:
  category_model:
    model_id: ""  # Empty = use MCP

  mcp_category_model:
    enabled: true
    transport_type: "http"
    url: "http://localhost:8090/mcp"
    tool_name: "classify_text"
    threshold: 0.6
    timeout_seconds: 30

categories: []  # Loaded dynamically from MCP
default_model: openai/gpt-oss-20b
```

## How It Works

**Intelligent Routing Rules** (a sketch follows this list):

- Long query (>20 words) + complex words (`why`, `how`, `explain`) → `use_reasoning: true`
- Math + short query → `use_reasoning: false`
- High confidence (>0.9) → `use_reasoning: false`
- Low confidence (<0.6) → `use_reasoning: true`
- Default → `use_reasoning: true`

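A minimal sketch of these rules applied in order, first match wins. It is illustrative only: the function name `decide_reasoning`, the 20-word cutoff for "short", and the exact word list are assumptions; the shipped logic lives in `decide_routing()`.

```python
import re

COMPLEX_WORDS = {"why", "how", "explain"}  # markers of queries that benefit from reasoning

def decide_reasoning(text: str, category: str, confidence: float) -> bool:
    """Apply the routing rules above; the first matching rule decides."""
    words = re.findall(r"[a-z']+", text.lower())
    if len(words) > 20 and COMPLEX_WORDS & set(words):
        return True              # long + complex question
    if category == "math" and len(words) <= 20:
        return False             # short math query: answer directly
    if confidence > 0.9:
        return False             # classifier is very sure
    if confidence < 0.6:
        return True              # classifier is unsure: reason carefully
    return True                  # default
```
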
**Response Format:**

```json
{
  "class": 1,
  "confidence": 0.85,
  "model": "openai/gpt-oss-20b",
  "use_reasoning": true
}
```

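On the router side, these fields are consumed roughly as sketched below; this follows the documented fallback to `default_model`, while the assumed default for a missing `use_reasoning` is not specified in this commit.

```python
def apply_routing(resp: dict, categories: list[str], default_model: str):
    """Map an MCP classify_text response onto a routing decision."""
    category = categories[resp["class"]]              # index into list_categories order
    model = resp.get("model") or default_model        # FALLBACK: default_model from config
    use_reasoning = resp.get("use_reasoning", False)  # default when omitted is assumed
    return category, model, use_reasoning

# Example with the response above and the categories from /health:
cats = ["math", "science", "technology", "history", "general"]
print(apply_routing(
    {"class": 1, "confidence": 0.85, "model": "openai/gpt-oss-20b", "use_reasoning": True},
    cats, "openai/gpt-oss-20b",
))  # -> ('science', 'openai/gpt-oss-20b', True)
```
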
## Customization

Edit `CATEGORIES` to add categories:

```python
CATEGORIES = {
    "your_category": {
        "patterns": [r"\b(keyword1|keyword2)\b"],
        "description": "Your description"
    }
}
```

Edit `decide_routing()` for custom routing logic:

```python
def decide_routing(text, category, confidence):
    if category == "math":
        return "deepseek/deepseek-math", False
    return "openai/gpt-oss-20b", True
```

## License

MIT

Lines changed: 2 additions & 0 deletions

```
mcp>=1.0.0
aiohttp>=3.9.0
```
