
Commit 72031a5

add example regex based classification mcp server
Signed-off-by: Huamin Chen <[email protected]>
1 parent 75cd389

8 files changed: +944 −160 lines changed

config/config-mcp-classifier-example.yaml

Lines changed: 89 additions & 129 deletions
@@ -1,17 +1,20 @@
-# Example Configuration for MCP-Based Category Classifier
+# Example Configuration for MCP-Based Category Classifier (HTTP Transport)
 #
 # This configuration demonstrates how to use an external MCP (Model Context Protocol)
-# service for category classification instead of the built-in Candle/ModernBERT models.
+# service via HTTP for category classification instead of the built-in Candle/ModernBERT models.
 #
 # Use cases:
-# - Offload classification to a remote service
+# - Offload classification to a remote HTTP service
 # - Use custom classification models not supported in-tree
 # - Scale classification independently from the router
-# - Integrate with existing ML infrastructure
+# - Integrate with existing ML infrastructure via REST API
+#
+# Note: This example uses HTTP transport. The MCP server should expose an HTTP endpoint
+# that implements the MCP protocol (e.g., http://localhost:8080/mcp)

 # BERT model for semantic caching and tool selection
 bert_model:
-  model_id: "models/all-MiniLM-L6-v2"
+  model_id: "sentence-transformers/all-MiniLM-L6-v2"
   threshold: 0.85
   use_cpu: true

@@ -20,156 +23,85 @@ classifier:
   # Disable in-tree category classifier (leave model_id empty)
   category_model:
     model_id: "" # Empty = disabled
-    threshold: 0.6
-    use_cpu: true
-    use_modernbert: false
-    category_mapping_path: ""

-  # Enable MCP-based category classifier
+  # Enable MCP-based category classifier (HTTP transport only)
   mcp_category_model:
     enabled: true # Enable MCP classifier
-    transport_type: "stdio" # "stdio" or "http"
-
-    # For stdio transport: run a local Python MCP server
-    command: "python"
-    args: ["-m", "mcp_category_classifier"]
-    env:
-      PYTHONPATH: "/opt/ml/models"
-      MODEL_PATH: "/opt/ml/models/category_classifier"
-      LOG_LEVEL: "INFO"
+    transport_type: "http" # HTTP transport
+    url: "http://localhost:8090/mcp" # MCP server endpoint

-    # For http transport: use this instead
-    # transport_type: "http"
-    # url: "http://localhost:8080/mcp"
-
-    tool_name: "classify_text" # MCP tool name to call
+    tool_name: "classify_text" # MCP tool name to call
     threshold: 0.6 # Confidence threshold
     timeout_seconds: 30 # Request timeout

-  # PII model configuration (unchanged)
-  pii_model:
-    model_id: "models/pii_classifier"
-    threshold: 0.7
-    use_cpu: true
-    pii_mapping_path: "models/pii_classifier/pii_type_mapping.json"
-
-  # Prompt guard configuration (unchanged)
-  prompt_guard:
-    enabled: true
-    model_id: "models/jailbreak_classifier"
-    threshold: 0.8
-    use_cpu: true
-    use_modernbert: true
-    jailbreak_mapping_path: "models/jailbreak_classifier/jailbreak_mapping.json"
-
 # Categories for routing queries
-categories:
-  - name: "math"
-    description: "Mathematical problems, equations, calculus, algebra, statistics"
-    model_scores:
-      - model: "deepseek/deepseek-r1:70b"
-        score: 0.95
-        use_reasoning: true
-      - model: "qwen/qwen3-235b"
-        score: 0.90
-        use_reasoning: true
-    mmlu_categories:
-      - "mathematics"
-      - "statistics"
-
-  - name: "coding"
-    description: "Programming, software development, debugging, algorithms"
-    model_scores:
-      - model: "deepseek/deepseek-r1-coder:33b"
-        score: 0.95
-        use_reasoning: true
-      - model: "meta/llama3.1-70b"
-        score: 0.85
-        use_reasoning: false
-    mmlu_categories:
-      - "computer_science"
-      - "engineering"
-
-  - name: "general"
-    description: "General knowledge, conversation, misc queries"
-    model_scores:
-      - model: "meta/llama3.1-70b"
-        score: 0.90
-        use_reasoning: false
-      - model: "qwen/qwen3-235b"
-        score: 0.85
-        use_reasoning: false
+#
+# Categories are automatically loaded from MCP server via 'list_categories' tool.
+# The MCP server controls BOTH classification AND routing decisions.
+#
+# How it works:
+# 1. Router connects to MCP server at startup
+# 2. Calls 'list_categories' tool: MCP returns {"categories": ["business", "law", ...]}
+# 3. For each request, calls 'classify_text' tool which returns:
+#    {
+#      "class": 3,
+#      "confidence": 0.85,
+#      "model": "openai/gpt-oss-20b",  # MCP decides which model to use
+#      "use_reasoning": true           # MCP decides whether to use reasoning
+#    }
+# 4. Router uses the model and reasoning settings from MCP response
+#
+# BENEFITS:
+# - MCP server makes intelligent routing decisions per query
+# - No hardcoded routing rules needed in config
+# - MCP can adapt routing based on query complexity, content, etc.
+# - Centralized routing logic in MCP server
+#
+# FALLBACK:
+# - If MCP doesn't return model/use_reasoning, uses default_model below
+# - Can also add category-specific overrides here if needed
+#
+categories: []

 # Default model to use when category can't be determined
-default_model: "meta/llama3.1-70b"
+default_model: openai/gpt-oss-20b

 # vLLM endpoints configuration
 vllm_endpoints:
-  - name: "deepseek-endpoint"
-    address: "10.0.1.10"
+  - name: endpoint1
+    address: 127.0.0.1
     port: 8000
     models:
-      - "deepseek/deepseek-r1:70b"
-      - "deepseek/deepseek-r1-coder:33b"
-    weight: 100
-
-  - name: "qwen-endpoint"
-    address: "10.0.1.11"
-    port: 8000
-    models:
-      - "qwen/qwen3-235b"
-    weight: 100
-
-  - name: "llama-endpoint"
-    address: "10.0.1.12"
-    port: 8000
-    models:
-      - "meta/llama3.1-70b"
-    weight: 100
-
-# Semantic cache configuration (optional)
-semantic_cache:
-  enabled: true
-  backend_type: "in-memory"
-  similarity_threshold: 0.90
-  max_entries: 1000
-  ttl_seconds: 3600
-  eviction_policy: "lru"
+      - openai/gpt-oss-20b
+    weight: 1
+    health_check_path: /health

 # Model-specific configuration
 model_config:
-  "deepseek/deepseek-r1:70b":
-    reasoning_family: "deepseek"
-    pii_policy:
-      allow_by_default: false
-      pii_types_allowed: []
-
-  "deepseek/deepseek-r1-coder:33b":
-    reasoning_family: "deepseek"
-    pii_policy:
-      allow_by_default: false
-      pii_types_allowed: []
-
-  "qwen/qwen3-235b":
-    reasoning_family: "qwen3"
-    pii_policy:
-      allow_by_default: true
-
-  "meta/llama3.1-70b":
+  openai/gpt-oss-20b:
+    reasoning_family: gpt-oss
+    preferred_endpoints:
+      - endpoint1
     pii_policy:
       allow_by_default: true

 # Reasoning family configurations
 reasoning_families:
   deepseek:
-    type: "chat_template_kwargs"
-    parameter: "thinking"
+    type: chat_template_kwargs
+    parameter: thinking
   qwen3:
-    type: "reasoning_effort"
-    parameter: "reasoning_effort"
+    type: chat_template_kwargs
+    parameter: enable_thinking
   gpt-oss:
-    type: "chat_template_kwargs"
-    parameter: "enable_thinking"
+    type: reasoning_effort
+    parameter: reasoning_effort
+  gpt:
+    type: reasoning_effort
+    parameter: reasoning_effort
+
+# Default reasoning effort level
+default_reasoning_effort: high

 # Tools configuration (optional)
 tools:

@@ -182,9 +114,37 @@ tools:
 # API configuration
 api:
   batch_classification:
+    max_batch_size: 100
+    concurrency_threshold: 5
+    max_concurrency: 8
   metrics:
     enabled: true
+    detailed_goroutine_tracking: true
+    high_resolution_timing: false
     sample_rate: 1.0
+    duration_buckets:
+      - 0.001
+      - 0.005
+      - 0.01
+      - 0.025
+      - 0.05
+      - 0.1
+      - 0.25
+      - 0.5
+      - 1
+      - 2.5
+      - 5
+      - 10
+      - 30
+    size_buckets:
+      - 1
+      - 2
+      - 5
+      - 10
+      - 20
+      - 50
+      - 100
+      - 200

 # Observability configuration
 observability:
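
The comment block added above doubles as the protocol contract for the external server: a 'list_categories' tool called once at router startup, and a 'classify_text' tool called per request that returns the class index, the confidence, and the routing decision itself. Below is a minimal sketch of the regex-based server named in the commit title, assuming the FastMCP helper from the mcp Python SDK pinned in the next file; the category names, regex patterns, confidence values, and the port=8090 wiring are illustrative assumptions, not the committed code.

import re
from mcp.server.fastmcp import FastMCP

# Assumption: FastMCP accepts the port as a setting, and its streamable-HTTP
# transport serves the MCP endpoint at /mcp, matching the YAML's
# url: "http://localhost:8090/mcp".
mcp = FastMCP("category-classifier", port=8090)

# Ordered (name, pattern, model, use_reasoning); the list index is the class
# id returned to the router. Patterns and model names are placeholders.
CATEGORIES = [
    ("math", re.compile(r"\b(integral|equation|algebra|calculus)\b", re.I),
     "openai/gpt-oss-20b", True),
    ("coding", re.compile(r"\b(code|debug|function|compile)\b", re.I),
     "openai/gpt-oss-20b", True),
    ("general", re.compile(r""),  # empty pattern matches anything; keep last
     "openai/gpt-oss-20b", False),
]

@mcp.tool()
def list_categories() -> dict:
    """Called once at router startup to discover category names."""
    return {"categories": [name for name, _, _, _ in CATEGORIES]}

@mcp.tool()
def classify_text(text: str) -> dict:
    """Classify one query and return the routing decision along with it."""
    for idx, (name, pattern, model, use_reasoning) in enumerate(CATEGORIES):
        if pattern.search(text):
            return {
                "class": idx,
                "confidence": 0.9 if name != "general" else 0.5,
                "model": model,                  # router follows this choice
                "use_reasoning": use_reasoning,  # and this one
            }
    # Unreachable: the "general" pattern matches every input.
    return {"class": len(CATEGORIES) - 1, "confidence": 0.0}

if __name__ == "__main__":
    mcp.run(transport="streamable-http")

Because the router takes model and use_reasoning from each response, adding a category or re-routing a pattern is a server-side change only; the YAML keeps categories: [] and never has to be redeployed.
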
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+mcp>=1.0.0
+aiohttp>=3.9.0
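
With these two dependencies installed, the server can be exercised end to end before the router is pointed at it. A minimal smoke test, assuming the streamable-HTTP client helpers that ship with the same mcp SDK (the module path and the three-part tuple it yields are from that SDK, not from this commit):

import asyncio
from mcp import ClientSession
from mcp.client.streamable_http import streamablehttp_client

async def main() -> None:
    # Endpoint matches url: "http://localhost:8090/mcp" in the YAML above.
    async with streamablehttp_client("http://localhost:8090/mcp") as (read, write, _):
        async with ClientSession(read, write) as session:
            await session.initialize()
            cats = await session.call_tool("list_categories", {})
            print(cats.content)  # expect {"categories": [...]}
            result = await session.call_tool(
                "classify_text", {"text": "solve the integral of x^2"}
            )
            print(result.content)  # expect class/confidence/model/use_reasoning

asyncio.run(main())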
