Skip to content

Commit 51c60a3

Browse files
committed
feat: implement decision-based routing with plugin architecture
Signed-off-by: bitliu <[email protected]>
1 parent 6a4ebf4 commit 51c60a3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+3891
-4775
lines changed

config/intelligent-routing/in-tree/embedding.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ categories:
136136
score: 0.9
137137
use_reasoning: true
138138
jailbreak_enabled: true
139-
pii_detection_enabled: true
139+
pii_enabled: true
140140

141141
- name: product_inquiry
142142
system_prompt: "You are a product specialist. Provide accurate information about products, features, pricing, and availability. Be helpful and informative."
@@ -145,7 +145,7 @@ categories:
145145
score: 0.85
146146
use_reasoning: false
147147
jailbreak_enabled: true
148-
pii_detection_enabled: false
148+
pii_enabled: false
149149

150150
- name: account_management
151151
system_prompt: "You are an account management assistant. Help users with account-related tasks such as password resets, profile updates, and subscription management. Prioritize security and privacy."
@@ -154,7 +154,7 @@ categories:
154154
score: 0.88
155155
use_reasoning: false
156156
jailbreak_enabled: true
157-
pii_detection_enabled: true
157+
pii_enabled: true
158158

159159
- name: general_inquiry
160160
system_prompt: "You are a helpful general assistant. Answer questions clearly and concisely. If you need more information, ask clarifying questions."
@@ -163,7 +163,7 @@ categories:
163163
score: 0.75
164164
use_reasoning: false
165165
jailbreak_enabled: true
166-
pii_detection_enabled: false
166+
pii_enabled: false
167167

168168
# Embedding Models Configuration
169169
# These models provide intelligent embedding generation with automatic routing:
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
{{- if .Values.crds.install }}
2+
---
3+
apiVersion: apiextensions.k8s.io/v1
4+
kind: CustomResourceDefinition
5+
metadata:
6+
name: intelligentpools.vllm.ai
7+
annotations:
8+
"helm.sh/resource-policy": keep
9+
spec:
10+
group: vllm.ai
11+
names:
12+
kind: IntelligentPool
13+
listKind: IntelligentPoolList
14+
plural: intelligentpools
15+
singular: intelligentpool
16+
shortNames:
17+
- ipool
18+
scope: Namespaced
19+
versions:
20+
- name: v1alpha1
21+
served: true
22+
storage: true
23+
schema:
24+
openAPIV3Schema:
25+
type: object
26+
properties:
27+
spec:
28+
type: object
29+
required:
30+
- defaultModel
31+
- models
32+
properties:
33+
defaultModel:
34+
type: string
35+
models:
36+
type: array
37+
items:
38+
type: object
39+
required:
40+
- name
41+
properties:
42+
name:
43+
type: string
44+
reasoningFamily:
45+
type: string
46+
piiPolicy:
47+
type: object
48+
properties:
49+
allowByDefault:
50+
type: boolean
51+
pricing:
52+
type: object
53+
properties:
54+
inputTokenPrice:
55+
type: number
56+
format: double
57+
outputTokenPrice:
58+
type: number
59+
format: double
60+
loras:
61+
type: array
62+
items:
63+
type: object
64+
required:
65+
- name
66+
properties:
67+
name:
68+
type: string
69+
description:
70+
type: string
71+
status:
72+
type: object
73+
properties:
74+
conditions:
75+
type: array
76+
items:
77+
type: object
78+
required:
79+
- type
80+
- status
81+
properties:
82+
type:
83+
type: string
84+
status:
85+
type: string
86+
reason:
87+
type: string
88+
message:
89+
type: string
90+
lastTransitionTime:
91+
type: string
92+
format: date-time
93+
observedGeneration:
94+
type: integer
95+
format: int64
96+
observedGeneration:
97+
type: integer
98+
format: int64
99+
modelCount:
100+
type: integer
101+
format: int32
102+
subresources:
103+
status: {}
104+
additionalPrinterColumns:
105+
- name: Default Model
106+
type: string
107+
jsonPath: .spec.defaultModel
108+
- name: Models
109+
type: integer
110+
jsonPath: .status.modelCount
111+
- name: Status
112+
type: string
113+
jsonPath: .status.conditions[?(@.type=="Ready")].status
114+
- name: Age
115+
type: date
116+
jsonPath: .metadata.creationTimestamp
117+
---
118+
apiVersion: apiextensions.k8s.io/v1
119+
kind: CustomResourceDefinition
120+
metadata:
121+
name: intelligentroutes.vllm.ai
122+
annotations:
123+
"helm.sh/resource-policy": keep
124+
spec:
125+
group: vllm.ai
126+
names:
127+
kind: IntelligentRoute
128+
listKind: IntelligentRouteList
129+
plural: intelligentroutes
130+
singular: intelligentroute
131+
shortNames:
132+
- iroute
133+
scope: Namespaced
134+
versions:
135+
- name: v1alpha1
136+
served: true
137+
storage: true
138+
schema:
139+
openAPIV3Schema:
140+
type: object
141+
# Note: Full schema is too large for this template
142+
# See deploy/kubernetes/crds/intelligentroute-crd.yaml for complete definition
143+
x-kubernetes-preserve-unknown-fields: true
144+
subresources:
145+
status: {}
146+
{{- end }}
147+

deploy/helm/semantic-router/values.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ global:
77
# -- Namespace for all resources (if not specified, uses Release.Namespace)
88
namespace: ""
99

10+
# CRD configuration
11+
crds:
12+
# -- Install CRDs (IntelligentPool and IntelligentRoute)
13+
install: true
14+
1015
# -- Number of replicas for the deployment
1116
replicaCount: 1
1217

deploy/kubernetes/ai-gateway/semantic-router/config.yaml

Lines changed: 75 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ model_config:
2020
- name: "general-expert"
2121
description: "General-purpose adapter for diverse topics"
2222

23+
default_model: general-expert
24+
2325
# Categories with LoRA routing
2426
# Each category uses the base-model model with a specific LoRA adapter
2527
categories:
@@ -129,14 +131,85 @@ categories:
129131
lora_name: science-expert
130132
score: 0.7
131133
use_reasoning: false
132-
- name: thinking
134+
- name: urgent request
133135
system_prompt: "You are a thinking expert, should think multiple steps before answering. Please answer the question step by step."
134136
model_scores:
135137
- model: general-expert
136138
score: 0.7
137139
use_reasoning: true
140+
# Embedding-based categories
141+
- name: technical_support
142+
system_prompt: "You are a technical support specialist. Provide detailed, step-by-step guidance for technical issues. Use clear explanations and include relevant troubleshooting steps."
143+
model_scores:
144+
- model: general-expert
145+
score: 0.9
146+
use_reasoning: true
147+
jailbreak_enabled: true
148+
pii_enabled: true
149+
- name: business
150+
system_prompt: "You are a product specialist. Provide accurate information about products, features, pricing, and availability. Be helpful and informative."
151+
model_scores:
152+
- model: general-expert
153+
score: 0.85
154+
use_reasoning: false
155+
jailbreak_enabled: true
156+
pii_enabled: true
138157

139-
default_model: general-expert
158+
keyword_rules:
159+
# Keyword Rule 1: Emergency/Urgent Requests
160+
# Use case: Fast routing for time-sensitive queries that need immediate attention
161+
# Examples: "URGENT: server down", "EMERGENCY: data loss", "CRITICAL: security breach"
162+
- category: "urgent request"
163+
operator: "OR"
164+
keywords: ["urgent", "emergency", "critical", "asap", "immediately", "help!", "sos"]
165+
case_sensitive: false
166+
167+
# Keyword Rule 2: Programming Language Detection
168+
# Use case: Route code-related queries to appropriate handlers based on language
169+
# Examples: "python error", "java exception", "golang panic", "rust compiler error"
170+
- category: "computer science"
171+
operator: "OR"
172+
keywords: ["python", "java", "golang", "rust", "javascript", "typescript", "c++", "ruby", "php"]
173+
case_sensitive: false
174+
175+
# Embedding-based classification rules
176+
# These rules use semantic similarity between query text and keywords
177+
embedding_rules:
178+
# Embedding Rule 1: Customer Complaint/Feedback Detection (matches are routed to the "technical_support" category)
179+
# Use case: Identify negative sentiment and complaints regardless of exact wording
180+
# Examples: "I'm disappointed with the service", "This product doesn't work as expected",
181+
# "Not satisfied with my purchase", "The quality is poor"
182+
- category: "technical_support"
183+
threshold: 0.72
184+
keywords:
185+
- "I'm not satisfied with the product quality"
186+
- "The service didn't meet my expectations"
187+
- "I'm experiencing issues and need help"
188+
- "Something is broken and not working properly"
189+
- "I'm disappointed with the performance"
190+
- "This is not what I expected when I ordered"
191+
aggregation_method: "max" # Use max to catch any strong complaint signal
192+
model: "auto"
193+
dimension: 768
194+
quality_priority: 0.8 # High quality needed for sentiment detection
195+
latency_priority: 0.2
196+
197+
# Embedding Rule 2: Account/Billing Related Queries (matches are routed to the "business" category)
198+
# Use case: Route financial and account queries even with varied phrasing
199+
# Examples: "How much do I owe?", "Check my balance", "Update payment method",
200+
# "Why was I charged twice?", "Cancel my subscription"
201+
- category: "business"
202+
threshold: 0.68
203+
keywords:
204+
- "I need to check my account balance and payment history"
205+
- "How can I update my billing information and payment method"
206+
- "I was charged incorrectly and need a refund"
207+
- "I want to cancel my subscription and stop recurring payments"
208+
- "What are the fees and charges on my account"
209+
- "I need to review my invoice and transaction details"
210+
aggregation_method: "avg" # Use avg for balanced matching across billing topics
211+
model: "qwen3" # Use high-quality model for financial queries
212+
dimension: 1024
140213

141214
bert_model:
142215
model_id: models/all-MiniLM-L12-v2
@@ -195,12 +268,6 @@ classifier:
195268
use_cpu: true
196269
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
197270

198-
keyword_rules:
199-
- category: "thinking"
200-
operator: "OR"
201-
keywords: ["urgent", "immediate", "asap", "think", "careful"]
202-
case_sensitive: false
203-
204271

205272
# Router Configuration for Dual-Path Selection
206273
router:
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
apiVersion: vllm.ai/v1alpha1
2+
kind: IntelligentPool
3+
metadata:
4+
name: default-pool
5+
namespace: default
6+
spec:
7+
defaultModel: "general-expert"
8+
models:
9+
- name: "base-model"
10+
reasoningFamily: "qwen3"
11+
piiPolicy:
12+
allowByDefault: false
13+
pricing:
14+
inputTokenPrice: 0.000001 # $1.00 per 1M tokens, assuming the price is USD per token (confirm units)
15+
outputTokenPrice: 0.000002 # $2.00 per 1M tokens, assuming the price is USD per token (confirm units)
16+
loras:
17+
- name: "math-expert"
18+
description: "Math specialist LoRA adapter"
19+
- name: "code-expert"
20+
description: "Code specialist LoRA adapter"
21+
22+
- name: "general-expert"
23+
reasoningFamily: "deepseek"
24+
piiPolicy:
25+
allowByDefault: true
26+
pricing:
27+
inputTokenPrice: 0.000002
28+
outputTokenPrice: 0.000004
29+
loras:
30+
- name: "general-assistant"
31+
description: "General purpose assistant"
32+
33+
- name: "fast-model"
34+
piiPolicy:
35+
allowByDefault: true
36+
pricing:
37+
inputTokenPrice: 0.0000005
38+
outputTokenPrice: 0.000001
39+

0 commit comments

Comments
 (0)