Commit 3cac7ed

feat: implement decision-based routing with plugin architecture
Signed-off-by: bitliu <[email protected]>
1 parent ac2df4b commit 3cac7ed

5 files changed: 157 additions, 10 deletions

deploy/kubernetes/ai-gateway/semantic-router-values/values.yaml

Lines changed: 75 additions & 3 deletions
@@ -3,9 +3,6 @@ config:
   model_config:
     "base-model":
       reasoning_family: "qwen3"  # This model uses Qwen-3 reasoning syntax
-      # preferred_endpoints omitted - let upstream handle endpoint selection
-      pii_policy:
-        allow_by_default: false
       # Define available LoRA adapters for this base model
       # These names must match the LoRA modules registered with vLLM at startup
       loras:
@@ -69,6 +66,11 @@ config:
         lora_name: social-expert
         use_reasoning: false
       plugins:
+        - type: "pii"
+          configuration:
+            enabled: true
+            allow_by_default: false
+            pii_types_allowed: []
         - type: "system_prompt"
           configuration:
             enabled: true
@@ -88,6 +90,11 @@ config:
         lora_name: law-expert
         use_reasoning: false
       plugins:
+        - type: "pii"
+          configuration:
+            enabled: true
+            allow_by_default: false
+            pii_types_allowed: []
         - type: "system_prompt"
           configuration:
             enabled: true
@@ -107,6 +114,11 @@ config:
         lora_name: humanities-expert
        use_reasoning: false
       plugins:
+        - type: "pii"
+          configuration:
+            enabled: true
+            allow_by_default: false
+            pii_types_allowed: []
         - type: "semantic-cache"
           configuration:
             enabled: true
@@ -130,6 +142,11 @@ config:
         lora_name: science-expert
         use_reasoning: false
       plugins:
+        - type: "pii"
+          configuration:
+            enabled: true
+            allow_by_default: false
+            pii_types_allowed: []
         - type: "system_prompt"
           configuration:
             enabled: true
@@ -149,6 +166,11 @@ config:
         lora_name: science-expert
         use_reasoning: true
       plugins:
+        - type: "pii"
+          configuration:
+            enabled: true
+            allow_by_default: false
+            pii_types_allowed: []
         - type: "system_prompt"
           configuration:
             enabled: true
@@ -168,6 +190,11 @@ config:
         lora_name: humanities-expert
         use_reasoning: false
       plugins:
+        - type: "pii"
+          configuration:
+            enabled: true
+            allow_by_default: false
+            pii_types_allowed: []
         - type: "system_prompt"
           configuration:
             enabled: true
@@ -187,6 +214,11 @@ config:
         lora_name: science-expert
         use_reasoning: false
       plugins:
+        - type: "pii"
+          configuration:
+            enabled: true
+            allow_by_default: false
+            pii_types_allowed: []
         - type: "semantic-cache"
           configuration:
             enabled: true
@@ -210,6 +242,11 @@ config:
         lora_name: social-expert
         use_reasoning: false
       plugins:
+        - type: "pii"
+          configuration:
+            enabled: true
+            allow_by_default: false
+            pii_types_allowed: []
         - type: "system_prompt"
           configuration:
             enabled: true
@@ -229,6 +266,11 @@ config:
         lora_name: math-expert
         use_reasoning: true
       plugins:
+        - type: "pii"
+          configuration:
+            enabled: true
+            allow_by_default: false
+            pii_types_allowed: []
         - type: "system_prompt"
           configuration:
             enabled: true
@@ -248,6 +290,11 @@ config:
         lora_name: science-expert
         use_reasoning: true
       plugins:
+        - type: "pii"
+          configuration:
+            enabled: true
+            allow_by_default: false
+            pii_types_allowed: []
         - type: "system_prompt"
           configuration:
             enabled: true
@@ -267,6 +314,11 @@ config:
         lora_name: science-expert
         use_reasoning: false
       plugins:
+        - type: "pii"
+          configuration:
+            enabled: true
+            allow_by_default: false
+            pii_types_allowed: []
         - type: "system_prompt"
           configuration:
             enabled: true
@@ -286,6 +338,11 @@ config:
         lora_name: humanities-expert
         use_reasoning: false
       plugins:
+        - type: "pii"
+          configuration:
+            enabled: true
+            allow_by_default: false
+            pii_types_allowed: []
         - type: "system_prompt"
           configuration:
             enabled: true
@@ -305,6 +362,11 @@ config:
         lora_name: science-expert
         use_reasoning: false
       plugins:
+        - type: "pii"
+          configuration:
+            enabled: true
+            allow_by_default: false
+            pii_types_allowed: []
         - type: "system_prompt"
           configuration:
             enabled: true
@@ -324,6 +386,11 @@ config:
         lora_name: general-expert
         use_reasoning: true
       plugins:
+        - type: "pii"
+          configuration:
+            enabled: true
+            allow_by_default: false
+            pii_types_allowed: []
         - type: "system_prompt"
           configuration:
             enabled: true
@@ -343,6 +410,11 config:
         lora_name: general-expert
         use_reasoning: false
       plugins:
+        - type: "pii"
+          configuration:
+            enabled: true
+            allow_by_default: false
+            pii_types_allowed: []
         - type: "semantic-cache"
           configuration:
             enabled: true
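
Taken together, every decision in this values file now gets the new pii plugin ahead of its existing system_prompt or semantic-cache plugin, replacing the model-level pii_policy that the first hunk removes from base-model. A minimal sketch of one resulting decision entry follows; the decision name and the surrounding decisions/modelRefs keys are illustrative assumptions, not taken verbatim from the file:

decisions:
  - name: science                    # hypothetical decision name, for illustration only
    modelRefs:                       # assumed wrapper for the model reference shown in the diff
      - model: base-model
        lora_name: science-expert
        use_reasoning: false
    plugins:
      - type: "pii"                  # added by this commit
        configuration:
          enabled: true
          allow_by_default: false    # block detected PII unless a type is explicitly allowed
          pii_types_allowed: []      # empty list: no PII types are exempted
      - type: "system_prompt"        # pre-existing plugin, unchanged
        configuration:
          enabled: true

With allow_by_default: false and an empty pii_types_allowed list, PII handling is enforced per decision rather than per model, which is what lets the old pii_policy block be dropped from model_config.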

e2e/profiles/ai-gateway/profile.go

Lines changed: 3 additions & 3 deletions
@@ -146,7 +146,7 @@ func (p *Profile) deploySemanticRouter(ctx context.Context, deployer *helm.Deplo
         "image.pullPolicy": "Never", // Use local image, don't pull from registry
     },
     Wait:    true,
-    Timeout: "10m",
+    Timeout: "30m",
 }

 if err := deployer.Install(ctx, installOpts); err != nil {
@@ -164,7 +164,7 @@ func (p *Profile) deployEnvoyGateway(ctx context.Context, deployer *helm.Deploye
     Version:     "v0.0.0-latest",
     ValuesFiles: []string{"https://raw.githubusercontent.com/envoyproxy/ai-gateway/main/manifests/envoy-gateway-values.yaml"},
     Wait:        true,
-    Timeout:     "5m",
+    Timeout:     "10m",
 }

 if err := deployer.Install(ctx, installOpts); err != nil {
@@ -182,7 +182,7 @@ func (p *Profile) deployEnvoyAIGateway(ctx context.Context, deployer *helm.Deplo
     Namespace: "envoy-ai-gateway-system",
     Version:   "v0.0.0-latest",
     Wait:      true,
-    Timeout:   "5m",
+    Timeout:   "10m",
 }

 if err := deployer.Install(ctx, crdOpts); err != nil {

e2e/profiles/dynamic-config/crds/intelligentpool.yaml

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ metadata:
   name: ai-gateway-pool
   namespace: default
 spec:
-  defaultModel: "base-model"
+  defaultModel: "general-expert"
   models:
     - name: "base-model"
       reasoningFamily: "qwen3"
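
For reference, the complete IntelligentPool manifest after this change would look roughly like the sketch below; the apiVersion is an assumption (the group/version is not visible in this hunk) and the kind is inferred from the file name:

apiVersion: vllm.ai/v1alpha1         # assumed group/version, not shown in the diff
kind: IntelligentPool                # inferred from the file name intelligentpool.yaml
metadata:
  name: ai-gateway-pool
  namespace: default
spec:
  defaultModel: "general-expert"     # changed from "base-model" by this commit
  models:
    - name: "base-model"
      reasoningFamily: "qwen3"

The hunk shows only the first entry of the models list, so whichever entry defines general-expert lies outside this diff.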
