Skip to content

Commit 4cda616

Browse files
committed
feat: implement decision-based routing with plugin architecture
Signed-off-by: bitliu <[email protected]>
1 parent 7be81a1 commit 4cda616

File tree

2 files changed

+21
-16
lines changed

2 files changed

+21
-16
lines changed

deploy/helm/semantic-router/values.yaml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -152,6 +152,11 @@ initContainer:
152152
repo: LLM-Semantic-Router/jailbreak_classifier_modernbert-base_model
153153
- name: pii_classifier_modernbert-base_presidio_token_model
154154
repo: LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model
155+
# Embedding models for semantic cache and tools
156+
- name: Qwen3-Embedding-0.6B
157+
repo: Qwen/Qwen3-Embedding-0.6B
158+
- name: embeddinggemma-300m
159+
repo: google/embeddinggemma-300m
155160

156161
# Autoscaling configuration
157162
autoscaling:

deploy/kubernetes/ai-gateway/semantic-router-values/values.yaml

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ config:
6363
operator: "OR"
6464
conditions:
6565
- type: "domain"
66-
category: "business"
66+
name: "business"
6767
modelRefs:
6868
- model: base-model
6969
lora_name: social-expert
@@ -82,7 +82,7 @@ config:
8282
operator: "OR"
8383
conditions:
8484
- type: "domain"
85-
category: "law"
85+
name: "law"
8686
modelRefs:
8787
- model: base-model
8888
lora_name: law-expert
@@ -101,7 +101,7 @@ config:
101101
operator: "OR"
102102
conditions:
103103
- type: "domain"
104-
category: "psychology"
104+
name: "psychology"
105105
modelRefs:
106106
- model: base-model
107107
lora_name: humanities-expert
@@ -124,7 +124,7 @@ config:
124124
operator: "OR"
125125
conditions:
126126
- type: "domain"
127-
category: "biology"
127+
name: "biology"
128128
modelRefs:
129129
- model: base-model
130130
lora_name: science-expert
@@ -143,7 +143,7 @@ config:
143143
operator: "OR"
144144
conditions:
145145
- type: "domain"
146-
category: "chemistry"
146+
name: "chemistry"
147147
modelRefs:
148148
- model: base-model
149149
lora_name: science-expert
@@ -162,7 +162,7 @@ config:
162162
operator: "OR"
163163
conditions:
164164
- type: "domain"
165-
category: "history"
165+
name: "history"
166166
modelRefs:
167167
- model: base-model
168168
lora_name: humanities-expert
@@ -181,7 +181,7 @@ config:
181181
operator: "OR"
182182
conditions:
183183
- type: "domain"
184-
category: "health"
184+
name: "health"
185185
modelRefs:
186186
- model: base-model
187187
lora_name: science-expert
@@ -204,7 +204,7 @@ config:
204204
operator: "OR"
205205
conditions:
206206
- type: "domain"
207-
category: "economics"
207+
name: "economics"
208208
modelRefs:
209209
- model: base-model
210210
lora_name: social-expert
@@ -223,7 +223,7 @@ config:
223223
operator: "OR"
224224
conditions:
225225
- type: "domain"
226-
category: "math"
226+
name: "math"
227227
modelRefs:
228228
- model: base-model
229229
lora_name: math-expert
@@ -242,7 +242,7 @@ config:
242242
operator: "OR"
243243
conditions:
244244
- type: "domain"
245-
category: "physics"
245+
name: "physics"
246246
modelRefs:
247247
- model: base-model
248248
lora_name: science-expert
@@ -261,7 +261,7 @@ config:
261261
operator: "OR"
262262
conditions:
263263
- type: "domain"
264-
category: "computer_science"
264+
name: "computer_science"
265265
modelRefs:
266266
- model: base-model
267267
lora_name: science-expert
@@ -280,7 +280,7 @@ config:
280280
operator: "OR"
281281
conditions:
282282
- type: "domain"
283-
category: "philosophy"
283+
name: "philosophy"
284284
modelRefs:
285285
- model: base-model
286286
lora_name: humanities-expert
@@ -299,7 +299,7 @@ config:
299299
operator: "OR"
300300
conditions:
301301
- type: "domain"
302-
category: "engineering"
302+
name: "engineering"
303303
modelRefs:
304304
- model: base-model
305305
lora_name: science-expert
@@ -337,7 +337,7 @@ config:
337337
operator: "OR"
338338
conditions:
339339
- type: "domain"
340-
category: "other"
340+
name: "other"
341341
modelRefs:
342342
- model: base-model
343343
lora_name: general-expert
@@ -505,9 +505,9 @@ config:
505505
[0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
506506
size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
507507

508-
# Embedding Models Configuration
508+
# Embedding Models Configuration (Optional)
509509
# These models provide intelligent embedding generation with automatic routing:
510-
# - Qwen3-Embedding-0.6B: Up to 32K context, high quality,
510+
# - Qwen3-Embedding-0.6B: Up to 32K context, high quality, 1024-dim embeddings
511511
# - EmbeddingGemma-300M: Up to 8K context, fast inference, Matryoshka support (768/512/256/128)
512512
embedding_models:
513513
qwen3_model_path: "models/Qwen3-Embedding-0.6B"

0 commit comments

Comments
 (0)