Skip to content

Commit 7be81a1

Browse files
committed
feat: implement decision-based routing with plugin architecture
Signed-off-by: bitliu <[email protected]>
1 parent b14398e commit 7be81a1

File tree

3 files changed: 23 additions and 127 deletions.

deploy/helm/semantic-router/values.yaml

Lines changed: 0 additions & 111 deletions
Original file line number | Diff line number | Diff line change
@@ -250,21 +250,6 @@ config:
250250
use_cpu: true
251251
jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"
252252

253-
# vLLM endpoints configuration
254-
vllm_endpoints:
255-
- name: "endpoint1"
256-
address: "172.28.0.20"
257-
port: 8002
258-
weight: 1
259-
260-
# Model configuration
261-
model_config:
262-
"qwen3":
263-
reasoning_family: "qwen3"
264-
preferred_endpoints: ["endpoint1"]
265-
pii_policy:
266-
allow_by_default: true
267-
268253
# Classifier configuration
269254
classifier:
270255
category_model:
@@ -280,102 +265,6 @@ config:
280265
use_cpu: true
281266
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
282267

283-
# Categories configuration
284-
categories:
285-
- name: business
286-
system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations."
287-
model_scores:
288-
- model: qwen3
289-
score: 0.7
290-
use_reasoning: false
291-
- name: law
292-
system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
293-
model_scores:
294-
- model: qwen3
295-
score: 0.4
296-
use_reasoning: false
297-
- name: psychology
298-
system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice."
299-
semantic_cache_enabled: true
300-
semantic_cache_similarity_threshold: 0.92
301-
model_scores:
302-
- model: qwen3
303-
score: 0.6
304-
use_reasoning: false
305-
- name: biology
306-
system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems."
307-
model_scores:
308-
- model: qwen3
309-
score: 0.9
310-
use_reasoning: false
311-
- name: chemistry
312-
system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations."
313-
model_scores:
314-
- model: qwen3
315-
score: 0.6
316-
use_reasoning: true
317-
- name: history
318-
system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
319-
model_scores:
320-
- model: qwen3
321-
score: 0.7
322-
use_reasoning: false
323-
- name: other
324-
system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics."
325-
semantic_cache_enabled: true
326-
semantic_cache_similarity_threshold: 0.75
327-
model_scores:
328-
- model: qwen3
329-
score: 0.7
330-
use_reasoning: false
331-
- name: health
332-
system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies."
333-
semantic_cache_enabled: true
334-
semantic_cache_similarity_threshold: 0.95
335-
model_scores:
336-
- model: qwen3
337-
score: 0.5
338-
use_reasoning: false
339-
- name: economics
340-
system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses."
341-
model_scores:
342-
- model: qwen3
343-
score: 1.0
344-
use_reasoning: false
345-
- name: math
346-
system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way."
347-
model_scores:
348-
- model: qwen3
349-
score: 1.0
350-
use_reasoning: true
351-
- name: physics
352-
system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
353-
model_scores:
354-
- model: qwen3
355-
score: 0.7
356-
use_reasoning: true
357-
- name: computer science
358-
system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
359-
model_scores:
360-
- model: qwen3
361-
score: 0.6
362-
use_reasoning: false
363-
- name: philosophy
364-
system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates."
365-
model_scores:
366-
- model: qwen3
367-
score: 0.5
368-
use_reasoning: false
369-
- name: engineering
370-
system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards."
371-
model_scores:
372-
- model: qwen3
373-
score: 0.7
374-
use_reasoning: false
375-
376-
# Default model
377-
default_model: "qwen3"
378-
379268
# Reasoning families
380269
reasoning_families:
381270
deepseek:

deploy/kubernetes/ai-gateway/semantic-router-values/values.yaml

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ config:
6464
conditions:
6565
- type: "domain"
6666
category: "business"
67-
model_refs:
67+
modelRefs:
6868
- model: base-model
6969
lora_name: social-expert
7070
use_reasoning: false
@@ -83,7 +83,7 @@ config:
8383
conditions:
8484
- type: "domain"
8585
category: "law"
86-
model_refs:
86+
modelRefs:
8787
- model: base-model
8888
lora_name: law-expert
8989
use_reasoning: false
@@ -102,7 +102,7 @@ config:
102102
conditions:
103103
- type: "domain"
104104
category: "psychology"
105-
model_refs:
105+
modelRefs:
106106
- model: base-model
107107
lora_name: humanities-expert
108108
use_reasoning: false
@@ -125,7 +125,7 @@ config:
125125
conditions:
126126
- type: "domain"
127127
category: "biology"
128-
model_refs:
128+
modelRefs:
129129
- model: base-model
130130
lora_name: science-expert
131131
use_reasoning: false
@@ -144,7 +144,7 @@ config:
144144
conditions:
145145
- type: "domain"
146146
category: "chemistry"
147-
model_refs:
147+
modelRefs:
148148
- model: base-model
149149
lora_name: science-expert
150150
use_reasoning: true
@@ -163,7 +163,7 @@ config:
163163
conditions:
164164
- type: "domain"
165165
category: "history"
166-
model_refs:
166+
modelRefs:
167167
- model: base-model
168168
lora_name: humanities-expert
169169
use_reasoning: false
@@ -182,7 +182,7 @@ config:
182182
conditions:
183183
- type: "domain"
184184
category: "health"
185-
model_refs:
185+
modelRefs:
186186
- model: base-model
187187
lora_name: science-expert
188188
use_reasoning: false
@@ -205,7 +205,7 @@ config:
205205
conditions:
206206
- type: "domain"
207207
category: "economics"
208-
model_refs:
208+
modelRefs:
209209
- model: base-model
210210
lora_name: social-expert
211211
use_reasoning: false
@@ -224,7 +224,7 @@ config:
224224
conditions:
225225
- type: "domain"
226226
category: "math"
227-
model_refs:
227+
modelRefs:
228228
- model: base-model
229229
lora_name: math-expert
230230
use_reasoning: true
@@ -243,7 +243,7 @@ config:
243243
conditions:
244244
- type: "domain"
245245
category: "physics"
246-
model_refs:
246+
modelRefs:
247247
- model: base-model
248248
lora_name: science-expert
249249
use_reasoning: true
@@ -262,7 +262,7 @@ config:
262262
conditions:
263263
- type: "domain"
264264
category: "computer_science"
265-
model_refs:
265+
modelRefs:
266266
- model: base-model
267267
lora_name: science-expert
268268
use_reasoning: false
@@ -281,7 +281,7 @@ config:
281281
conditions:
282282
- type: "domain"
283283
category: "philosophy"
284-
model_refs:
284+
modelRefs:
285285
- model: base-model
286286
lora_name: humanities-expert
287287
use_reasoning: false
@@ -300,7 +300,7 @@ config:
300300
conditions:
301301
- type: "domain"
302302
category: "engineering"
303-
model_refs:
303+
modelRefs:
304304
- model: base-model
305305
lora_name: science-expert
306306
use_reasoning: false
@@ -319,7 +319,7 @@ config:
319319
conditions:
320320
- type: "keyword"
321321
rule_name: "thinking"
322-
model_refs:
322+
modelRefs:
323323
- model: base-model
324324
lora_name: general-expert
325325
use_reasoning: true
@@ -338,7 +338,7 @@ config:
338338
conditions:
339339
- type: "domain"
340340
category: "other"
341-
model_refs:
341+
modelRefs:
342342
- model: base-model
343343
lora_name: general-expert
344344
use_reasoning: false

src/semantic-router/pkg/config/validator.go

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,14 @@ func getIPAddressType(address string) string {
8585

8686
// validateConfigStructure performs additional validation on the parsed config
8787
func validateConfigStructure(cfg *RouterConfig) error {
88-
// Validate decisions have at least one model ref
88+
// In Kubernetes mode, decisions and model_config will be loaded from CRDs
89+
// Skip validation for these fields during initial config parse
90+
if cfg.ConfigSource == ConfigSourceKubernetes {
91+
// Skip validation for decisions and model_config
92+
return nil
93+
}
94+
95+
// File mode: validate decisions have at least one model ref
8996
for _, decision := range cfg.Decisions {
9097
if len(decision.ModelRefs) == 0 {
9198
return fmt.Errorf("decision '%s' has no modelRefs defined - each decision must have at least one model", decision.Name)

0 commit comments

Comments
 (0)