Skip to content

Commit 94109ce

Browse files
committed
feat: implement decision-based routing with plugin architecture
Signed-off-by: bitliu <[email protected]>
1 parent 25f68a9 commit 94109ce

35 files changed

+3951
-18
lines changed

deploy/kubernetes/crds/examples/intelligentroute-example.yaml

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,20 +38,9 @@ spec:
3838

3939
# Domain-based signals (MMLU domain categories)
4040
domains:
41-
- "business"
42-
- "law"
43-
- "psychology"
44-
- "biology"
45-
- "chemistry"
46-
- "history"
47-
- "other"
48-
- "health"
49-
- "economics"
5041
- "math"
51-
- "physics"
5242
- "computer_science"
53-
- "philosophy"
54-
- "engineering"
43+
- "physics"
5544

5645
# Decisions (priority used when multiple decisions match)
5746
decisions:

src/semantic-router/pkg/k8s/converter_test.go

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -202,11 +202,20 @@ func TestConverterWithTestData(t *testing.T) {
202202
testdataDir := "testdata"
203203
inputDir := filepath.Join(testdataDir, "input")
204204
outputDir := filepath.Join(testdataDir, "output")
205+
baseConfigPath := filepath.Join(testdataDir, "base-config.yaml")
205206

206207
// Ensure output directory exists
207208
err := os.MkdirAll(outputDir, 0755)
208209
require.NoError(t, err, "Failed to create output directory")
209210

211+
// Load base config (static parts)
212+
baseConfigData, err := os.ReadFile(baseConfigPath)
213+
require.NoError(t, err, "Failed to read base config file: %s", baseConfigPath)
214+
215+
var baseConfig config.RouterConfig
216+
err = yaml.Unmarshal(baseConfigData, &baseConfig)
217+
require.NoError(t, err, "Failed to unmarshal base config")
218+
210219
// Read all input files
211220
inputFiles, err := os.ReadDir(inputDir)
212221
require.NoError(t, err, "Failed to read input directory")
@@ -240,12 +249,8 @@ func TestConverterWithTestData(t *testing.T) {
240249
intelligentRouting, err := converter.ConvertIntelligentRoute(route)
241250
require.NoError(t, err, "Failed to convert IntelligentRoute")
242251

243-
// Create output config
244-
outputConfig := &config.RouterConfig{
245-
ConfigSource: config.ConfigSourceKubernetes,
246-
BackendModels: *backendModels,
247-
IntelligentRouting: *intelligentRouting,
248-
}
252+
// Merge base config with CRD-derived config
253+
outputConfig := mergeConfigs(&baseConfig, backendModels, intelligentRouting)
249254

250255
// Marshal to YAML
251256
outputData, err := yaml.Marshal(outputConfig)
@@ -271,6 +276,28 @@ func TestConverterWithTestData(t *testing.T) {
271276
}
272277
}
273278

279+
// mergeConfigs builds the RouterConfig written out by the converter test by
// layering CRD-derived values on top of a value copy of baseConfig.
//
// ConfigSource is forced to config.ConfigSourceKubernetes, BackendModels is
// replaced wholesale, and only the KeywordRules, EmbeddingRules, Categories,
// Decisions and Strategy fields of IntelligentRouting are overwritten; every
// other field keeps the value it had in baseConfig.
//
// All three arguments are dereferenced, so none of them may be nil.
280+
func mergeConfigs(baseConfig *config.RouterConfig, backendModels *config.BackendModels, intelligentRouting *config.IntelligentRouting) *config.RouterConfig {
281+
// Start from a struct value copy of the base config (the static parts).
// The copy is shallow: fields are reassigned below, never mutated in place.
282+
merged := *baseConfig
283+
284+
// Mark the result as originating from Kubernetes CRDs.
285+
merged.ConfigSource = config.ConfigSourceKubernetes
286+
287+
// Replace the dynamic backend model section with the CRD-derived one.
288+
merged.BackendModels = *backendModels
289+
290+
// Copy IntelligentRouting field by field instead of assigning the whole
// struct, so fields not listed here retain their base values.
291+
merged.IntelligentRouting.KeywordRules = intelligentRouting.KeywordRules
292+
merged.IntelligentRouting.EmbeddingRules = intelligentRouting.EmbeddingRules
293+
merged.IntelligentRouting.Categories = intelligentRouting.Categories
294+
merged.IntelligentRouting.Decisions = intelligentRouting.Decisions
295+
merged.IntelligentRouting.Strategy = intelligentRouting.Strategy
296+
// ReasoningConfig (ReasoningFamilies, DefaultReasoningEffort) is intentionally
// left as loaded from base.
// NOTE(review): presumably ReasoningConfig lives inside IntelligentRouting - confirm in the config package.
297+
298+
return &merged
299+
}
300+
274301
// parseInputYAML parses a multi-document YAML file containing IntelligentPool and IntelligentRoute
275302
func parseInputYAML(data []byte) (*v1alpha1.IntelligentPool, *v1alpha1.IntelligentRoute, error) {
276303
decoder := k8syaml.NewYAMLOrJSONDecoder(strings.NewReader(string(data)), 4096)
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
# Test Data for CRD Converter
2+
3+
This directory contains test data for the Kubernetes CRD to RouterConfig converter.
4+
5+
## Directory Structure
6+
7+
```
8+
testdata/
9+
├── base-config.yaml # Static base configuration (shared across all tests)
10+
├── input/ # Input CRD YAML files (IntelligentPool + IntelligentRoute)
11+
│ ├── 01-basic.yaml
12+
│ ├── 02-keyword-only.yaml
13+
│ ├── ...
14+
│ └── 16-keyword-embedding-domain-no-plugin.yaml
15+
└── output/ # Generated RouterConfig YAML files
16+
├── 01-basic.yaml
17+
├── 02-keyword-only.yaml
18+
├── ...
19+
└── 16-keyword-embedding-domain-no-plugin.yaml
20+
```
21+
22+
## Base Configuration
23+
24+
`base-config.yaml` contains static configuration that doesn't come from CRDs:
25+
- Reasoning families (deepseek, qwen3, gpt)
26+
- Default reasoning effort level
27+
- BERT model configuration
28+
- Semantic cache settings
29+
- Tools configuration
30+
- Prompt guard settings
31+
- Classifier configuration
32+
- Router options
33+
- Embedding models paths
34+
- API configuration
35+
- Observability settings
36+
37+
## Test Scenarios Overview
38+
39+
| # | File | Keyword | Embedding | Domain | Plugin | Use Case |
40+
|---|------|---------|-----------|--------|--------|----------|
41+
| 1 | 01-basic.yaml | ✅ | ✅ | ✅ | ✅ | Basic comprehensive example |
42+
| 2 | 02-keyword-only.yaml | ✅ | ❌ | ❌ | ❌ | FAQ detection, greetings |
43+
| 3 | 03-embedding-only.yaml | ❌ | ✅ | ❌ | ❌ | Customer support, technical issues |
44+
| 4 | 04-domain-only.yaml | ❌ | ❌ | ✅ | ❌ | STEM queries, subject routing |
45+
| 5 | 05-keyword-embedding.yaml | ✅ | ✅ | ❌ | ❌ | Urgent support with semantics |
46+
| 6 | 06-keyword-domain.yaml | ✅ | ❌ | ✅ | ❌ | Academic homework assistance |
47+
| 7 | 07-domain-embedding.yaml | ❌ | ✅ | ✅ | ❌ | Research queries by domain |
48+
| 8 | 08-keyword-embedding-domain.yaml | ✅ | ✅ | ✅ | ❌ | Comprehensive tech support |
49+
| 9 | 09-keyword-plugin.yaml | ✅ | ❌ | ❌ | ✅ | FAQ with caching |
50+
| 10 | 10-embedding-plugin.yaml | ❌ | ✅ | ❌ | ✅ | PII-protected queries |
51+
| 11 | 11-domain-plugin.yaml | ❌ | ❌ | ✅ | ✅ | Legal advice with disclaimers |
52+
| 12 | 12-keyword-embedding-plugin.yaml | ✅ | ✅ | ❌ | ✅ | Security queries with protection |
53+
| 13 | 13-keyword-domain-plugin.yaml | ✅ | ❌ | ✅ | ✅ | Medical queries with PII |
54+
| 14 | 14-domain-embedding-plugin.yaml | ❌ | ✅ | ✅ | ✅ | Financial advice with protection |
55+
| 15 | 15-keyword-embedding-domain-plugin.yaml | ✅ | ✅ | ✅ | ✅ | Enterprise compliance (full) |
56+
| 16 | 16-keyword-embedding-domain-no-plugin.yaml | ✅ | ✅ | ✅ | ❌ | Educational tutorials |
57+
58+
## Test Scenarios Details
59+
60+
### Signal Type Combinations (No Plugins)
61+
62+
1. **02-keyword-only.yaml** - Only keyword signals
63+
- Use case: FAQ detection, greeting responses
64+
- Signals: urgent, greeting keywords
65+
66+
2. **03-embedding-only.yaml** - Only embedding signals
67+
- Use case: Customer support, technical issue detection
68+
- Signals: customer_support, technical_issue embeddings
69+
70+
3. **04-domain-only.yaml** - Only domain signals
71+
- Use case: STEM queries, subject-specific routing
72+
- Signals: math, physics, computer_science, chemistry domains
73+
74+
4. **05-keyword-embedding.yaml** - Keyword + Embedding
75+
- Use case: Urgent support requests with semantic matching
76+
- Signals: urgent keywords + support_request embeddings
77+
78+
5. **06-keyword-domain.yaml** - Keyword + Domain
79+
- Use case: Academic homework assistance
80+
- Signals: homework keywords + math/physics/chemistry domains
81+
82+
6. **07-domain-embedding.yaml** - Domain + Embedding
83+
- Use case: Research queries in specific domains
84+
- Signals: research_question embeddings + biology/chemistry/physics domains
85+
86+
7. **08-keyword-embedding-domain.yaml** - All three signal types
87+
- Use case: Comprehensive technical support routing
88+
- Signals: urgent keywords + technical_help embeddings + CS/engineering/math domains
89+
90+
### Signal Type Combinations (With Plugins)
91+
92+
8. **09-keyword-plugin.yaml** - Keyword + Plugins
93+
- Use case: FAQ with aggressive caching
94+
- Plugins: semantic-cache, header_mutation
95+
96+
9. **10-embedding-plugin.yaml** - Embedding + Plugins
97+
- Use case: PII-protected sensitive data handling
98+
- Plugins: pii (redaction), jailbreak protection
99+
100+
10. **11-domain-plugin.yaml** - Domain + Plugins
101+
- Use case: Legal advice with disclaimers
102+
- Plugins: system_prompt, semantic-cache
103+
104+
11. **12-keyword-embedding-plugin.yaml** - Keyword + Embedding + Plugins
105+
- Use case: Security queries with protection
106+
- Plugins: jailbreak, system_prompt, header_mutation
107+
108+
12. **13-keyword-domain-plugin.yaml** - Keyword + Domain + Plugins
109+
- Use case: Medical queries with PII protection
110+
- Plugins: pii (hash mode), system_prompt, semantic-cache
111+
112+
13. **14-domain-embedding-plugin.yaml** - Domain + Embedding + Plugins
113+
- Use case: Financial advice with comprehensive protection
114+
- Plugins: pii, system_prompt, jailbreak, semantic-cache
115+
116+
14. **15-keyword-embedding-domain-plugin.yaml** - Keyword + Embedding + Domain + Plugins
117+
- Use case: Enterprise compliance and legal queries with full protection
118+
- Signals: compliance/confidential keywords + business_analysis/legal_review embeddings + business/law/economics domains
119+
- Plugins: pii (hash/mask modes), jailbreak, system_prompt, semantic-cache, header_mutation
120+
- Multiple decisions with different plugin configurations
121+
122+
15. **16-keyword-embedding-domain-no-plugin.yaml** - All signals, no plugins
123+
- Use case: Educational tutorials across multiple domains
124+
- Signals: tutorial keywords + learning_intent embeddings + CS/math/engineering domains
125+
- Multiple decisions with different priorities
126+
127+
## Plugin Types Used
128+
129+
- **semantic-cache**: Cache responses for similar queries
130+
- **pii**: Detect and redact/mask/hash PII entities
131+
- **jailbreak**: Detect and block jailbreak attempts
132+
- **system_prompt**: Inject custom system prompts
133+
- **header_mutation**: Add custom headers to requests
134+
135+
## Running Tests
136+
137+
```bash
138+
cd src/semantic-router
139+
go test ./pkg/k8s -v -run TestConverterWithTestData
140+
```
141+
142+
This will:
143+
1. Load `base-config.yaml` as the static configuration base
144+
2. Parse each input YAML file (IntelligentPool + IntelligentRoute)
145+
3. Convert CRDs to RouterConfig format
146+
4. Merge static base config with dynamic CRD-derived config
147+
5. Generate output YAML files in `testdata/output/`
148+
6. Validate that output can be unmarshaled correctly
149+
150+
## Output Structure
151+
152+
Each generated output file contains:
153+
- **Static parts** (from base-config.yaml):
154+
- embedding_models, bert_model, classifier, prompt_guard
155+
- semantic_cache, observability, api, tools
156+
- reasoning_families, default_reasoning_effort
157+
158+
- **Dynamic parts** (from CRDs):
159+
- keyword_rules (from signals.keywords)
160+
- embedding_rules (from signals.embeddings)
161+
- categories (from signals.domains)
162+
- decisions (from decisions)
163+
- model_config (from IntelligentPool.models)
164+
- default_model (from IntelligentPool.defaultModel)
165+
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
---
2+
apiVersion: vllm.ai/v1alpha1
3+
kind: IntelligentPool
4+
metadata:
5+
name: test-pool
6+
namespace: default
7+
spec:
8+
defaultModel: "qwen3-8b"
9+
models:
10+
- name: "qwen3-8b"
11+
reasoningFamily: "qwen3"
12+
piiPolicy:
13+
allowByDefault: true
14+
pricing:
15+
inputTokenPrice: 0.000001
16+
outputTokenPrice: 0.000002
17+
loras:
18+
- name: "tech-expert"
19+
description: "Technical support specialist"
20+
---
21+
apiVersion: vllm.ai/v1alpha1
22+
kind: IntelligentRoute
23+
metadata:
24+
name: test-route
25+
namespace: default
26+
spec:
27+
signals:
28+
keywords:
29+
- name: "urgent"
30+
operator: "OR"
31+
keywords: ["urgent", "emergency", "asap"]
32+
caseSensitive: false
33+
embeddings:
34+
- name: "tech_support"
35+
threshold: 0.75
36+
candidates:
37+
- "I need help with technical issues"
38+
- "Can you help me troubleshoot this problem?"
39+
- "Something is not working correctly"
40+
aggregationMethod: "max"
41+
domains:
42+
- "computer_science"
43+
- "math"
44+
45+
decisions:
46+
- name: "urgent_tech"
47+
priority: 100
48+
description: "Urgent technical support requests"
49+
signals:
50+
operator: "AND"
51+
conditions:
52+
- type: "keyword"
53+
name: "urgent"
54+
- type: "embedding"
55+
name: "tech_support"
56+
modelRefs:
57+
- model: "qwen3-8b"
58+
useReasoning: true
59+
reasoningEffort: "medium"
60+
plugins:
61+
- type: "semantic-cache"
62+
configuration:
63+
enabled: true
64+
threshold: 0.9
65+
66+
- name: "general_tech"
67+
priority: 50
68+
description: "General technical queries"
69+
signals:
70+
operator: "OR"
71+
conditions:
72+
- type: "embedding"
73+
name: "tech_support"
74+
- type: "domain"
75+
name: "computer_science"
76+
modelRefs:
77+
- model: "qwen3-8b"
78+
useReasoning: false
79+
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
---
2+
apiVersion: vllm.ai/v1alpha1
3+
kind: IntelligentPool
4+
metadata:
5+
name: keyword-pool
6+
namespace: default
7+
spec:
8+
defaultModel: "base-model"
9+
models:
10+
- name: "base-model"
11+
reasoningFamily: "qwen3"
12+
piiPolicy:
13+
allowByDefault: true
14+
pricing:
15+
inputTokenPrice: 0.000001
16+
outputTokenPrice: 0.000002
17+
18+
---
19+
apiVersion: vllm.ai/v1alpha1
20+
kind: IntelligentRoute
21+
metadata:
22+
name: keyword-route
23+
namespace: default
24+
spec:
25+
signals:
26+
keywords:
27+
- name: "urgent"
28+
operator: "OR"
29+
keywords: ["urgent", "emergency", "critical", "asap"]
30+
caseSensitive: false
31+
- name: "greeting"
32+
operator: "OR"
33+
keywords: ["hello", "hi", "hey", "greetings"]
34+
caseSensitive: false
35+
36+
decisions:
37+
- name: "urgent_request"
38+
description: "Handle urgent requests"
39+
priority: 100
40+
signals:
41+
operator: "AND"
42+
conditions:
43+
- type: "keyword"
44+
name: "urgent"
45+
modelRefs:
46+
- model: "base-model"
47+
useReasoning: true
48+
reasoningEffort: "high"
49+
50+
- name: "greeting_response"
51+
description: "Handle greetings"
52+
priority: 50
53+
signals:
54+
operator: "AND"
55+
conditions:
56+
- type: "keyword"
57+
name: "greeting"
58+
modelRefs:
59+
- model: "base-model"
60+
useReasoning: false
61+

0 commit comments

Comments
 (0)