Skip to content

Commit 84c25bf

Browse files
committed
feat: implement decision-based routing with plugin architecture
Signed-off-by: bitliu <[email protected]>
1 parent eb0d095 commit 84c25bf

File tree

9 files changed

+171
-13
lines changed

9 files changed

+171
-13
lines changed

deploy/kubernetes/crds/examples/intelligentroute-example.yaml

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,18 @@ spec:
8080
configuration:
8181
enabled: true
8282
threshold: 0.5
83+
- type: "header_mutation"
84+
configuration:
85+
add:
86+
- name: "X-Decision-Name"
87+
value: "urgent_technical"
88+
- name: "X-Priority"
89+
value: "high"
90+
update:
91+
- name: "User-Agent"
92+
value: "semantic-router/urgent"
93+
delete:
94+
- "X-Debug-Info"
8395

8496
- name: "math_problems"
8597
priority: 80

deploy/kubernetes/crds/intelligentroute-crd.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -220,6 +220,7 @@ spec:
220220
- jailbreak
221221
- pii
222222
- system_prompt
223+
- header_mutation
223224
description: Plugin type
224225
configuration:
225226
type: object

src/semantic-router/examples/decision-based-routing.yaml

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,16 @@ decisions:
7373
use_reasoning: true
7474
reasoning_effort: "high"
7575
plugins:
76+
- type: "header_mutation"
77+
configuration:
78+
add:
79+
- name: "X-Decision-Name"
80+
value: "complex-coding"
81+
- name: "X-Reasoning-Enabled"
82+
value: "true"
83+
update:
84+
- name: "User-Agent"
85+
value: "semantic-router/coding"
7686
- type: "semantic-cache"
7787
configuration:
7888
enabled: true

src/semantic-router/pkg/apis/vllm.ai/v1alpha1/types_route.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -244,9 +244,9 @@ type ModelRef struct {
244244

245245
// DecisionPlugin defines a plugin configuration for a decision
246246
type DecisionPlugin struct {
247-
// Type is the plugin type (semantic-cache, jailbreak, pii, system_prompt)
247+
// Type is the plugin type (semantic-cache, jailbreak, pii, system_prompt, header_mutation)
248248
// +kubebuilder:validation:Required
249-
// +kubebuilder:validation:Enum=semantic-cache;jailbreak;pii;system_prompt
249+
// +kubebuilder:validation:Enum=semantic-cache;jailbreak;pii;system_prompt;header_mutation
250250
Type string `json:"type" yaml:"type"`
251251

252252
// Configuration is the plugin-specific configuration as a raw JSON object

src/semantic-router/pkg/config/config.go

Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -546,6 +546,19 @@ type SystemPromptPluginConfig struct {
546546
Mode string `json:"mode,omitempty" yaml:"mode,omitempty"` // "replace" or "insert"
547547
}
548548

549+
// HeaderMutationPluginConfig represents configuration for header_mutation plugin.
// Its three lists mirror the "add", "update" and "delete" keys of the plugin's
// configuration block.
550+
type HeaderMutationPluginConfig struct {
551+
// Add lists the name/value pairs given under the "add" key.
Add []HeaderPair `json:"add,omitempty" yaml:"add,omitempty"`
552+
// Update lists the name/value pairs given under the "update" key.
Update []HeaderPair `json:"update,omitempty" yaml:"update,omitempty"`
553+
// Delete lists the header names given under the "delete" key.
Delete []string `json:"delete,omitempty" yaml:"delete,omitempty"`
554+
}
555+
556+
// HeaderPair represents a header name-value pair, as used by the "add" and
// "update" lists of HeaderMutationPluginConfig.
557+
type HeaderPair struct {
558+
// Name is the header name.
Name string `json:"name" yaml:"name"`
559+
// Value is the header value.
Value string `json:"value" yaml:"value"`
560+
}
561+
549562
// Helper methods for Decision to access plugin configurations
550563

551564
// GetPluginConfig returns the configuration for a specific plugin type
@@ -633,6 +646,49 @@ func (d *Decision) GetSystemPromptConfig() *SystemPromptPluginConfig {
633646
return result
634647
}
635648

649+
// GetHeaderMutationConfig returns the header_mutation plugin configuration
650+
func (d *Decision) GetHeaderMutationConfig() *HeaderMutationPluginConfig {
651+
config := d.GetPluginConfig("header_mutation")
652+
if config == nil {
653+
return nil
654+
}
655+
656+
result := &HeaderMutationPluginConfig{}
657+
658+
// Parse add headers
659+
if add, ok := config["add"].([]interface{}); ok {
660+
for _, item := range add {
661+
if headerMap, ok := item.(map[string]interface{}); ok {
662+
name, _ := headerMap["name"].(string)
663+
value, _ := headerMap["value"].(string)
664+
result.Add = append(result.Add, HeaderPair{Name: name, Value: value})
665+
}
666+
}
667+
}
668+
669+
// Parse update headers
670+
if update, ok := config["update"].([]interface{}); ok {
671+
for _, item := range update {
672+
if headerMap, ok := item.(map[string]interface{}); ok {
673+
name, _ := headerMap["name"].(string)
674+
value, _ := headerMap["value"].(string)
675+
result.Update = append(result.Update, HeaderPair{Name: name, Value: value})
676+
}
677+
}
678+
}
679+
680+
// Parse delete headers
681+
if del, ok := config["delete"].([]interface{}); ok {
682+
for _, item := range del {
683+
if headerName, ok := item.(string); ok {
684+
result.Delete = append(result.Delete, headerName)
685+
}
686+
}
687+
}
688+
689+
return result
690+
}
691+
636692
// RuleCombination defines how to combine multiple rule conditions with AND/OR operators
637693
type RuleCombination struct {
638694
// Operator specifies how to combine conditions: "AND" or "OR"

src/semantic-router/pkg/extproc/processor_req_body.go

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -64,8 +64,9 @@ func (r *OpenAIRouter) handleRequestBody(v *ext_proc.ProcessingRequest_RequestBo
6464
// Get content from messages
6565
userContent, nonUserMessages := extractUserAndNonUserContent(openAIRequest)
6666

67-
// Perform classification and model selection once at the beginning
68-
categoryName, classificationConfidence, reasoningDecision, selectedModel := r.performClassificationAndModelSelection(originalModel, userContent, nonUserMessages)
67+
// Perform decision evaluation and model selection once at the beginning
68+
// Use decision-based routing if decisions are configured, otherwise fall back to category-based
69+
categoryName, classificationConfidence, reasoningDecision, selectedModel := r.performDecisionEvaluationAndModelSelection(originalModel, userContent, nonUserMessages, ctx)
6970

7071
// Perform security checks with category-specific settings
7172
if response, shouldReturn := r.performSecurityChecks(ctx, userContent, nonUserMessages, categoryName); shouldReturn {
@@ -159,7 +160,7 @@ func (r *OpenAIRouter) handleAutoModelRouting(openAIRequest *openai.ChatCompleti
159160
}
160161

161162
// Create response with mutations
162-
response = r.createRoutingResponse(matchedModel, selectedEndpoint, modifiedBody)
163+
response = r.createRoutingResponse(matchedModel, selectedEndpoint, modifiedBody, ctx)
163164

164165
// Log routing decision
165166
r.logRoutingDecision(ctx, "auto_routing", originalModel, matchedModel, categoryName, reasoningDecision.UseReasoning, selectedEndpoint)
@@ -274,14 +275,17 @@ func (r *OpenAIRouter) modifyRequestBodyForAutoRouting(openAIRequest *openai.Cha
274275
}
275276

276277
// createRoutingResponse creates a routing response with mutations
277-
func (r *OpenAIRouter) createRoutingResponse(model string, endpoint string, modifiedBody []byte) *ext_proc.ProcessingResponse {
278+
func (r *OpenAIRouter) createRoutingResponse(model string, endpoint string, modifiedBody []byte, ctx *RequestContext) *ext_proc.ProcessingResponse {
278279
bodyMutation := &ext_proc.BodyMutation{
279280
Mutation: &ext_proc.BodyMutation_Body{
280281
Body: modifiedBody,
281282
},
282283
}
283284

284285
setHeaders := []*core.HeaderValueOption{}
286+
removeHeaders := []string{"content-length"}
287+
288+
// Add standard routing headers
285289
if endpoint != "" {
286290
setHeaders = append(setHeaders, &core.HeaderValueOption{
287291
Header: &core.HeaderValue{
@@ -299,8 +303,21 @@ func (r *OpenAIRouter) createRoutingResponse(model string, endpoint string, modi
299303
})
300304
}
301305

306+
// Apply header mutations from decision's header_mutation plugin
307+
if ctx.VSRSelectedDecision != nil {
308+
pluginSetHeaders, pluginRemoveHeaders := r.buildHeaderMutations(ctx.VSRSelectedDecision)
309+
if len(pluginSetHeaders) > 0 {
310+
setHeaders = append(setHeaders, pluginSetHeaders...)
311+
logging.Infof("Applied %d header mutations from decision %s", len(pluginSetHeaders), ctx.VSRSelectedDecision.Name)
312+
}
313+
if len(pluginRemoveHeaders) > 0 {
314+
removeHeaders = append(removeHeaders, pluginRemoveHeaders...)
315+
logging.Infof("Applied %d header deletions from decision %s", len(pluginRemoveHeaders), ctx.VSRSelectedDecision.Name)
316+
}
317+
}
318+
302319
headerMutation := &ext_proc.HeaderMutation{
303-
RemoveHeaders: []string{"content-length"},
320+
RemoveHeaders: removeHeaders,
304321
SetHeaders: setHeaders,
305322
}
306323

src/semantic-router/pkg/extproc/processor_req_header.go

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ import (
99
"go.opentelemetry.io/otel/attribute"
1010
"go.opentelemetry.io/otel/trace"
1111

12+
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
1213
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/headers"
1314
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/logging"
1415
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/tracing"
@@ -33,11 +34,12 @@ type RequestContext struct {
3334
TTFTSeconds float64
3435

3536
// VSR decision tracking
36-
VSRSelectedCategory string // The category selected by VSR
37-
VSRReasoningMode string // "on" or "off" - whether reasoning mode was determined to be used
38-
VSRSelectedModel string // The model selected by VSR
39-
VSRCacheHit bool // Whether this request hit the cache
40-
VSRInjectedSystemPrompt bool // Whether a system prompt was injected into the request
37+
VSRSelectedCategory string // The category selected by VSR
38+
VSRReasoningMode string // "on" or "off" - whether reasoning mode was determined to be used
39+
VSRSelectedModel string // The model selected by VSR
40+
VSRCacheHit bool // Whether this request hit the cache
41+
VSRInjectedSystemPrompt bool // Whether a system prompt was injected into the request
42+
VSRSelectedDecision *config.Decision // The decision selected by DecisionEngine
4143

4244
// Tracing context
4345
TraceContext context.Context // OpenTelemetry trace context for span propagation

src/semantic-router/pkg/extproc/req_filter_classification.go

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ func (r *OpenAIRouter) performClassificationAndModelSelection(originalModel stri
6767
// performDecisionEvaluationAndModelSelection performs decision evaluation using DecisionEngine
6868
// Returns (decisionName, confidence, reasoningDecision, selectedModel)
6969
// This is the new approach that uses Decision-based routing with AND/OR rule combinations
70-
func (r *OpenAIRouter) performDecisionEvaluationAndModelSelection(originalModel string, userContent string, nonUserMessages []string) (string, float64, entropy.ReasoningDecision, string) {
70+
func (r *OpenAIRouter) performDecisionEvaluationAndModelSelection(originalModel string, userContent string, nonUserMessages []string, ctx *RequestContext) (string, float64, entropy.ReasoningDecision, string) {
7171
var decisionName string
7272
var evaluationConfidence float64
7373
var reasoningDecision entropy.ReasoningDecision
@@ -110,6 +110,9 @@ func (r *OpenAIRouter) performDecisionEvaluationAndModelSelection(originalModel
110110
return "", 0.0, entropy.ReasoningDecision{}, r.Config.DefaultModel
111111
}
112112

113+
// Store the selected decision in context for later use (e.g., header mutations)
114+
ctx.VSRSelectedDecision = result.Decision
115+
113116
decisionName = result.Decision.Name
114117
evaluationConfidence = result.Confidence
115118
logging.Infof("Decision Evaluation Result: decision=%s, confidence=%.3f, matched_rules=%v",
Lines changed: 57 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,57 @@
1+
package extproc
2+
3+
import (
4+
corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
5+
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
6+
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/logging"
7+
)
8+
9+
// buildHeaderMutations builds header mutations based on the decision's header_mutation plugin configuration
10+
// Returns (setHeaders, removeHeaders) to be applied to the request
11+
func (r *OpenAIRouter) buildHeaderMutations(decision *config.Decision) ([]*corev3.HeaderValueOption, []string) {
12+
if decision == nil {
13+
return nil, nil
14+
}
15+
16+
// Get header mutation configuration
17+
headerConfig := decision.GetHeaderMutationConfig()
18+
if headerConfig == nil {
19+
return nil, nil
20+
}
21+
22+
logging.Debugf("Building header mutations for decision %s: add=%d, update=%d, delete=%d",
23+
decision.Name, len(headerConfig.Add), len(headerConfig.Update), len(headerConfig.Delete))
24+
25+
var setHeaders []*corev3.HeaderValueOption
26+
var removeHeaders []string
27+
28+
// Apply additions (add new headers)
29+
for _, headerPair := range headerConfig.Add {
30+
setHeaders = append(setHeaders, &corev3.HeaderValueOption{
31+
Header: &corev3.HeaderValue{
32+
Key: headerPair.Name,
33+
RawValue: []byte(headerPair.Value),
34+
},
35+
})
36+
logging.Debugf("Adding header: %s=%s", headerPair.Name, headerPair.Value)
37+
}
38+
39+
// Apply updates (modify existing headers - in Envoy this is the same as set)
40+
for _, headerPair := range headerConfig.Update {
41+
setHeaders = append(setHeaders, &corev3.HeaderValueOption{
42+
Header: &corev3.HeaderValue{
43+
Key: headerPair.Name,
44+
RawValue: []byte(headerPair.Value),
45+
},
46+
})
47+
logging.Debugf("Updating header: %s=%s", headerPair.Name, headerPair.Value)
48+
}
49+
50+
// Apply deletions
51+
for _, headerName := range headerConfig.Delete {
52+
removeHeaders = append(removeHeaders, headerName)
53+
logging.Debugf("Deleting header: %s", headerName)
54+
}
55+
56+
return setHeaders, removeHeaders
57+
}

0 commit comments

Comments
 (0)