Skip to content

Commit c7b5934

Browse files
committed
feat: implement decision-based routing with plugin architecture
Signed-off-by: bitliu <[email protected]>
1 parent b88d605 commit c7b5934

File tree

7 files changed

+340
-87
lines changed

7 files changed

+340
-87
lines changed

deploy/kubernetes/ai-gateway/semantic-router-values/values.yaml

Lines changed: 297 additions & 78 deletions
Large diffs are not rendered by default.

src/semantic-router/pkg/extproc/processor_req_body.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,8 @@ func (r *OpenAIRouter) handleAutoModelRouting(openAIRequest *openai.ChatCompleti
145145
r.recordRoutingDecision(ctx, decisionName, originalModel, matchedModel, reasoningDecision)
146146

147147
// Track VSR decision information
148-
r.trackVSRDecision(ctx, decisionName, matchedModel, reasoningDecision.UseReasoning)
148+
// categoryName is already set in ctx.VSRSelectedCategory by performDecisionEvaluationAndModelSelection
149+
r.trackVSRDecision(ctx, ctx.VSRSelectedCategory, decisionName, matchedModel, reasoningDecision.UseReasoning)
149150

150151
// Track model routing metrics
151152
metrics.RecordModelRouting(originalModel, matchedModel)

src/semantic-router/pkg/extproc/processor_req_header.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,13 @@ type RequestContext struct {
3434
TTFTSeconds float64
3535

3636
// VSR decision tracking
37-
VSRSelectedCategory string // The category selected by VSR
37+
VSRSelectedCategory string // The category from domain classification (MMLU category)
38+
VSRSelectedDecisionName string // The decision name from DecisionEngine evaluation
3839
VSRReasoningMode string // "on" or "off" - whether reasoning mode was determined to be used
3940
VSRSelectedModel string // The model selected by VSR
4041
VSRCacheHit bool // Whether this request hit the cache
4142
VSRInjectedSystemPrompt bool // Whether a system prompt was injected into the request
42-
VSRSelectedDecision *config.Decision // The decision selected by DecisionEngine
43+
VSRSelectedDecision *config.Decision // The decision object selected by DecisionEngine (for plugins)
4344

4445
// Tracing context
4546
TraceContext context.Context // OpenTelemetry trace context for span propagation

src/semantic-router/pkg/extproc/processor_res_header.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ func (r *OpenAIRouter) handleResponseHeaders(v *ext_proc.ProcessingRequest_Respo
5454
if isSuccessful && !ctx.VSRCacheHit && ctx != nil {
5555
var setHeaders []*core.HeaderValueOption
5656

57-
// Add x-vsr-selected-category header
57+
// Add x-vsr-selected-category header (from domain classification)
5858
if ctx.VSRSelectedCategory != "" {
5959
setHeaders = append(setHeaders, &core.HeaderValueOption{
6060
Header: &core.HeaderValue{
@@ -64,6 +64,16 @@ func (r *OpenAIRouter) handleResponseHeaders(v *ext_proc.ProcessingRequest_Respo
6464
})
6565
}
6666

67+
// Add x-vsr-selected-decision header (from decision evaluation)
68+
if ctx.VSRSelectedDecisionName != "" {
69+
setHeaders = append(setHeaders, &core.HeaderValueOption{
70+
Header: &core.HeaderValue{
71+
Key: headers.VSRSelectedDecision,
72+
RawValue: []byte(ctx.VSRSelectedDecisionName),
73+
},
74+
})
75+
}
76+
6777
// Add x-vsr-selected-reasoning header
6878
if ctx.VSRReasoningMode != "" {
6979
setHeaders = append(setHeaders, &core.HeaderValueOption{

src/semantic-router/pkg/extproc/recorder.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,11 @@ func (r *OpenAIRouter) recordRoutingDecision(ctx *RequestContext, decisionName s
5757
}
5858

5959
// trackVSRDecision tracks VSR decision information in context
60-
func (r *OpenAIRouter) trackVSRDecision(ctx *RequestContext, decisionName string, matchedModel string, useReasoning bool) {
61-
ctx.VSRSelectedCategory = decisionName
60+
// categoryName: the category from domain classification (MMLU category)
61+
// decisionName: the decision name from DecisionEngine evaluation
62+
func (r *OpenAIRouter) trackVSRDecision(ctx *RequestContext, categoryName string, decisionName string, matchedModel string, useReasoning bool) {
63+
ctx.VSRSelectedCategory = categoryName
64+
ctx.VSRSelectedDecisionName = decisionName
6265
ctx.VSRSelectedModel = matchedModel
6366
if useReasoning {
6467
ctx.VSRReasoningMode = "on"

src/semantic-router/pkg/extproc/req_filter_classification.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,23 @@ func (r *OpenAIRouter) performDecisionEvaluationAndModelSelection(originalModel
5656
// Store the selected decision in context for later use (e.g., header mutations)
5757
ctx.VSRSelectedDecision = result.Decision
5858

59+
// Extract domain category from matched rules (for VSRSelectedCategory header)
60+
// MatchedRules contains rule names like "domain:math", "keyword:thinking", etc.
61+
// We extract the first domain rule as the category
62+
categoryName := ""
63+
for _, rule := range result.MatchedRules {
64+
if strings.HasPrefix(rule, "domain:") {
65+
categoryName = strings.TrimPrefix(rule, "domain:")
66+
break
67+
}
68+
}
69+
// Store category in context for response headers
70+
ctx.VSRSelectedCategory = categoryName
71+
5972
decisionName = result.Decision.Name
6073
evaluationConfidence = result.Confidence
61-
logging.Infof("Decision Evaluation Result: decision=%s, confidence=%.3f, matched_rules=%v",
62-
decisionName, evaluationConfidence, result.MatchedRules)
74+
logging.Infof("Decision Evaluation Result: decision=%s, category=%s, confidence=%.3f, matched_rules=%v",
75+
decisionName, categoryName, evaluationConfidence, result.MatchedRules)
6376

6477
// Select best model from the decision's ModelRefs
6578
if len(result.Decision.ModelRefs) > 0 {

src/semantic-router/pkg/headers/headers.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,16 @@ const (
2424
// Vector Semantic Router decision-making information for debugging and monitoring.
2525
// Headers are only added when the request is successful and did not hit the cache.
2626
const (
27-
// VSRSelectedCategory indicates the category selected by VSR during classification.
27+
// VSRSelectedCategory indicates the category selected by VSR during domain classification.
28+
// This comes from the domain classifier (MMLU categories).
2829
// Example values: "math", "business", "biology", "computer_science"
2930
VSRSelectedCategory = "x-vsr-selected-category"
3031

32+
// VSRSelectedDecision indicates the decision selected by VSR during decision evaluation.
33+
// This is the final routing decision made by the DecisionEngine.
34+
// Example values: "math_decision", "business_decision", "thinking_decision"
35+
VSRSelectedDecision = "x-vsr-selected-decision"
36+
3137
// VSRSelectedReasoning indicates whether reasoning mode was determined to be used.
3238
// Values: "on" (reasoning enabled) or "off" (reasoning disabled)
3339
VSRSelectedReasoning = "x-vsr-selected-reasoning"

0 commit comments

Comments
 (0)