Skip to content

Commit b88d605

Browse files
committed
feat: implement decision-based routing with plugin architecture
Signed-off-by: bitliu <[email protected]>
1 parent 84c25bf commit b88d605

File tree

3 files changed: 36 additions and 93 deletions

src/semantic-router/pkg/extproc/processor_req_body.go

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -66,14 +66,14 @@ func (r *OpenAIRouter) handleRequestBody(v *ext_proc.ProcessingRequest_RequestBo
6666

6767
// Perform decision evaluation and model selection once at the beginning
6868
// Use decision-based routing; when no decisions are configured or decision evaluation fails, the default model is selected
69-
categoryName, classificationConfidence, reasoningDecision, selectedModel := r.performDecisionEvaluationAndModelSelection(originalModel, userContent, nonUserMessages, ctx)
69+
decisionName, classificationConfidence, reasoningDecision, selectedModel := r.performDecisionEvaluationAndModelSelection(originalModel, userContent, nonUserMessages, ctx)
7070

71-
// Perform security checks with category-specific settings
72-
if response, shouldReturn := r.performSecurityChecks(ctx, userContent, nonUserMessages, categoryName); shouldReturn {
71+
// Perform security checks with decision-specific settings
72+
if response, shouldReturn := r.performSecurityChecks(ctx, userContent, nonUserMessages, decisionName); shouldReturn {
7373
return response, nil
7474
}
7575

76-
// Perform PII detection and policy check (if PII policy is enabled for the category)
76+
// Perform PII detection and policy check (if PII policy is enabled for the decision)
7777
// For auto models: this may modify selectedModel if the initially selected model violates PII policy
7878
// For non-auto models: this checks if the specified model passes PII policy
7979
isAutoModel := r.Config != nil && r.Config.IsAutoModelName(originalModel)
@@ -83,7 +83,7 @@ func (r *OpenAIRouter) handleRequestBody(v *ext_proc.ProcessingRequest_RequestBo
8383
modelToCheck = originalModel
8484
}
8585

86-
allowedModel, piiResponse := r.performPIIDetection(ctx, userContent, nonUserMessages, categoryName, modelToCheck, isAutoModel)
86+
allowedModel, piiResponse := r.performPIIDetection(ctx, userContent, nonUserMessages, decisionName, modelToCheck, isAutoModel)
8787
if piiResponse != nil {
8888
// PII policy violation - return error response
8989
return piiResponse, nil
@@ -93,18 +93,18 @@ func (r *OpenAIRouter) handleRequestBody(v *ext_proc.ProcessingRequest_RequestBo
9393
selectedModel = allowedModel
9494
}
9595

96-
// Handle caching with category-specific settings
97-
if response, shouldReturn := r.handleCaching(ctx, categoryName); shouldReturn {
96+
// Handle caching with decision-specific settings
97+
if response, shouldReturn := r.handleCaching(ctx, decisionName); shouldReturn {
9898
return response, nil
9999
}
100100

101101
// Handle model selection and routing with pre-computed classification results and selected model
102-
return r.handleModelRouting(openAIRequest, originalModel, categoryName, classificationConfidence, reasoningDecision, selectedModel, ctx)
102+
return r.handleModelRouting(openAIRequest, originalModel, decisionName, classificationConfidence, reasoningDecision, selectedModel, ctx)
103103
}
104104

105105
// handleModelRouting handles model selection and routing logic
106-
// categoryName, classificationConfidence, reasoningDecision, and selectedModel are pre-computed from ProcessRequest
107-
func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNewParams, originalModel string, categoryName string, classificationConfidence float64, reasoningDecision entropy.ReasoningDecision, selectedModel string, ctx *RequestContext) (*ext_proc.ProcessingResponse, error) {
106+
// decisionName, classificationConfidence, reasoningDecision, and selectedModel are pre-computed by the caller (handleRequestBody) via performDecisionEvaluationAndModelSelection
107+
func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNewParams, originalModel string, decisionName string, classificationConfidence float64, reasoningDecision entropy.ReasoningDecision, selectedModel string, ctx *RequestContext) (*ext_proc.ProcessingResponse, error) {
108108
response := &ext_proc.ProcessingResponse{
109109
Response: &ext_proc.ProcessingResponse_RequestBody{
110110
RequestBody: &ext_proc.BodyResponse{
@@ -118,7 +118,7 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
118118
isAutoModel := r.Config != nil && r.Config.IsAutoModelName(originalModel)
119119

120120
if isAutoModel && selectedModel != "" {
121-
return r.handleAutoModelRouting(openAIRequest, originalModel, categoryName, reasoningDecision, selectedModel, ctx, response)
121+
return r.handleAutoModelRouting(openAIRequest, originalModel, decisionName, reasoningDecision, selectedModel, ctx, response)
122122
} else if !isAutoModel {
123123
return r.handleSpecifiedModelRouting(openAIRequest, originalModel, ctx)
124124
}
@@ -129,9 +129,9 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
129129
}
130130

131131
// handleAutoModelRouting handles routing for auto model selection
132-
func (r *OpenAIRouter) handleAutoModelRouting(openAIRequest *openai.ChatCompletionNewParams, originalModel string, categoryName string, reasoningDecision entropy.ReasoningDecision, selectedModel string, ctx *RequestContext, response *ext_proc.ProcessingResponse) (*ext_proc.ProcessingResponse, error) {
133-
logging.Infof("Using Auto Model Selection (model=%s), category=%s, selected=%s",
134-
originalModel, categoryName, selectedModel)
132+
func (r *OpenAIRouter) handleAutoModelRouting(openAIRequest *openai.ChatCompletionNewParams, originalModel string, decisionName string, reasoningDecision entropy.ReasoningDecision, selectedModel string, ctx *RequestContext, response *ext_proc.ProcessingResponse) (*ext_proc.ProcessingResponse, error) {
133+
logging.Infof("Using Auto Model Selection (model=%s), decision=%s, selected=%s",
134+
originalModel, decisionName, selectedModel)
135135

136136
matchedModel := selectedModel
137137

@@ -142,10 +142,10 @@ func (r *OpenAIRouter) handleAutoModelRouting(openAIRequest *openai.ChatCompleti
142142
}
143143

144144
// Record routing decision with tracing
145-
r.recordRoutingDecision(ctx, categoryName, originalModel, matchedModel, reasoningDecision)
145+
r.recordRoutingDecision(ctx, decisionName, originalModel, matchedModel, reasoningDecision)
146146

147147
// Track VSR decision information
148-
r.trackVSRDecision(ctx, categoryName, matchedModel, reasoningDecision.UseReasoning)
148+
r.trackVSRDecision(ctx, decisionName, matchedModel, reasoningDecision.UseReasoning)
149149

150150
// Track model routing metrics
151151
metrics.RecordModelRouting(originalModel, matchedModel)
@@ -154,7 +154,7 @@ func (r *OpenAIRouter) handleAutoModelRouting(openAIRequest *openai.ChatCompleti
154154
selectedEndpoint := r.selectEndpointForModel(ctx, matchedModel)
155155

156156
// Modify request body with new model, reasoning mode, and system prompt
157-
modifiedBody, err := r.modifyRequestBodyForAutoRouting(openAIRequest, matchedModel, categoryName, reasoningDecision.UseReasoning, ctx)
157+
modifiedBody, err := r.modifyRequestBodyForAutoRouting(openAIRequest, matchedModel, decisionName, reasoningDecision.UseReasoning, ctx)
158158
if err != nil {
159159
return nil, err
160160
}
@@ -163,7 +163,7 @@ func (r *OpenAIRouter) handleAutoModelRouting(openAIRequest *openai.ChatCompleti
163163
response = r.createRoutingResponse(matchedModel, selectedEndpoint, modifiedBody, ctx)
164164

165165
// Log routing decision
166-
r.logRoutingDecision(ctx, "auto_routing", originalModel, matchedModel, categoryName, reasoningDecision.UseReasoning, selectedEndpoint)
166+
r.logRoutingDecision(ctx, "auto_routing", originalModel, matchedModel, decisionName, reasoningDecision.UseReasoning, selectedEndpoint)
167167

168168
// Handle route cache clearing
169169
if r.shouldClearRouteCache() {
@@ -242,7 +242,7 @@ func (r *OpenAIRouter) selectEndpointForModel(ctx *RequestContext, model string)
242242
}
243243

244244
// modifyRequestBodyForAutoRouting modifies the request body for auto routing
245-
func (r *OpenAIRouter) modifyRequestBodyForAutoRouting(openAIRequest *openai.ChatCompletionNewParams, matchedModel string, categoryName string, useReasoning bool, ctx *RequestContext) ([]byte, error) {
245+
func (r *OpenAIRouter) modifyRequestBodyForAutoRouting(openAIRequest *openai.ChatCompletionNewParams, matchedModel string, decisionName string, useReasoning bool, ctx *RequestContext) ([]byte, error) {
246246
// Modify the model in the request
247247
openAIRequest.Model = matchedModel
248248

@@ -254,19 +254,19 @@ func (r *OpenAIRouter) modifyRequestBodyForAutoRouting(openAIRequest *openai.Cha
254254
return nil, status.Errorf(codes.Internal, "error serializing modified request: %v", err)
255255
}
256256

257-
if categoryName == "" {
257+
if decisionName == "" {
258258
return modifiedBody, nil
259259
}
260260
// Set reasoning mode
261-
modifiedBody, err = r.setReasoningModeToRequestBody(modifiedBody, useReasoning, categoryName)
261+
modifiedBody, err = r.setReasoningModeToRequestBody(modifiedBody, useReasoning, decisionName)
262262
if err != nil {
263263
logging.Errorf("Error setting reasoning mode %v to request: %v", useReasoning, err)
264264
metrics.RecordRequestError(matchedModel, "serialization_error")
265265
return nil, status.Errorf(codes.Internal, "error setting reasoning mode: %v", err)
266266
}
267267

268-
// Add category-specific system prompt if configured
269-
modifiedBody, err = r.addSystemPromptIfConfigured(modifiedBody, categoryName, matchedModel, ctx)
268+
// Add decision-specific system prompt if configured
269+
modifiedBody, err = r.addSystemPromptIfConfigured(modifiedBody, decisionName, matchedModel, ctx)
270270
if err != nil {
271271
return nil, err
272272
}

src/semantic-router/pkg/extproc/recorder.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,18 @@ import (
1313
)
1414

1515
// logRoutingDecision logs routing decision with structured logging
16-
func (r *OpenAIRouter) logRoutingDecision(ctx *RequestContext, reasonCode string, originalModel string, selectedModel string, categoryName string, reasoningEnabled bool, endpoint string) {
16+
func (r *OpenAIRouter) logRoutingDecision(ctx *RequestContext, reasonCode string, originalModel string, selectedModel string, decisionName string, reasoningEnabled bool, endpoint string) {
1717
effortForMetrics := ""
18-
if reasoningEnabled && categoryName != "" {
19-
effortForMetrics = r.getReasoningEffort(categoryName, selectedModel)
18+
if reasoningEnabled && decisionName != "" {
19+
effortForMetrics = r.getReasoningEffort(decisionName, selectedModel)
2020
}
2121

2222
logging.LogEvent("routing_decision", map[string]interface{}{
2323
"reason_code": reasonCode,
2424
"request_id": ctx.RequestID,
2525
"original_model": originalModel,
2626
"selected_model": selectedModel,
27-
"category": categoryName,
27+
"decision": decisionName,
2828
"reasoning_enabled": reasoningEnabled,
2929
"reasoning_effort": effortForMetrics,
3030
"selected_endpoint": endpoint,
@@ -34,15 +34,15 @@ func (r *OpenAIRouter) logRoutingDecision(ctx *RequestContext, reasonCode string
3434
}
3535

3636
// recordRoutingDecision records routing decision with tracing
37-
func (r *OpenAIRouter) recordRoutingDecision(ctx *RequestContext, categoryName string, originalModel string, matchedModel string, reasoningDecision entropy.ReasoningDecision) {
37+
func (r *OpenAIRouter) recordRoutingDecision(ctx *RequestContext, decisionName string, originalModel string, matchedModel string, reasoningDecision entropy.ReasoningDecision) {
3838
routingCtx, routingSpan := tracing.StartSpan(ctx.TraceContext, tracing.SpanRoutingDecision)
3939

4040
useReasoning := reasoningDecision.UseReasoning
4141
logging.Infof("Entropy-based reasoning decision for this query: %v on [%s] model (confidence: %.3f, reason: %s)",
4242
useReasoning, matchedModel, reasoningDecision.Confidence, reasoningDecision.DecisionReason)
4343

44-
effortForMetrics := r.getReasoningEffort(categoryName, matchedModel)
45-
metrics.RecordReasoningDecision(categoryName, matchedModel, useReasoning, effortForMetrics)
44+
effortForMetrics := r.getReasoningEffort(decisionName, matchedModel)
45+
metrics.RecordReasoningDecision(decisionName, matchedModel, useReasoning, effortForMetrics)
4646

4747
tracing.SetSpanAttributes(routingSpan,
4848
attribute.String(tracing.AttrRoutingStrategy, "auto"),
@@ -57,8 +57,8 @@ func (r *OpenAIRouter) recordRoutingDecision(ctx *RequestContext, categoryName s
5757
}
5858

5959
// trackVSRDecision tracks VSR decision information in context
60-
func (r *OpenAIRouter) trackVSRDecision(ctx *RequestContext, categoryName string, matchedModel string, useReasoning bool) {
61-
ctx.VSRSelectedCategory = categoryName
60+
func (r *OpenAIRouter) trackVSRDecision(ctx *RequestContext, decisionName string, matchedModel string, useReasoning bool) {
61+
ctx.VSRSelectedCategory = decisionName
6262
ctx.VSRSelectedModel = matchedModel
6363
if useReasoning {
6464
ctx.VSRReasoningMode = "on"

src/semantic-router/pkg/extproc/req_filter_classification.go

Lines changed: 4 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -7,63 +7,6 @@ import (
77
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/entropy"
88
)
99

10-
// extractUserAndNonUserContent extracts user and non-user messages from the request
11-
12-
// performClassificationAndModelSelection performs classification and model selection once
13-
// Returns (categoryName, confidence, reasoningDecision, selectedModel)
14-
func (r *OpenAIRouter) performClassificationAndModelSelection(originalModel string, userContent string, nonUserMessages []string) (string, float64, entropy.ReasoningDecision, string) {
15-
var categoryName string
16-
var classificationConfidence float64
17-
var reasoningDecision entropy.ReasoningDecision
18-
var selectedModel string
19-
20-
// Only perform classification for auto models with content
21-
if !r.Config.IsAutoModelName(originalModel) {
22-
return "", 0.0, entropy.ReasoningDecision{}, ""
23-
}
24-
25-
if len(nonUserMessages) == 0 && userContent == "" {
26-
return "", 0.0, entropy.ReasoningDecision{}, ""
27-
}
28-
29-
// Determine text to use for classification
30-
classificationText := userContent
31-
if classificationText == "" && len(nonUserMessages) > 0 {
32-
classificationText = strings.Join(nonUserMessages, " ")
33-
}
34-
35-
if classificationText == "" {
36-
return "", 0.0, entropy.ReasoningDecision{}, ""
37-
}
38-
39-
// Perform entropy-based classification once
40-
catName, confidence, reasoningDec, err := r.Classifier.ClassifyCategoryWithEntropy(classificationText)
41-
if err != nil {
42-
logging.Errorf("Entropy-based classification error: %v, using empty category", err)
43-
categoryName = ""
44-
classificationConfidence = 0.0
45-
reasoningDecision = entropy.ReasoningDecision{}
46-
} else {
47-
categoryName = catName
48-
classificationConfidence = confidence
49-
reasoningDecision = reasoningDec
50-
logging.Infof("Classification Result: category=%s, confidence=%.3f, reasoning=%v",
51-
categoryName, classificationConfidence, reasoningDecision.UseReasoning)
52-
}
53-
54-
// Select best model for this category
55-
if categoryName != "" {
56-
selectedModel = r.Classifier.SelectBestModelForCategory(categoryName)
57-
logging.Infof("Selected model for category %s: %s", categoryName, selectedModel)
58-
} else {
59-
// No category found, use default model
60-
selectedModel = r.Config.DefaultModel
61-
logging.Infof("No category classified, using default model: %s", selectedModel)
62-
}
63-
64-
return categoryName, classificationConfidence, reasoningDecision, selectedModel
65-
}
66-
6710
// performDecisionEvaluationAndModelSelection performs decision evaluation using DecisionEngine
6811
// Returns (decisionName, confidence, reasoningDecision, selectedModel)
6912
// This is the new approach that uses Decision-based routing with AND/OR rule combinations
@@ -84,8 +27,8 @@ func (r *OpenAIRouter) performDecisionEvaluationAndModelSelection(originalModel
8427

8528
// Check if decisions are configured
8629
if len(r.Config.Decisions) == 0 {
87-
logging.Warnf("No decisions configured, falling back to legacy classification")
88-
return r.performClassificationAndModelSelection(originalModel, userContent, nonUserMessages)
30+
logging.Warnf("No decisions configured, using default model")
31+
return "", 0.0, entropy.ReasoningDecision{}, r.Config.DefaultModel
8932
}
9033

9134
// Determine text to use for evaluation
@@ -101,8 +44,8 @@ func (r *OpenAIRouter) performDecisionEvaluationAndModelSelection(originalModel
10144
// Perform decision evaluation using DecisionEngine
10245
result, err := r.Classifier.EvaluateDecisionWithEngine(evaluationText)
10346
if err != nil {
104-
logging.Errorf("Decision evaluation error: %v, falling back to legacy classification", err)
105-
return r.performClassificationAndModelSelection(originalModel, userContent, nonUserMessages)
47+
logging.Errorf("Decision evaluation error: %v, using default model", err)
48+
return "", 0.0, entropy.ReasoningDecision{}, r.Config.DefaultModel
10649
}
10750

10851
if result == nil || result.Decision == nil {

0 commit comments

Comments
 (0)