Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ categories:
- name: business
use_reasoning: false
reasoning_description: "Business content is typically conversational"
reasoning_effort: low # Business conversations need low reasoning effort
model_scores:
- model: phi4
score: 0.8
Expand Down Expand Up @@ -136,6 +137,7 @@ categories:
- name: chemistry
use_reasoning: true
reasoning_description: "Chemical reactions and formulas require systematic thinking"
reasoning_effort: high # Chemistry requires high reasoning effort
model_scores:
- model: mistral-small3.1
score: 0.8
Expand Down Expand Up @@ -186,6 +188,7 @@ categories:
- name: math
use_reasoning: true
reasoning_description: "Mathematical problems require step-by-step reasoning"
reasoning_effort: high # Math problems need high reasoning effort
model_scores:
- model: phi4
score: 1.0
Expand Down Expand Up @@ -233,4 +236,5 @@ categories:
score: 0.6
- model: phi4
score: 0.2
default_model: mistral-small3.1
default_model: mistral-small3.1
default_reasoning_effort: medium # Default reasoning effort level (low, medium, high)
4 changes: 4 additions & 0 deletions src/semantic-router/pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ type RouterConfig struct {
// Default LLM model to use if no match is found
DefaultModel string `yaml:"default_model"`

// Default reasoning effort level (low, medium, high) when not specified per category
DefaultReasoningEffort string `yaml:"default_reasoning_effort,omitempty"`

// Semantic cache configuration
SemanticCache SemanticCacheConfig `yaml:"semantic_cache"`

Expand Down Expand Up @@ -217,6 +220,7 @@ type Category struct {
Description string `yaml:"description,omitempty"`
UseReasoning bool `yaml:"use_reasoning"`
ReasoningDescription string `yaml:"reasoning_description,omitempty"`
ReasoningEffort string `yaml:"reasoning_effort,omitempty"` // Configurable reasoning effort level (low, medium, high)
ModelScores []ModelScore `yaml:"model_scores"`
}

Expand Down
4 changes: 2 additions & 2 deletions src/semantic-router/pkg/extproc/reason_mode_config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ func TestAddReasoningModeToRequestBody(t *testing.T) {
fmt.Printf("Original request body:\n%s\n\n", string(originalBody))

// Add reasoning mode
modifiedBody, err := router.setReasoningModeToRequestBody(originalBody, true)
modifiedBody, err := router.setReasoningModeToRequestBody(originalBody, true, "math")
if err != nil {
fmt.Printf("Error adding reasoning mode: %v\n", err)
return
Expand Down Expand Up @@ -273,7 +273,7 @@ func TestAddReasoningModeToRequestBody(t *testing.T) {
return
}

modifiedComplexBody, err := router.setReasoningModeToRequestBody(complexBody, true)
modifiedComplexBody, err := router.setReasoningModeToRequestBody(complexBody, true, "chemistry")
if err != nil {
fmt.Printf("Error adding reasoning mode to complex request: %v\n", err)
return
Expand Down
48 changes: 41 additions & 7 deletions src/semantic-router/pkg/extproc/reason_mode_selector.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,19 @@ import (

// shouldUseReasoningMode reports whether reasoning mode should be enabled for query.
// It delegates to getReasoningModeAndCategory and keeps only the boolean decision;
// the category name returned alongside it is discarded.
func (r *OpenAIRouter) shouldUseReasoningMode(query string) bool {
	useReasoning, _ := r.getReasoningModeAndCategory(query)
	return useReasoning
}

// getReasoningModeAndCategory determines if reasoning mode should be enabled and returns the category name
func (r *OpenAIRouter) getReasoningModeAndCategory(query string) (bool, string) {
// Get the category for this query using the existing classification system
categoryName := r.findCategoryForClassification(query)

// If no category was determined (empty string), default to no reasoning
if categoryName == "" {
log.Printf("No category determined for query, defaulting to no reasoning mode")
return false
return false, ""
}

// Normalize category name for consistent lookup
Expand All @@ -30,13 +36,13 @@ func (r *OpenAIRouter) shouldUseReasoningMode(query string) bool {
}
log.Printf("Reasoning mode decision: Category '%s' → %s",
categoryName, reasoningStatus)
return category.UseReasoning
return category.UseReasoning, categoryName
}
}

// If category not found in config, default to no reasoning
log.Printf("Category '%s' not found in configuration, defaulting to no reasoning mode", categoryName)
return false
return false, categoryName
}

// getChatTemplateKwargs returns the appropriate chat template kwargs based on model and reasoning mode
Expand All @@ -57,12 +63,12 @@ func getChatTemplateKwargs(model string, useReasoning bool) map[string]interface
}
}

// Default: no chat template kwargs
// Default: no chat template kwargs for unknown models
return nil
}

// setReasoningModeToRequestBody adds chat_template_kwargs to the JSON request body
func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled bool) ([]byte, error) {
func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled bool, categoryName string) ([]byte, error) {
// Parse the JSON request body
var requestMap map[string]interface{}
if err := json.Unmarshal(requestBody, &requestMap); err != nil {
Expand Down Expand Up @@ -91,8 +97,9 @@ func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled
originalReasoningEffort = "low"
}
if enabled {
// TODO: make this configurable
requestMap["reasoning_effort"] = "high"
// Use configurable reasoning effort based on category
effort := r.getReasoningEffort(categoryName)
requestMap["reasoning_effort"] = effort
} else {
requestMap["reasoning_effort"] = originalReasoningEffort
}
Expand Down Expand Up @@ -170,3 +177,30 @@ func (r *OpenAIRouter) LogReasoningConfigurationSummary() {

log.Printf("Reasoning mode summary: %d/%d categories have reasoning enabled", enabledCount, len(r.Config.Categories))
}

// getReasoningEffort resolves the reasoning effort string to use for categoryName.
//
// Resolution order:
//  1. the ReasoningEffort of the first configured category whose Name equals
//     categoryName, when that value is non-empty;
//  2. Config.DefaultReasoningEffort, when non-empty;
//  3. the literal "medium".
//
// A nil Config (e.g., in tests) short-circuits straight to "medium".
//
// NOTE(review): the category match below is an exact string comparison; confirm
// that callers pass the name spelled exactly as it appears in the configuration.
func (r *OpenAIRouter) getReasoningEffort(categoryName string) string {
	const fallbackEffort = "medium"

	cfg := r.Config
	if cfg == nil {
		return fallbackEffort
	}

	for i := range cfg.Categories {
		if cfg.Categories[i].Name != categoryName {
			continue
		}
		// Only the first category with a matching name is consulted.
		if effort := cfg.Categories[i].ReasoningEffort; effort != "" {
			return effort
		}
		break
	}

	// No category-specific value: use the global default, else the built-in one.
	if cfg.DefaultReasoningEffort == "" {
		return fallbackEffort
	}
	return cfg.DefaultReasoningEffort
}
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func TestReasoningModeIntegration(t *testing.T) {
t.Fatalf("Failed to marshal original request: %v", err)
}

modifiedBody, err := router.setReasoningModeToRequestBody(originalBody, true)
modifiedBody, err := router.setReasoningModeToRequestBody(originalBody, true, "math")
if err != nil {
t.Fatalf("Failed to add reasoning mode: %v", err)
}
Expand Down Expand Up @@ -130,7 +130,7 @@ func TestReasoningModeIntegration(t *testing.T) {
t.Fatalf("Failed to marshal phi4 request: %v", err)
}

modifiedBodyPhi4, err := router.setReasoningModeToRequestBody(originalBodyPhi4, true)
modifiedBodyPhi4, err := router.setReasoningModeToRequestBody(originalBodyPhi4, true, "math")
if err != nil {
t.Fatalf("Failed to process phi4 request: %v", err)
}
Expand All @@ -148,8 +148,8 @@ func TestReasoningModeIntegration(t *testing.T) {
// But reasoning_effort should still be set
if reasoningEffort, exists := modifiedRequestPhi4["reasoning_effort"]; !exists {
t.Error("reasoning_effort should be set for phi4 model")
} else if reasoningEffort != "high" {
t.Errorf("Expected reasoning_effort: high for phi4 model, got %v", reasoningEffort)
} else if reasoningEffort != "medium" {
t.Errorf("Expected reasoning_effort: medium for phi4 model (default), got %v", reasoningEffort)
}
})

Expand Down
4 changes: 2 additions & 2 deletions src/semantic-router/pkg/extproc/request_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
log.Printf("Routing to model: %s", matchedModel)

// Check reasoning mode for this category
useReasoning := r.shouldUseReasoningMode(userContent)
useReasoning, categoryName := r.getReasoningModeAndCategory(userContent)
log.Printf("Reasoning mode decision for this query: %v on [%s] model", useReasoning, matchedModel)

// Track the model load for the selected model
Expand Down Expand Up @@ -366,7 +366,7 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
return nil, status.Errorf(codes.Internal, "error serializing modified request: %v", err)
}

modifiedBody, err = r.setReasoningModeToRequestBody(modifiedBody, useReasoning)
modifiedBody, err = r.setReasoningModeToRequestBody(modifiedBody, useReasoning, categoryName)
if err != nil {
log.Printf("Error setting reasoning mode %v to request: %v", useReasoning, err)
return nil, status.Errorf(codes.Internal, "error setting reasoning mode: %v", err)
Expand Down
Loading