diff --git a/config/config.yaml b/config/config.yaml index 354ac457..fadbf30a 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -96,6 +96,7 @@ categories: - name: business use_reasoning: false reasoning_description: "Business content is typically conversational" + reasoning_effort: low # Business conversations need low reasoning effort model_scores: - model: phi4 score: 0.8 @@ -136,6 +137,7 @@ categories: - name: chemistry use_reasoning: true reasoning_description: "Chemical reactions and formulas require systematic thinking" + reasoning_effort: high # Chemistry requires high reasoning effort model_scores: - model: mistral-small3.1 score: 0.8 @@ -186,6 +188,7 @@ categories: - name: math use_reasoning: true reasoning_description: "Mathematical problems require step-by-step reasoning" + reasoning_effort: high # Math problems need high reasoning effort model_scores: - model: phi4 score: 1.0 @@ -233,4 +236,5 @@ categories: score: 0.6 - model: phi4 score: 0.2 -default_model: mistral-small3.1 \ No newline at end of file +default_model: mistral-small3.1 +default_reasoning_effort: medium # Default reasoning effort level (low, medium, high) \ No newline at end of file diff --git a/src/semantic-router/pkg/config/config.go b/src/semantic-router/pkg/config/config.go index d44e4dde..fa7cc7f5 100644 --- a/src/semantic-router/pkg/config/config.go +++ b/src/semantic-router/pkg/config/config.go @@ -41,6 +41,9 @@ type RouterConfig struct { // Default LLM model to use if no match is found DefaultModel string `yaml:"default_model"` + // Default reasoning effort level (low, medium, high) when not specified per category + DefaultReasoningEffort string `yaml:"default_reasoning_effort,omitempty"` + // Semantic cache configuration SemanticCache SemanticCacheConfig `yaml:"semantic_cache"` @@ -217,6 +220,7 @@ type Category struct { Description string `yaml:"description,omitempty"` UseReasoning bool `yaml:"use_reasoning"` ReasoningDescription string `yaml:"reasoning_description,omitempty"` + ReasoningEffort string `yaml:"reasoning_effort,omitempty"` // Configurable reasoning effort level (low, medium, high) ModelScores []ModelScore `yaml:"model_scores"` } diff --git a/src/semantic-router/pkg/extproc/reason_mode_config_test.go b/src/semantic-router/pkg/extproc/reason_mode_config_test.go index ed64e3dc..2e8fc347 100644 --- a/src/semantic-router/pkg/extproc/reason_mode_config_test.go +++ b/src/semantic-router/pkg/extproc/reason_mode_config_test.go @@ -220,7 +220,7 @@ func TestAddReasoningModeToRequestBody(t *testing.T) { fmt.Printf("Original request body:\n%s\n\n", string(originalBody)) // Add reasoning mode - modifiedBody, err := router.setReasoningModeToRequestBody(originalBody, true) + modifiedBody, err := router.setReasoningModeToRequestBody(originalBody, true, "math") if err != nil { fmt.Printf("Error adding reasoning mode: %v\n", err) return @@ -273,7 +273,7 @@ func TestAddReasoningModeToRequestBody(t *testing.T) { return } - modifiedComplexBody, err := router.setReasoningModeToRequestBody(complexBody, true) + modifiedComplexBody, err := router.setReasoningModeToRequestBody(complexBody, true, "chemistry") if err != nil { fmt.Printf("Error adding reasoning mode to complex request: %v\n", err) return diff --git a/src/semantic-router/pkg/extproc/reason_mode_selector.go b/src/semantic-router/pkg/extproc/reason_mode_selector.go index a0cf153e..00b97792 100644 --- a/src/semantic-router/pkg/extproc/reason_mode_selector.go +++ b/src/semantic-router/pkg/extproc/reason_mode_selector.go @@ -9,13 +9,19 @@ import ( // shouldUseReasoningMode determines if reasoning mode should be enabled based on the query category func (r *OpenAIRouter) shouldUseReasoningMode(query string) bool { + enabled, _ := r.getReasoningModeAndCategory(query) + return enabled +} + +// getReasoningModeAndCategory determines if reasoning mode should be enabled and returns the category name +func (r *OpenAIRouter) getReasoningModeAndCategory(query string) (bool, string) { // Get the category for this query using the existing classification system categoryName := r.findCategoryForClassification(query) // If no category was determined (empty string), default to no reasoning if categoryName == "" { log.Printf("No category determined for query, defaulting to no reasoning mode") - return false + return false, "" } // Normalize category name for consistent lookup @@ -30,13 +36,13 @@ func (r *OpenAIRouter) shouldUseReasoningMode(query string) bool { } log.Printf("Reasoning mode decision: Category '%s' → %s", categoryName, reasoningStatus) - return category.UseReasoning + return category.UseReasoning, categoryName } } // If category not found in config, default to no reasoning log.Printf("Category '%s' not found in configuration, defaulting to no reasoning mode", categoryName) - return false + return false, categoryName } // getChatTemplateKwargs returns the appropriate chat template kwargs based on model and reasoning mode @@ -57,12 +63,12 @@ func getChatTemplateKwargs(model string, useReasoning bool) map[string]interface } } - // Default: no chat template kwargs + // Default: no chat template kwargs for unknown models return nil } // setReasoningModeToRequestBody adds chat_template_kwargs to the JSON request body -func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled bool) ([]byte, error) { +func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled bool, categoryName string) ([]byte, error) { // Parse the JSON request body var requestMap map[string]interface{} if err := json.Unmarshal(requestBody, &requestMap); err != nil { @@ -91,8 +97,9 @@ func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled originalReasoningEffort = "low" } if enabled { - // TODO: make this configurable - requestMap["reasoning_effort"] = "high" + // Use configurable reasoning effort based on category + effort := r.getReasoningEffort(categoryName) + requestMap["reasoning_effort"] = effort } else { requestMap["reasoning_effort"] = originalReasoningEffort } @@ -170,3 +177,30 @@ func (r *OpenAIRouter) LogReasoningConfigurationSummary() { log.Printf("Reasoning mode summary: %d/%d categories have reasoning enabled", enabledCount, len(r.Config.Categories)) } + +// getReasoningEffort returns the reasoning effort level for a given category +func (r *OpenAIRouter) getReasoningEffort(categoryName string) string { + // Handle case where Config is nil (e.g., in tests) + if r.Config == nil { + return "medium" + } + + // Find the category configuration + for _, category := range r.Config.Categories { + if category.Name == categoryName { + // Use category-specific effort if configured + if category.ReasoningEffort != "" { + return category.ReasoningEffort + } + break + } + } + + // Fall back to global default if configured + if r.Config.DefaultReasoningEffort != "" { + return r.Config.DefaultReasoningEffort + } + + // Final fallback to "medium" as a reasonable default + return "medium" +} diff --git a/src/semantic-router/pkg/extproc/reasoning_integration_test.go b/src/semantic-router/pkg/extproc/reasoning_integration_test.go index dcfeb25c..d903acee 100644 --- a/src/semantic-router/pkg/extproc/reasoning_integration_test.go +++ b/src/semantic-router/pkg/extproc/reasoning_integration_test.go @@ -79,7 +79,7 @@ func TestReasoningModeIntegration(t *testing.T) { t.Fatalf("Failed to marshal original request: %v", err) } - modifiedBody, err := router.setReasoningModeToRequestBody(originalBody, true) + modifiedBody, err := router.setReasoningModeToRequestBody(originalBody, true, "math") if err != nil { t.Fatalf("Failed to add reasoning mode: %v", err) } @@ -130,7 +130,7 @@ func TestReasoningModeIntegration(t *testing.T) { t.Fatalf("Failed to marshal phi4 request: %v", err) } - modifiedBodyPhi4, err := router.setReasoningModeToRequestBody(originalBodyPhi4, true) + modifiedBodyPhi4, err := router.setReasoningModeToRequestBody(originalBodyPhi4, true, "math") if err != nil { t.Fatalf("Failed to process phi4 request: %v", err) } @@ -148,8 +148,8 @@ func TestReasoningModeIntegration(t *testing.T) { // But reasoning_effort should still be set if reasoningEffort, exists := modifiedRequestPhi4["reasoning_effort"]; !exists { t.Error("reasoning_effort should be set for phi4 model") - } else if reasoningEffort != "high" { - t.Errorf("Expected reasoning_effort: high for phi4 model, got %v", reasoningEffort) + } else if reasoningEffort != "medium" { + t.Errorf("Expected reasoning_effort: medium for phi4 model (default), got %v", reasoningEffort) } }) diff --git a/src/semantic-router/pkg/extproc/request_handler.go b/src/semantic-router/pkg/extproc/request_handler.go index 440189dc..09e40480 100644 --- a/src/semantic-router/pkg/extproc/request_handler.go +++ b/src/semantic-router/pkg/extproc/request_handler.go @@ -335,7 +335,7 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe log.Printf("Routing to model: %s", matchedModel) // Check reasoning mode for this category - useReasoning := r.shouldUseReasoningMode(userContent) + useReasoning, categoryName := r.getReasoningModeAndCategory(userContent) log.Printf("Reasoning mode decision for this query: %v on [%s] model", useReasoning, matchedModel) // Track the model load for the selected model @@ -366,7 +366,7 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe return nil, status.Errorf(codes.Internal, "error serializing modified request: %v", err) } - modifiedBody, err = r.setReasoningModeToRequestBody(modifiedBody, useReasoning) + modifiedBody, err = r.setReasoningModeToRequestBody(modifiedBody, useReasoning, categoryName) if err != nil { log.Printf("Error setting reasoning mode %v to request: %v", useReasoning, err) return nil, status.Errorf(codes.Internal, "error setting reasoning mode: %v", err)