diff --git a/config/config.yaml b/config/config.yaml index 3b74778a..fe41998b 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -57,6 +57,28 @@ model_config: pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types # Specify which endpoints can serve this model (optional - if not specified, uses all endpoints that list this model) preferred_endpoints: ["endpoint1", "endpoint3"] + # Reasoning family - phi4 doesn't support reasoning, so omit this field + + # Example: DeepSeek model with custom name + "ds-v31-custom": + reasoning_family: "deepseek" # This model uses DeepSeek reasoning syntax + preferred_endpoints: ["endpoint1"] + pii_policy: + allow_by_default: true + + # Example: Qwen3 model with custom name + "my-qwen3-model": + reasoning_family: "qwen3" # This model uses Qwen3 reasoning syntax + preferred_endpoints: ["endpoint2"] + pii_policy: + allow_by_default: true + + # Example: GPT-OSS model with custom name + "custom-gpt-oss": + reasoning_family: "gpt-oss" # This model uses GPT-OSS reasoning syntax + preferred_endpoints: ["endpoint1"] + pii_policy: + allow_by_default: true gemma3:27b: pricing: currency: USD @@ -236,7 +258,6 @@ categories: - model: phi4 score: 0.2 default_model: mistral-small3.1 -default_reasoning_effort: medium # Default reasoning effort level (low, medium, high) # API Configuration api: @@ -253,4 +274,25 @@ api: sample_rate: 1.0 # Collect metrics for all requests (1.0 = 100%, 0.5 = 50%) # Histogram buckets for metrics (directly configure what you need) duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30] - size_buckets: [1, 2, 5, 10, 20, 50, 100, 200] + size_buckets: [1, 2, 5, 10, 20, 50, 100, 200] + +# Reasoning family configurations - define how different model families handle reasoning syntax +reasoning_families: + deepseek: + type: "chat_template_kwargs" + parameter: "thinking" + + qwen3: + type: "chat_template_kwargs" + parameter: 
"enable_thinking" + + gpt-oss: + type: "reasoning_effort" + parameter: "reasoning_effort" + + gpt: + type: "reasoning_effort" + parameter: "reasoning_effort" + +# Global default reasoning effort level +default_reasoning_effort: medium # Default reasoning effort level (low, medium, high) diff --git a/src/semantic-router/pkg/config/config.go b/src/semantic-router/pkg/config/config.go index 8986263c..6c54ba52 100644 --- a/src/semantic-router/pkg/config/config.go +++ b/src/semantic-router/pkg/config/config.go @@ -45,6 +45,9 @@ type RouterConfig struct { // Default reasoning effort level (low, medium, high) when not specified per category DefaultReasoningEffort string `yaml:"default_reasoning_effort,omitempty"` + // Reasoning family configurations to define how different model families handle reasoning syntax + ReasoningFamilies map[string]ReasoningFamilyConfig `yaml:"reasoning_families,omitempty"` + // Semantic cache configuration SemanticCache SemanticCacheConfig `yaml:"semantic_cache"` @@ -208,6 +211,16 @@ type ModelParams struct { // Optional pricing used for cost computation Pricing ModelPricing `yaml:"pricing,omitempty"` + + // Reasoning family for this model (e.g., "deepseek", "qwen3", "gpt-oss") + // If empty, the model doesn't support reasoning mode + ReasoningFamily string `yaml:"reasoning_family,omitempty"` +} + +// ReasoningFamilyConfig defines how a reasoning family handles reasoning mode +type ReasoningFamilyConfig struct { + Type string `yaml:"type"` // "chat_template_kwargs" or "reasoning_effort" + Parameter string `yaml:"parameter"` // "thinking", "enable_thinking", "reasoning_effort", etc. 
} // PIIPolicy represents the PII (Personally Identifiable Information) policy for a model @@ -264,6 +277,41 @@ type Category struct { ModelScores []ModelScore `yaml:"model_scores"` } +// Legacy types - can be removed once migration is complete + +// GetModelReasoningFamily returns the reasoning family configuration for a given model name +func (rc *RouterConfig) GetModelReasoningFamily(modelName string) *ReasoningFamilyConfig { + if rc == nil || rc.ModelConfig == nil || rc.ReasoningFamilies == nil { + return nil + } + + // Look up the model in model_config + modelParams, exists := rc.ModelConfig[modelName] + if !exists || modelParams.ReasoningFamily == "" { + return nil + } + + // Look up the reasoning family configuration + familyConfig, exists := rc.ReasoningFamilies[modelParams.ReasoningFamily] + if !exists { + return nil + } + + return &familyConfig +} + +// Legacy functions - can be removed once migration is complete + +// contains checks if a slice contains a string +func contains(slice []string, item string) bool { + for _, s := range slice { + if s == item { + return true + } + } + return false +} + var ( config *RouterConfig configOnce sync.Once diff --git a/src/semantic-router/pkg/extproc/reason_mode_config_test.go b/src/semantic-router/pkg/extproc/reason_mode_config_test.go index 2e8fc347..e58b7560 100644 --- a/src/semantic-router/pkg/extproc/reason_mode_config_test.go +++ b/src/semantic-router/pkg/extproc/reason_mode_config_test.go @@ -199,10 +199,42 @@ requestBody := buildRequestBody(model, messages, useReasoning, stream) func TestAddReasoningModeToRequestBody(t *testing.T) { fmt.Println("=== Testing addReasoningModeToRequestBody Function ===") - // Create a mock router with minimal config - router := &OpenAIRouter{} + // Create a mock router with family-based reasoning config + router := &OpenAIRouter{ + Config: &config.RouterConfig{ + DefaultReasoningEffort: "medium", + ReasoningFamilies: map[string]config.ReasoningFamilyConfig{ + "deepseek": { + 
Type: "chat_template_kwargs", + Parameter: "thinking", + }, + "qwen3": { + Type: "chat_template_kwargs", + Parameter: "enable_thinking", + }, + "gpt-oss": { + Type: "reasoning_effort", + Parameter: "reasoning_effort", + }, + }, + ModelConfig: map[string]config.ModelParams{ + "deepseek-v31": { + ReasoningFamily: "deepseek", + }, + "qwen3-model": { + ReasoningFamily: "qwen3", + }, + "gpt-oss-model": { + ReasoningFamily: "gpt-oss", + }, + "phi4": { + // No reasoning family - doesn't support reasoning + }, + }, + }, + } - // Test case 1: Basic request body + // Test case 1: Basic request body with model that has NO reasoning support (phi4) originalRequest := map[string]interface{}{ "model": "phi4", "messages": []map[string]interface{}{ @@ -235,29 +267,76 @@ func TestAddReasoningModeToRequestBody(t *testing.T) { return } - // Check if chat_template_kwargs was added - if chatTemplateKwargs, exists := modifiedRequest["chat_template_kwargs"]; exists { + // Check that chat_template_kwargs was NOT added for phi4 (since it has no reasoning_family) + if _, exists := modifiedRequest["chat_template_kwargs"]; exists { + fmt.Println("ERROR: chat_template_kwargs should not be added for phi4 (no reasoning family configured)") + } else { + fmt.Println("SUCCESS: chat_template_kwargs correctly not added for phi4 (no reasoning support)") + } + + // Check that reasoning_effort was NOT added for phi4 + if _, exists := modifiedRequest["reasoning_effort"]; exists { + fmt.Println("ERROR: reasoning_effort should not be added for phi4 (no reasoning family configured)") + } else { + fmt.Println("SUCCESS: reasoning_effort correctly not added for phi4 (no reasoning support)") + } + + // Test case 2: Request with model that HAS reasoning support (deepseek-v31) + fmt.Println("\n--- Test Case 2: Model with reasoning support ---") + deepseekRequest := map[string]interface{}{ + "model": "deepseek-v31", + "messages": []map[string]interface{}{ + {"role": "user", "content": "What is 2 + 2?"}, + }, + 
"stream": false, + } + + deepseekBody, err := json.Marshal(deepseekRequest) + if err != nil { + fmt.Printf("Error marshaling deepseek request: %v\n", err) + return + } + + fmt.Printf("Original deepseek request:\n%s\n\n", string(deepseekBody)) + + // Add reasoning mode to DeepSeek model + modifiedDeepseekBody, err := router.setReasoningModeToRequestBody(deepseekBody, true, "math") + if err != nil { + fmt.Printf("Error adding reasoning mode to deepseek: %v\n", err) + return + } + + fmt.Printf("Modified deepseek request with reasoning:\n%s\n\n", string(modifiedDeepseekBody)) + + var modifiedDeepseekRequest map[string]interface{} + if err := json.Unmarshal(modifiedDeepseekBody, &modifiedDeepseekRequest); err != nil { + fmt.Printf("Error unmarshaling modified deepseek request: %v\n", err) + return + } + + // Check that chat_template_kwargs WAS added for deepseek-v31 + if chatTemplateKwargs, exists := modifiedDeepseekRequest["chat_template_kwargs"]; exists { if kwargs, ok := chatTemplateKwargs.(map[string]interface{}); ok { if thinking, hasThinking := kwargs["thinking"]; hasThinking { if thinkingBool, isBool := thinking.(bool); isBool && thinkingBool { - fmt.Println("✅ SUCCESS: chat_template_kwargs with thinking: true was correctly added") + fmt.Println("SUCCESS: chat_template_kwargs with thinking: true correctly added for deepseek-v31") } else { - fmt.Printf("❌ ERROR: thinking value is not true, got: %v\n", thinking) + fmt.Printf("ERROR: thinking value is not true for deepseek-v31, got: %v\n", thinking) } } else { - fmt.Println("❌ ERROR: thinking field not found in chat_template_kwargs") + fmt.Println("ERROR: thinking field not found in chat_template_kwargs for deepseek-v31") } } else { - fmt.Printf("❌ ERROR: chat_template_kwargs is not a map, got: %T\n", chatTemplateKwargs) + fmt.Printf("ERROR: chat_template_kwargs is not a map for deepseek-v31, got: %T\n", chatTemplateKwargs) } } else { - fmt.Println("❌ ERROR: chat_template_kwargs not found in modified request") + 
fmt.Println("ERROR: chat_template_kwargs not found for deepseek-v31 (should be present)") } - // Test case 2: Request with existing fields - fmt.Println("\n--- Test Case 2: Request with existing fields ---") + // Test case 3: Request with existing fields + fmt.Println("\n--- Test Case 3: Request with existing fields ---") complexRequest := map[string]interface{}{ - "model": "phi4", + "model": "deepseek-v31", "messages": []map[string]interface{}{ {"role": "system", "content": "You are a helpful assistant"}, {"role": "user", "content": "Solve x^2 + 5x + 6 = 0"}, @@ -290,20 +369,20 @@ func TestAddReasoningModeToRequestBody(t *testing.T) { allFieldsPreserved := true for _, field := range originalFields { if _, exists := modifiedComplexRequest[field]; !exists { - fmt.Printf("❌ ERROR: Original field '%s' was lost\n", field) + fmt.Printf("ERROR: Original field '%s' was lost\n", field) allFieldsPreserved = false } } if allFieldsPreserved { - fmt.Println("✅ SUCCESS: All original fields preserved") + fmt.Println("SUCCESS: All original fields preserved") } - // Verify chat_template_kwargs was added + // Verify chat_template_kwargs was added for deepseek-v31 if _, exists := modifiedComplexRequest["chat_template_kwargs"]; exists { - fmt.Println("✅ SUCCESS: chat_template_kwargs added to complex request") - fmt.Printf("Final modified request:\n%s\n", string(modifiedComplexBody)) + fmt.Println("SUCCESS: chat_template_kwargs added to complex deepseek request") + fmt.Printf("Final modified deepseek request:\n%s\n", string(modifiedComplexBody)) } else { - fmt.Println("❌ ERROR: chat_template_kwargs not added to complex request") + fmt.Println("ERROR: chat_template_kwargs not added to complex deepseek request") } } diff --git a/src/semantic-router/pkg/extproc/reason_mode_selector.go b/src/semantic-router/pkg/extproc/reason_mode_selector.go index aa17cf0e..00051736 100644 --- a/src/semantic-router/pkg/extproc/reason_mode_selector.go +++ 
b/src/semantic-router/pkg/extproc/reason_mode_selector.go @@ -6,6 +6,7 @@ import ( "log" "strings" + "github.com/vllm-project/semantic-router/semantic-router/pkg/config" "github.com/vllm-project/semantic-router/semantic-router/pkg/metrics" ) @@ -47,61 +48,42 @@ func (r *OpenAIRouter) getReasoningModeAndCategory(query string) (bool, string) return false, categoryName } -// hasDeepSeekAlias returns true if the model uses a short alias for DeepSeek (e.g., "ds-*") -// Rules: -// - Accept only when the model string starts with: "ds-", "ds_", "ds:", "ds " or exactly equals "ds" -// - Do NOT match occurrences of "ds" in the middle of the model name (e.g., "foo-ds-1b") -func hasDeepSeekAlias(lower string) bool { - lower = strings.TrimSpace(lower) - if strings.HasPrefix(lower, "ds") { - if len(lower) == 2 { // exactly "ds" - return true - } - sep := lower[2] - return sep == '-' || sep == '_' || sep == ':' || sep == ' ' +// getModelReasoningFamily finds the reasoning family configuration for a model using the config system +func (r *OpenAIRouter) getModelReasoningFamily(model string) *config.ReasoningFamilyConfig { + if r.Config == nil { + return nil } - return false + return r.Config.GetModelReasoningFamily(model) } -// getModelFamilyAndTemplateParam returns a normalized model family name and the template param to be used (if any) -func getModelFamilyAndTemplateParam(model string) (string, string) { - lower := strings.ToLower(strings.TrimSpace(model)) - if strings.Contains(lower, "qwen3") { - return "qwen3", "enable_thinking" - } - if strings.Contains(lower, "deepseek") || hasDeepSeekAlias(lower) { - return "deepseek", "thinking" - } - // GPT-OSS family and generic GPT fall back to using reasoning_effort (OpenAI-compatible field) - if strings.Contains(lower, "gpt-oss") || strings.Contains(lower, "gpt_oss") { - return "gpt-oss", "reasoning_effort" +// buildReasoningRequestFields returns the appropriate fields to add to the request based on model config +func (r 
*OpenAIRouter) buildReasoningRequestFields(model string, useReasoning bool, categoryName string) (map[string]interface{}, string) { + familyConfig := r.getModelReasoningFamily(model) + if familyConfig == nil { + // No reasoning family configured for this model - don't apply any reasoning syntax + // Models without reasoning_family don't support reasoning mode + return nil, "" } - if strings.Contains(lower, "gpt") { - return "gpt", "reasoning_effort" - } - return "unknown", "" -} -// getChatTemplateKwargs returns the appropriate chat template kwargs based on model and reasoning mode -func getChatTemplateKwargs(model string, useReasoning bool) map[string]interface{} { - lower := strings.ToLower(strings.TrimSpace(model)) - - // Qwen3: use enable_thinking true/false - if strings.Contains(lower, "qwen3") { - return map[string]interface{}{ - "enable_thinking": useReasoning, - } + if !useReasoning { + // When reasoning is disabled, don't add any reasoning fields + return nil, "" } - // DeepSeek v3 family: use thinking true/false - if strings.Contains(lower, "deepseek") || strings.Contains(lower, "ds") { - return map[string]interface{}{ - "thinking": useReasoning, + // When reasoning is enabled, use the configured family syntax + switch familyConfig.Type { + case "chat_template_kwargs": + kwargs := map[string]interface{}{ + familyConfig.Parameter: useReasoning, } + return map[string]interface{}{"chat_template_kwargs": kwargs}, "" + case "reasoning_effort": + effort := r.getReasoningEffort(categoryName) + return map[string]interface{}{"reasoning_effort": effort}, effort + default: + // Unknown reasoning syntax type - don't apply anything + return nil, "" } - - // Default: no chat template kwargs for unknown models - return nil } // setReasoningModeToRequestBody adds chat_template_kwargs to the JSON request body @@ -120,49 +102,81 @@ func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled } } - family, param := getModelFamilyAndTemplateParam(model) - 
- // Add chat_template_kwargs for reasoning mode - kwargs := getChatTemplateKwargs(model, enabled) - if kwargs != nil { - requestMap["chat_template_kwargs"] = kwargs - } else { - delete(requestMap, "chat_template_kwargs") + // Get original reasoning effort for potential preservation + originalReasoningEffort, hasOriginalEffort := requestMap["reasoning_effort"] + if !hasOriginalEffort { + originalReasoningEffort = "low" // Default for compatibility } - // Also set Reasoning-Effort in openai request - // This is a hack to get the reasoning mode for openai/gpt-oss-20b to work - originalReasoningEffort, ok := requestMap["reasoning_effort"] - if !ok { - // This seems to be the default for openai/gpt-oss models - originalReasoningEffort = "low" - } - var appliedEffort string + + // Clear both reasoning fields to start with a clean state + delete(requestMap, "chat_template_kwargs") + delete(requestMap, "reasoning_effort") + + var appliedEffort string = "" + + var reasoningApplied bool + if enabled { - // Use configurable reasoning effort based on category - effort := r.getReasoningEffort(categoryName) - requestMap["reasoning_effort"] = effort - appliedEffort = effort + // When reasoning is enabled, build the appropriate fields + reasoningFields, effort := r.buildReasoningRequestFields(model, enabled, categoryName) + if reasoningFields != nil { + for key, value := range reasoningFields { + requestMap[key] = value + } + appliedEffort = effort + reasoningApplied = true + } else { + // Model has no reasoning family configured + reasoningApplied = false + } } else { - requestMap["reasoning_effort"] = originalReasoningEffort - if s, ok := originalReasoningEffort.(string); ok { - appliedEffort = s + // When reasoning is disabled, only preserve reasoning_effort for gpt-oss models + familyConfig := r.getModelReasoningFamily(model) + if familyConfig != nil && familyConfig.Type == "reasoning_effort" { + requestMap["reasoning_effort"] = originalReasoningEffort + if s, ok := 
originalReasoningEffort.(string); ok { + appliedEffort = s + } } + reasoningApplied = false + // For all other models, reasoning fields remain cleared } - log.Printf("Original reasoning effort: %s", originalReasoningEffort) - log.Printf("Added reasoning mode (enabled: %v) and reasoning effort (%s) to request for model: %s", enabled, requestMap["reasoning_effort"], model) + // Log based on what actually happened + if enabled && !reasoningApplied { + log.Printf("No reasoning support for model: %s (no reasoning family configured)", model) + } else if reasoningApplied { + log.Printf("Applied reasoning mode (enabled: %v) with effort (%s) to model: %s", enabled, appliedEffort, model) + } else { + log.Printf("Reasoning mode disabled for model: %s", model) + } // Record metrics for template usage and effort when enabled if enabled { - // If we applied a known template param, record its usage - if kwargs != nil && param != "" { - metrics.RecordReasoningTemplateUsage(family, param) - } else if kwargs == nil && param == "reasoning_effort" { - // For GPT/GPT-OSS, we only set reasoning_effort - metrics.RecordReasoningTemplateUsage(family, param) + familyConfig := r.getModelReasoningFamily(model) + modelFamily := "unknown" + templateParam := "reasoning_effort" // default fallback + + if familyConfig != nil { + // Use the model's actual reasoning family name from model_config + if r.Config != nil && r.Config.ModelConfig != nil { + if modelParams, exists := r.Config.ModelConfig[model]; exists && modelParams.ReasoningFamily != "" { + modelFamily = modelParams.ReasoningFamily + } + } + + if familyConfig.Type == "chat_template_kwargs" { + templateParam = familyConfig.Parameter + } else { + templateParam = "reasoning_effort" + } + } + + // Record template usage and effort + metrics.RecordReasoningTemplateUsage(modelFamily, templateParam) + if appliedEffort != "" { + metrics.RecordReasoningEffortUsage(modelFamily, appliedEffort) } - // Record which effort level was used for this family 
- metrics.RecordReasoningEffortUsage(family, appliedEffort) } // Serialize back to JSON diff --git a/src/semantic-router/pkg/extproc/reason_mode_selector_test.go b/src/semantic-router/pkg/extproc/reason_mode_selector_test.go index 527dd1db..ffe95d66 100644 --- a/src/semantic-router/pkg/extproc/reason_mode_selector_test.go +++ b/src/semantic-router/pkg/extproc/reason_mode_selector_test.go @@ -1,76 +1,347 @@ package extproc -import "testing" +import ( + "encoding/json" + "testing" + + "github.com/vllm-project/semantic-router/semantic-router/pkg/config" +) + +// TestModelReasoningFamily tests the new family-based configuration approach +func TestModelReasoningFamily(t *testing.T) { + // Create a router with sample model configurations + router := &OpenAIRouter{ + Config: &config.RouterConfig{ + DefaultReasoningEffort: "medium", + ReasoningFamilies: map[string]config.ReasoningFamilyConfig{ + "qwen3": { + Type: "chat_template_kwargs", + Parameter: "enable_thinking", + }, + "deepseek": { + Type: "chat_template_kwargs", + Parameter: "thinking", + }, + "gpt-oss": { + Type: "reasoning_effort", + Parameter: "reasoning_effort", + }, + "gpt": { + Type: "reasoning_effort", + Parameter: "reasoning_effort", + }, + }, + ModelConfig: map[string]config.ModelParams{ + "qwen3-model": { + ReasoningFamily: "qwen3", + }, + "ds-v31-custom": { + ReasoningFamily: "deepseek", + }, + "my-deepseek": { + ReasoningFamily: "deepseek", + }, + "gpt-oss-model": { + ReasoningFamily: "gpt-oss", + }, + "custom-gpt": { + ReasoningFamily: "gpt", + }, + "phi4": { + // No reasoning family - doesn't support reasoning + }, + }, + }, + } -// TestGetModelFamilyAndTemplateParam verifies model-family detection and template parameter mapping -func TestGetModelFamilyAndTemplateParam(t *testing.T) { testCases := []struct { - name string - model string - expectedFamily string - expectedParam string + name string + model string + expectedConfig string // expected config name or empty for no config + expectedType 
string + expectedParameter string + expectConfig bool }{ { - name: "Qwen3 family", - model: "Qwen3-7B", - expectedFamily: "qwen3", - expectedParam: "enable_thinking", + name: "qwen3-model with qwen3 family", + model: "qwen3-model", + expectedConfig: "qwen3", + expectedType: "chat_template_kwargs", + expectedParameter: "enable_thinking", + expectConfig: true, }, { - name: "DeepSeek family", - model: "deepseek-v31", - expectedFamily: "deepseek", - expectedParam: "thinking", + name: "ds-v31-custom with deepseek family", + model: "ds-v31-custom", + expectedConfig: "deepseek", + expectedType: "chat_template_kwargs", + expectedParameter: "thinking", + expectConfig: true, }, { - name: "DeepSeek alias ds (prefix)", - model: "DS-1.5B", - expectedFamily: "deepseek", - expectedParam: "thinking", + name: "my-deepseek with deepseek family", + model: "my-deepseek", + expectedConfig: "deepseek", + expectedType: "chat_template_kwargs", + expectedParameter: "thinking", + expectConfig: true, }, { - name: "Non-leading ds should not match DeepSeek", - model: "mistral-ds-1b", - expectedFamily: "unknown", - expectedParam: "", + name: "gpt-oss-model with gpt-oss family", + model: "gpt-oss-model", + expectedConfig: "gpt-oss", + expectedType: "reasoning_effort", + expectedParameter: "reasoning_effort", + expectConfig: true, }, { - name: "GPT-OSS family", - model: "gpt-oss-20b", - expectedFamily: "gpt-oss", - expectedParam: "reasoning_effort", + name: "custom-gpt with gpt family", + model: "custom-gpt", + expectedConfig: "gpt", + expectedType: "reasoning_effort", + expectedParameter: "reasoning_effort", + expectConfig: true, }, { - name: "GPT generic family", - model: "gpt-4o-mini", - expectedFamily: "gpt", - expectedParam: "reasoning_effort", + name: "phi4 - no reasoning family", + model: "phi4", + expectedConfig: "", + expectedType: "", + expectedParameter: "", + expectConfig: false, + }, + { + name: "unknown model - no config", + model: "unknown-model", + expectedConfig: "", + 
expectedType: "", + expectedParameter: "", + expectConfig: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + familyConfig := router.getModelReasoningFamily(tc.model) + + if !tc.expectConfig { + // For unknown models, we expect no configuration + if familyConfig != nil { + t.Fatalf("Expected no family config for %q, got %+v", tc.model, familyConfig) + } + return + } + + // For known models, we expect a valid configuration + if familyConfig == nil { + t.Fatalf("Expected family config for %q, got nil", tc.model) + } + if familyConfig.Type != tc.expectedType { + t.Fatalf("Expected type %q for model %q, got %q", tc.expectedType, tc.model, familyConfig.Type) + } + if familyConfig.Parameter != tc.expectedParameter { + t.Fatalf("Expected parameter %q for model %q, got %q", tc.expectedParameter, tc.model, familyConfig.Parameter) + } + }) + } +} + +// TestSetReasoningModeToRequestBody verifies that reasoning_effort is handled correctly for different model families +func TestSetReasoningModeToRequestBody(t *testing.T) { + // Create a router with family-based reasoning configurations + router := &OpenAIRouter{ + Config: &config.RouterConfig{ + DefaultReasoningEffort: "medium", + ReasoningFamilies: map[string]config.ReasoningFamilyConfig{ + "deepseek": { + Type: "chat_template_kwargs", + Parameter: "thinking", + }, + "qwen3": { + Type: "chat_template_kwargs", + Parameter: "enable_thinking", + }, + "gpt-oss": { + Type: "reasoning_effort", + Parameter: "reasoning_effort", + }, + }, + ModelConfig: map[string]config.ModelParams{ + "ds-v31-custom": { + ReasoningFamily: "deepseek", + }, + "qwen3-model": { + ReasoningFamily: "qwen3", + }, + "gpt-oss-model": { + ReasoningFamily: "gpt-oss", + }, + "phi4": { + // No reasoning family - doesn't support reasoning + }, + }, }, + } + + testCases := []struct { + name string + model string + enabled bool + initialReasoningEffort interface{} + expectReasoningEffortKey bool + expectedReasoningEffort 
interface{} + expectedChatTemplateKwargs bool + }{ { - name: "GPT underscore variant", - model: " GPT_OSS-foo ", - expectedFamily: "gpt-oss", - expectedParam: "reasoning_effort", + name: "GPT-OSS model with reasoning disabled - preserve reasoning_effort", + model: "gpt-oss-model", + enabled: false, + initialReasoningEffort: "low", + expectReasoningEffortKey: true, + expectedReasoningEffort: "low", + expectedChatTemplateKwargs: false, }, { - name: "Unknown family", - model: "phi4", - expectedFamily: "unknown", - expectedParam: "", + name: "Phi4 model with reasoning disabled - remove reasoning_effort", + model: "phi4", + enabled: false, + initialReasoningEffort: "low", + expectReasoningEffortKey: false, + expectedReasoningEffort: nil, + expectedChatTemplateKwargs: false, }, { - name: "Empty model name", - model: "", - expectedFamily: "unknown", - expectedParam: "", + name: "Phi4 model with reasoning enabled - no fields set (no reasoning family)", + model: "phi4", + enabled: true, + initialReasoningEffort: "low", + expectReasoningEffortKey: false, + expectedReasoningEffort: nil, + expectedChatTemplateKwargs: false, + }, + { + name: "DeepSeek model with reasoning disabled - remove reasoning_effort", + model: "ds-v31-custom", + enabled: false, + initialReasoningEffort: "low", + expectReasoningEffortKey: false, + expectedReasoningEffort: nil, + expectedChatTemplateKwargs: false, + }, + { + name: "GPT-OSS model with reasoning enabled - set reasoning_effort", + model: "gpt-oss-model", + enabled: true, + initialReasoningEffort: "low", + expectReasoningEffortKey: true, + expectedReasoningEffort: "medium", + expectedChatTemplateKwargs: false, + }, + { + name: "DeepSeek model with reasoning enabled - set chat_template_kwargs", + model: "ds-v31-custom", + enabled: true, + initialReasoningEffort: "low", + expectReasoningEffortKey: false, + expectedReasoningEffort: nil, + expectedChatTemplateKwargs: true, + }, + { + name: "Unknown model - no fields set", + model: "unknown-model", 
+ enabled: true, + initialReasoningEffort: "low", + expectReasoningEffortKey: false, + expectedReasoningEffort: nil, + expectedChatTemplateKwargs: false, + }, + { + name: "Qwen3 model with reasoning enabled - set chat_template_kwargs", + model: "qwen3-model", + enabled: true, + initialReasoningEffort: "low", + expectReasoningEffortKey: false, + expectedReasoningEffort: nil, + expectedChatTemplateKwargs: true, + }, + { + name: "Qwen3 model with reasoning disabled - no fields set", + model: "qwen3-model", + enabled: false, + initialReasoningEffort: "low", + expectReasoningEffortKey: false, + expectedReasoningEffort: nil, + expectedChatTemplateKwargs: false, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - family, param := getModelFamilyAndTemplateParam(tc.model) - if family != tc.expectedFamily || param != tc.expectedParam { - t.Fatalf("for model %q got (family=%q, param=%q), want (family=%q, param=%q)", tc.model, family, param, tc.expectedFamily, tc.expectedParam) + // Prepare initial request body + requestBody := map[string]interface{}{ + "model": tc.model, + "messages": []map[string]string{ + {"role": "user", "content": "test message"}, + }, + } + if tc.initialReasoningEffort != nil { + requestBody["reasoning_effort"] = tc.initialReasoningEffort + } + + requestBytes, err := json.Marshal(requestBody) + if err != nil { + t.Fatalf("Failed to marshal request body: %v", err) + } + + // Call the function under test + modifiedBytes, err := router.setReasoningModeToRequestBody(requestBytes, tc.enabled, "test-category") + if err != nil { + t.Fatalf("setReasoningModeToRequestBody failed: %v", err) + } + + // Parse the modified request body + var modifiedRequest map[string]interface{} + if err := json.Unmarshal(modifiedBytes, &modifiedRequest); err != nil { + t.Fatalf("Failed to unmarshal modified request body: %v", err) + } + + // Check reasoning_effort handling + reasoningEffort, hasReasoningEffort := modifiedRequest["reasoning_effort"] + if 
tc.expectReasoningEffortKey != hasReasoningEffort { + t.Fatalf("Expected reasoning_effort key presence: %v, got: %v", tc.expectReasoningEffortKey, hasReasoningEffort) + } + if tc.expectReasoningEffortKey && reasoningEffort != tc.expectedReasoningEffort { + t.Fatalf("Expected reasoning_effort: %v, got: %v", tc.expectedReasoningEffort, reasoningEffort) + } + + // Check chat_template_kwargs handling + chatTemplateKwargs, hasChatTemplateKwargs := modifiedRequest["chat_template_kwargs"] + if tc.expectedChatTemplateKwargs != hasChatTemplateKwargs { + t.Fatalf("Expected chat_template_kwargs key presence: %v, got: %v", tc.expectedChatTemplateKwargs, hasChatTemplateKwargs) + } + if tc.expectedChatTemplateKwargs { + kwargs, ok := chatTemplateKwargs.(map[string]interface{}) + if !ok { + t.Fatalf("Expected chat_template_kwargs to be a map") + } + if len(kwargs) == 0 { + t.Fatalf("Expected non-empty chat_template_kwargs") + } + + // Validate the specific parameter based on model type + if tc.model == "deepseek-v31" || tc.model == "ds-1.5b" { + if thinkingValue, exists := kwargs["thinking"]; !exists { + t.Fatalf("Expected 'thinking' parameter in chat_template_kwargs for DeepSeek model") + } else if thinkingValue != true { + t.Fatalf("Expected 'thinking' to be true, got %v", thinkingValue) + } + } else if tc.model == "qwen3-7b" { + if thinkingValue, exists := kwargs["enable_thinking"]; !exists { + t.Fatalf("Expected 'enable_thinking' parameter in chat_template_kwargs for Qwen3 model") + } else if thinkingValue != true { + t.Fatalf("Expected 'enable_thinking' to be true, got %v", thinkingValue) + } + } } }) } diff --git a/src/semantic-router/pkg/extproc/reasoning_integration_test.go b/src/semantic-router/pkg/extproc/reasoning_integration_test.go index d903acee..799899bb 100644 --- a/src/semantic-router/pkg/extproc/reasoning_integration_test.go +++ b/src/semantic-router/pkg/extproc/reasoning_integration_test.go @@ -11,6 +11,7 @@ import ( func TestReasoningModeIntegration(t 
*testing.T) { // Create a mock router with reasoning configuration cfg := &config.RouterConfig{ + DefaultReasoningEffort: "medium", Categories: []config.Category{ { Name: "math", @@ -23,6 +24,34 @@ func TestReasoningModeIntegration(t *testing.T) { ReasoningDescription: "Business content is typically conversational", }, }, + ReasoningFamilies: map[string]config.ReasoningFamilyConfig{ + "deepseek": { + Type: "chat_template_kwargs", + Parameter: "thinking", + }, + "qwen3": { + Type: "chat_template_kwargs", + Parameter: "enable_thinking", + }, + "gpt-oss": { + Type: "reasoning_effort", + Parameter: "reasoning_effort", + }, + }, + ModelConfig: map[string]config.ModelParams{ + "deepseek-v31": { + ReasoningFamily: "deepseek", + }, + "qwen3-model": { + ReasoningFamily: "qwen3", + }, + "gpt-oss-model": { + ReasoningFamily: "gpt-oss", + }, + "phi4": { + // No reasoning family - doesn't support reasoning + }, + }, } router := &OpenAIRouter{ @@ -140,55 +169,86 @@ func TestReasoningModeIntegration(t *testing.T) { t.Fatalf("Failed to unmarshal phi4 request: %v", err) } - // For phi4, chat_template_kwargs should not be added (since it's not supported) + // For phi4, no reasoning fields should be added (since it's an unknown model) if _, exists := modifiedRequestPhi4["chat_template_kwargs"]; exists { - t.Error("chat_template_kwargs should not be added for unsupported model phi4") + t.Error("chat_template_kwargs should not be added for unknown model phi4") } - // But reasoning_effort should still be set - if reasoningEffort, exists := modifiedRequestPhi4["reasoning_effort"]; !exists { - t.Error("reasoning_effort should be set for phi4 model") - } else if reasoningEffort != "medium" { - t.Errorf("Expected reasoning_effort: medium for phi4 model (default), got %v", reasoningEffort) + // reasoning_effort should also not be set for unknown models + if reasoningEffort, exists := modifiedRequestPhi4["reasoning_effort"]; exists { + t.Errorf("reasoning_effort should NOT be set for unknown 
model phi4, but got %v", reasoningEffort) } }) - // Test case 4: Test getChatTemplateKwargs function - t.Run("getChatTemplateKwargs returns correct values", func(t *testing.T) { - // Test with DeepSeek model and reasoning enabled - kwargs := getChatTemplateKwargs("deepseek-v31", true) - if kwargs == nil { - t.Error("Expected non-nil kwargs for DeepSeek model with reasoning enabled") + // Test case 4: Test buildReasoningRequestFields function with config-driven approach + t.Run("buildReasoningRequestFields returns correct values", func(t *testing.T) { + // Create a router with sample configurations for testing + router := &OpenAIRouter{ + Config: &config.RouterConfig{ + DefaultReasoningEffort: "medium", + ReasoningFamilies: map[string]config.ReasoningFamilyConfig{ + "deepseek": { + Type: "chat_template_kwargs", + Parameter: "thinking", + }, + "qwen3": { + Type: "chat_template_kwargs", + Parameter: "enable_thinking", + }, + }, + ModelConfig: map[string]config.ModelParams{ + "deepseek-v31": { + ReasoningFamily: "deepseek", + }, + "qwen3-model": { + ReasoningFamily: "qwen3", + }, + "phi4": { + // No reasoning family - doesn't support reasoning + }, + }, + }, } - if thinking, ok := kwargs["thinking"]; !ok || thinking != true { + // Test with DeepSeek model and reasoning enabled + fields, _ := router.buildReasoningRequestFields("deepseek-v31", true, "test-category") + if fields == nil { + t.Error("Expected non-nil fields for DeepSeek model with reasoning enabled") + } + if chatKwargs, ok := fields["chat_template_kwargs"]; !ok { + t.Error("Expected chat_template_kwargs for DeepSeek model") + } else if kwargs, ok := chatKwargs.(map[string]interface{}); !ok { + t.Error("Expected chat_template_kwargs to be a map") + } else if thinking, ok := kwargs["thinking"]; !ok || thinking != true { t.Errorf("Expected thinking: true for DeepSeek model, got %v", thinking) } // Test with DeepSeek model and reasoning disabled - kwargs = getChatTemplateKwargs("deepseek-v31", false) - if 
kwargs == nil { - t.Error("Expected non-nil kwargs for DeepSeek model with reasoning disabled") - } - - if thinking, ok := kwargs["thinking"]; !ok || thinking != false { - t.Errorf("Expected thinking: false for DeepSeek model, got %v", thinking) + fields, _ = router.buildReasoningRequestFields("deepseek-v31", false, "test-category") + if fields != nil { + t.Errorf("Expected nil fields for DeepSeek model with reasoning disabled, got %v", fields) } // Test with Qwen3 model and reasoning enabled - kwargs = getChatTemplateKwargs("qwen3-7b", true) - if kwargs == nil { - t.Error("Expected non-nil kwargs for Qwen3 model with reasoning enabled") + fields, _ = router.buildReasoningRequestFields("qwen3-model", true, "test-category") + if fields == nil { + t.Error("Expected non-nil fields for Qwen3 model with reasoning enabled") } - - if enableThinking, ok := kwargs["enable_thinking"]; !ok || enableThinking != true { + if chatKwargs, ok := fields["chat_template_kwargs"]; !ok { + t.Error("Expected chat_template_kwargs for Qwen3 model") + } else if kwargs, ok := chatKwargs.(map[string]interface{}); !ok { + t.Error("Expected chat_template_kwargs to be a map") + } else if enableThinking, ok := kwargs["enable_thinking"]; !ok || enableThinking != true { t.Errorf("Expected enable_thinking: true for Qwen3 model, got %v", enableThinking) } - // Test with unknown model (should return nil) - kwargs = getChatTemplateKwargs("unknown-model", true) - if kwargs != nil { - t.Errorf("Expected nil kwargs for unknown model, got %v", kwargs) + // Test with unknown model (should return no fields) + fields, effort := router.buildReasoningRequestFields("unknown-model", true, "test-category") + if fields != nil { + t.Errorf("Expected nil fields for unknown model with reasoning enabled, got %v", fields) + } + if effort != "" { + t.Errorf("Expected effort string: empty for unknown model, got %v", effort) } }) diff --git a/website/docs/getting-started/configuration.md 
b/website/docs/getting-started/configuration.md index 5f4cdadb..eeaa79be 100644 --- a/website/docs/getting-started/configuration.md +++ b/website/docs/getting-started/configuration.md @@ -87,6 +87,44 @@ categories: score: 0.8 default_model: your-model + +# Reasoning family configurations - define how different model families handle reasoning syntax +reasoning_families: + deepseek: + type: "chat_template_kwargs" + parameter: "thinking" + + qwen3: + type: "chat_template_kwargs" + parameter: "enable_thinking" + + gpt-oss: + type: "reasoning_effort" + parameter: "reasoning_effort" + + gpt: + type: "reasoning_effort" + parameter: "reasoning_effort" + +# Global default reasoning effort level +default_reasoning_effort: "medium" + +# Model configurations - assign reasoning families to specific models +model_config: + # Example: DeepSeek model with custom name + "ds-v31-custom": + reasoning_family: "deepseek" # This model uses DeepSeek reasoning syntax + preferred_endpoints: ["endpoint1"] + + # Example: Qwen3 model with custom name + "my-qwen3-model": + reasoning_family: "qwen3" # This model uses Qwen3 reasoning syntax + preferred_endpoints: ["endpoint2"] + + # Example: Model without reasoning support + "phi4": + # No reasoning_family field - this model doesn't support reasoning mode + preferred_endpoints: ["endpoint1"] ``` ## Key Configuration Sections @@ -189,6 +227,111 @@ categories: default_model: your-model # Fallback model ``` +### Model Reasoning Configuration + +Configure how different models handle reasoning mode syntax. 
This allows you to add new models without code changes:
+
+```yaml
+# Reasoning family configurations - define how different model families handle reasoning syntax
+reasoning_families:
+  deepseek:
+    type: "chat_template_kwargs"
+    parameter: "thinking"
+
+  qwen3:
+    type: "chat_template_kwargs"
+    parameter: "enable_thinking"
+
+  gpt-oss:
+    type: "reasoning_effort"
+    parameter: "reasoning_effort"
+
+  gpt:
+    type: "reasoning_effort"
+    parameter: "reasoning_effort"
+
+# Global default reasoning effort level (when not specified per category)
+default_reasoning_effort: "medium"
+
+# Assign a reasoning family to each model that supports reasoning mode
+model_config:
+  "ds-v31-custom":
+    reasoning_family: "deepseek"
+  "my-qwen3-model":
+    reasoning_family: "qwen3"
+  "phi4": {}  # No reasoning_family - this model doesn't support reasoning mode
+```
+
+#### Reasoning Family Configuration Options
+
+**Configuration Structure:**
+- Each key under `reasoning_families` is a unique identifier for a model family
+- `type`: How the model expects reasoning mode to be specified
+  - `"chat_template_kwargs"`: Use chat template parameters (for models like DeepSeek, Qwen3)
+  - `"reasoning_effort"`: Use the OpenAI-compatible `reasoning_effort` field (for GPT models)
+- `parameter`: The specific parameter name the model uses
+
+**Model Assignment:**
+Models are mapped to a reasoning family explicitly via the `reasoning_family` field in their `model_config` entry. There is no name-based pattern matching: a model receives reasoning fields only if its entry sets `reasoning_family` to a family defined under `reasoning_families`.
+
+**Adding New Models:**
+To support a new model family (e.g., Claude), define the family and assign it to your model:
+
+```yaml
+reasoning_families:
+  claude:
+    type: "chat_template_kwargs"
+    parameter: "enable_reasoning"
+
+model_config:
+  "my-claude-model":
+    reasoning_family: "claude"
+```
+
+**Models Without a Reasoning Family:**
+Models whose `model_config` entry has no `reasoning_family` (or that have no entry at all) will have no reasoning fields applied when reasoning mode is enabled. This prevents issues with models that don't support reasoning syntax.
+
+**Default Reasoning Effort:**
+Set the global default reasoning effort level used when categories don't specify their own effort level:
+
+```yaml
+default_reasoning_effort: "high"  # Options: "low", "medium", "high"
+```
+
+**Category-Specific Reasoning Effort:**
+Override the default effort level per category:
+
+```yaml
+categories:
+- name: math
+  use_reasoning: true
+  reasoning_effort: "high"  # Use high effort for complex math
+  model_scores:
+  - model: your-model
+    score: 1.0
+
+- name: general
+  use_reasoning: true
+  reasoning_effort: "low"  # Use low effort for general queries
+  model_scores:
+  - model: your-model
+    score: 1.0
+```
+
 ### Security Features
 
 Configure PII detection and jailbreak protection:
@@ -577,6 +720,41 @@ make test-pii  # PII detection
 make test-prompt-guard  # Jailbreak protection
 ```
 
+### Model Reasoning Configuration Issues
+
+**Model not getting reasoning fields:**
+- Check that the model's `model_config` entry sets a `reasoning_family`
+- Verify the family name matches one defined under `reasoning_families`
+- Models without a `reasoning_family` have no reasoning fields applied (this is by design)
+
+**Wrong reasoning syntax applied:**
+- Ensure the family's `type` matches your model's expected format
+- Check that the `parameter` name is correct
+- DeepSeek models typically use `chat_template_kwargs` with `"thinking"`
+- GPT models typically use `reasoning_effort`
+
+**Adding support for new models:**
+```yaml
+# Define a new reasoning family and assign it to your model
+reasoning_families:
+  my-new-family:
+    type: "chat_template_kwargs"  # or "reasoning_effort"
+    parameter: "custom_parameter"
+
+model_config:
+  "my-model":
+    reasoning_family: "my-new-family"
+```
+
+**Testing model reasoning configuration:**
+```bash
+# Test reasoning with your specific model
+curl -X POST http://localhost:8801/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "your-model-name",
+    "messages": [{"role": "user", "content": "What is 2+2?"}]
+  }'
+```
+
 ## Configuration Generation
 
 The Semantic Router supports automated configuration generation based on model performance benchmarks. This workflow uses MMLU-Pro evaluation results to determine optimal model routing for different categories.