vllm-project
diff --git a/‎config/config.yaml‎
Lines changed: 44 additions & 2 deletions b/‎config/config.yaml‎
Lines changed: 44 additions & 2 deletions
diff --git a/‎src/semantic-router/pkg/config/config.go‎
Lines changed: 48 additions & 0 deletions b/‎src/semantic-router/pkg/config/config.go‎
Lines changed: 48 additions & 0 deletions
diff --git a/‎src/semantic-router/pkg/extproc/reason_mode_config_test.go‎
Lines changed: 98 additions & 19 deletions b/‎src/semantic-router/pkg/extproc/reason_mode_config_test.go‎
Lines changed: 98 additions & 19 deletions
@@ -57,6 +57,28 @@ model_config:
       pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"]  # Only allow these specific PII types
     # Specify which endpoints can serve this model (optional - if not specified, uses all endpoints that list this model)
     preferred_endpoints: ["endpoint1", "endpoint3"]
+    # reasoning_family is intentionally omitted here: phi4 doesn't support reasoning
+  
+  # Example: DeepSeek model with custom name
+  "ds-v31-custom":
+    reasoning_family: "deepseek"  # This model uses DeepSeek reasoning syntax
+    preferred_endpoints: ["endpoint1"]
+    pii_policy:
+      allow_by_default: true
+  
+  # Example: Qwen3 model with custom name  
+  "my-qwen3-model":
+    reasoning_family: "qwen3"     # This model uses Qwen3 reasoning syntax
+    preferred_endpoints: ["endpoint2"]
+    pii_policy:
+      allow_by_default: true
+  
+  # Example: GPT-OSS model with custom name
+  "custom-gpt-oss":
+    reasoning_family: "gpt-oss"   # This model uses GPT-OSS reasoning syntax
+    preferred_endpoints: ["endpoint1"]
+    pii_policy:
+      allow_by_default: true
   gemma3:27b:
     pricing:
       currency: USD
@@ -236,7 +258,6 @@ categories:
   - model: phi4
     score: 0.2
 default_model: mistral-small3.1
-default_reasoning_effort: medium  # Default reasoning effort level (low, medium, high)
 
 # API Configuration
 api:
@@ -253,4 +274,25 @@ api:
       sample_rate: 1.0                   # Collect metrics for all requests (1.0 = 100%, 0.5 = 50%)
       # Histogram buckets for metrics (directly configure what you need)
       duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
-      size_buckets: [1, 2, 5, 10, 20, 50, 100, 200] 
+      size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
+
+# Reasoning family configurations - define how different model families handle reasoning syntax.
+# Each key is the name a model references through model_config.<model>.reasoning_family.
+reasoning_families:
+  deepseek:
+    type: "chat_template_kwargs"
+    parameter: "thinking"
+  
+  qwen3:
+    type: "chat_template_kwargs"
+    parameter: "enable_thinking"
+  
+  gpt-oss:
+    type: "reasoning_effort"
+    parameter: "reasoning_effort"
+  
+  gpt:
+    type: "reasoning_effort"
+    parameter: "reasoning_effort"
+
+# Global default reasoning effort level
+default_reasoning_effort: medium  # Default reasoning effort level (low, medium, high) 
@@ -45,6 +45,9 @@ type RouterConfig struct {
 	// Default reasoning effort level (low, medium, high) when not specified per category
 	DefaultReasoningEffort string `yaml:"default_reasoning_effort,omitempty"`
 
+	// Reasoning family configurations to define how different model families handle reasoning syntax
+	ReasoningFamilies map[string]ReasoningFamilyConfig `yaml:"reasoning_families,omitempty"`
+
 	// Semantic cache configuration
 	SemanticCache SemanticCacheConfig `yaml:"semantic_cache"`
 
@@ -208,6 +211,16 @@ type ModelParams struct {
 
 	// Optional pricing used for cost computation
 	Pricing ModelPricing `yaml:"pricing,omitempty"`
+
+	// Reasoning family for this model (e.g., "deepseek", "qwen3", "gpt-oss").
+	// The value is looked up as a key in RouterConfig.ReasoningFamilies.
+	// If empty, the model doesn't support reasoning mode.
+	ReasoningFamily string `yaml:"reasoning_family,omitempty"`
+}
+
+// ReasoningFamilyConfig defines how a reasoning family handles reasoning mode
+type ReasoningFamilyConfig struct {
+	Type      string `yaml:"type"`      // "chat_template_kwargs" or "reasoning_effort"
+	Parameter string `yaml:"parameter"` // "thinking", "enable_thinking", "reasoning_effort", etc.
 }
 
 // PIIPolicy represents the PII (Personally Identifiable Information) policy for a model
@@ -264,6 +277,41 @@ type Category struct {
 	ModelScores          []ModelScore `yaml:"model_scores"`
 }
 
+// Reasoning family lookup
+
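+// GetModelReasoningFamily returns the reasoning family configuration for a given model name.
+// It returns nil when the receiver is nil, when ModelConfig or ReasoningFamilies is nil,
+// when the model is not in ModelConfig or has an empty ReasoningFamily, or when that
+// family name has no entry in ReasoningFamilies. The returned pointer refers to a copy
+// of the map value, so modifying it does not change the stored configuration.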
+func (rc *RouterConfig) GetModelReasoningFamily(modelName string) *ReasoningFamilyConfig {
+	if rc == nil || rc.ModelConfig == nil || rc.ReasoningFamilies == nil {
+		return nil
+	}
+
+	// Look up the model in model_config
+	modelParams, exists := rc.ModelConfig[modelName]
+	if !exists || modelParams.ReasoningFamily == "" {
+		return nil
+	}
+
+	// Look up the reasoning family configuration
+	familyConfig, exists := rc.ReasoningFamilies[modelParams.ReasoningFamily]
+	if !exists {
+		return nil
+	}
+
+	return &familyConfig
+}
+
+// Legacy functions - can be removed once migration is complete
+
+// contains reports whether item is present in slice.
+func contains(slice []string, item string) bool {
+	for _, s := range slice {
+		if s == item {
+			return true
+		}
+	}
+	return false
+}
+
 var (
 	config     *RouterConfig
 	configOnce sync.Once
 
@@ -199,10 +199,42 @@ requestBody := buildRequestBody(model, messages, useReasoning, stream)
 func TestAddReasoningModeToRequestBody(t *testing.T) {
 	fmt.Println("=== Testing addReasoningModeToRequestBody Function ===")
 
-	// Create a mock router with minimal config
-	router := &OpenAIRouter{}
+	// Create a mock router with family-based reasoning config
+	router := &OpenAIRouter{
+		Config: &config.RouterConfig{
+			DefaultReasoningEffort: "medium",
+			ReasoningFamilies: map[string]config.ReasoningFamilyConfig{
+				"deepseek": {
+					Type:      "chat_template_kwargs",
+					Parameter: "thinking",
+				},
+				"qwen3": {
+					Type:      "chat_template_kwargs",
+					Parameter: "enable_thinking",
+				},
+				"gpt-oss": {
+					Type:      "reasoning_effort",
+					Parameter: "reasoning_effort",
+				},
+			},
+			ModelConfig: map[string]config.ModelParams{
+				"deepseek-v31": {
+					ReasoningFamily: "deepseek",
+				},
+				"qwen3-model": {
+					ReasoningFamily: "qwen3",
+				},
+				"gpt-oss-model": {
+					ReasoningFamily: "gpt-oss",
+				},
+				"phi4": {
+					// No reasoning family - doesn't support reasoning
+				},
+			},
+		},
+	}
 
-	// Test case 1: Basic request body
+	// Test case 1: Basic request body with model that has NO reasoning support (phi4)
 	originalRequest := map[string]interface{}{
 		"model": "phi4",
 		"messages": []map[string]interface{}{
@@ -235,29 +267,76 @@ func TestAddReasoningModeToRequestBody(t *testing.T) {
 		return
 	}
 
-	// Check if chat_template_kwargs was added
-	if chatTemplateKwargs, exists := modifiedRequest["chat_template_kwargs"]; exists {
+	// Check that chat_template_kwargs was NOT added for phi4 (since it has no reasoning_family)
+	if _, exists := modifiedRequest["chat_template_kwargs"]; exists {
+		fmt.Println("ERROR: chat_template_kwargs should not be added for phi4 (no reasoning family configured)")
+	} else {
+		fmt.Println("SUCCESS: chat_template_kwargs correctly not added for phi4 (no reasoning support)")
+	}
+
+	// Check that reasoning_effort was NOT added for phi4
+	if _, exists := modifiedRequest["reasoning_effort"]; exists {
+		fmt.Println("ERROR: reasoning_effort should not be added for phi4 (no reasoning family configured)")
+	} else {
+		fmt.Println("SUCCESS: reasoning_effort correctly not added for phi4 (no reasoning support)")
+	}
+
+	// Test case 2: Request with model that HAS reasoning support (deepseek-v31)
+	fmt.Println("\n--- Test Case 2: Model with reasoning support ---")
+	deepseekRequest := map[string]interface{}{
+		"model": "deepseek-v31",
+		"messages": []map[string]interface{}{
+			{"role": "user", "content": "What is 2 + 2?"},
+		},
+		"stream": false,
+	}
+
+	deepseekBody, err := json.Marshal(deepseekRequest)
+	if err != nil {
+		fmt.Printf("Error marshaling deepseek request: %v\n", err)
+		return
+	}
+
+	fmt.Printf("Original deepseek request:\n%s\n\n", string(deepseekBody))
+
+	// Add reasoning mode to DeepSeek model
+	modifiedDeepseekBody, err := router.setReasoningModeToRequestBody(deepseekBody, true, "math")
+	if err != nil {
+		fmt.Printf("Error adding reasoning mode to deepseek: %v\n", err)
+		return
+	}
+
+	fmt.Printf("Modified deepseek request with reasoning:\n%s\n\n", string(modifiedDeepseekBody))
+
+	var modifiedDeepseekRequest map[string]interface{}
+	if err := json.Unmarshal(modifiedDeepseekBody, &modifiedDeepseekRequest); err != nil {
+		fmt.Printf("Error unmarshaling modified deepseek request: %v\n", err)
+		return
+	}
+
+	// Check that chat_template_kwargs WAS added for deepseek-v31
+	if chatTemplateKwargs, exists := modifiedDeepseekRequest["chat_template_kwargs"]; exists {
 		if kwargs, ok := chatTemplateKwargs.(map[string]interface{}); ok {
 			if thinking, hasThinking := kwargs["thinking"]; hasThinking {
 				if thinkingBool, isBool := thinking.(bool); isBool && thinkingBool {
-					fmt.Println("✅ SUCCESS: chat_template_kwargs with thinking: true was correctly added")
+					fmt.Println("SUCCESS: chat_template_kwargs with thinking: true correctly added for deepseek-v31")
 				} else {
-					fmt.Printf("❌ ERROR: thinking value is not true, got: %v\n", thinking)
+					fmt.Printf("ERROR: thinking value is not true for deepseek-v31, got: %v\n", thinking)
 				}
 			} else {
-				fmt.Println("❌ ERROR: thinking field not found in chat_template_kwargs")
+				fmt.Println("ERROR: thinking field not found in chat_template_kwargs for deepseek-v31")
 			}
 		} else {
-			fmt.Printf("❌ ERROR: chat_template_kwargs is not a map, got: %T\n", chatTemplateKwargs)
+			fmt.Printf("ERROR: chat_template_kwargs is not a map for deepseek-v31, got: %T\n", chatTemplateKwargs)
 		}
 	} else {
-		fmt.Println("❌ ERROR: chat_template_kwargs not found in modified request")
+		fmt.Println("ERROR: chat_template_kwargs not found for deepseek-v31 (should be present)")
 	}
 
-	// Test case 2: Request with existing fields
-	fmt.Println("\n--- Test Case 2: Request with existing fields ---")
+	// Test case 3: Request with existing fields
+	fmt.Println("\n--- Test Case 3: Request with existing fields ---")
 	complexRequest := map[string]interface{}{
-		"model": "phi4",
+		"model": "deepseek-v31",
 		"messages": []map[string]interface{}{
 			{"role": "system", "content": "You are a helpful assistant"},
 			{"role": "user", "content": "Solve x^2 + 5x + 6 = 0"},
@@ -290,20 +369,20 @@ func TestAddReasoningModeToRequestBody(t *testing.T) {
 	allFieldsPreserved := true
 	for _, field := range originalFields {
 		if _, exists := modifiedComplexRequest[field]; !exists {
-			fmt.Printf("❌ ERROR: Original field '%s' was lost\n", field)
+			fmt.Printf("ERROR: Original field '%s' was lost\n", field)
 			allFieldsPreserved = false
 		}
 	}
 
 	if allFieldsPreserved {
-		fmt.Println("✅ SUCCESS: All original fields preserved")
+		fmt.Println("SUCCESS: All original fields preserved")
 	}
 
-	// Verify chat_template_kwargs was added
+	// Verify chat_template_kwargs was added for deepseek-v31
 	if _, exists := modifiedComplexRequest["chat_template_kwargs"]; exists {
-		fmt.Println("✅ SUCCESS: chat_template_kwargs added to complex request")
-		fmt.Printf("Final modified request:\n%s\n", string(modifiedComplexBody))
+		fmt.Println("SUCCESS: chat_template_kwargs added to complex deepseek request")
+		fmt.Printf("Final modified deepseek request:\n%s\n", string(modifiedComplexBody))
 	} else {
-		fmt.Println("❌ ERROR: chat_template_kwargs not added to complex request")
+		fmt.Println("ERROR: chat_template_kwargs not added to complex deepseek request")
 	}
 }