vllm-project
diff --git a/‎.github/workflows/test-and-build.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/test-and-build.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md‎
Lines changed: 1 addition & 0 deletions b/‎README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎config/config.yaml‎
Lines changed: 44 additions & 2 deletions b/‎config/config.yaml‎
Lines changed: 44 additions & 2 deletions
diff --git a/‎src/semantic-router/pkg/config/config.go‎
Lines changed: 54 additions & 18 deletions b/‎src/semantic-router/pkg/config/config.go‎
Lines changed: 54 additions & 18 deletions
diff --git a/‎src/semantic-router/pkg/extproc/reason_mode_config_test.go‎
Lines changed: 98 additions & 19 deletions b/‎src/semantic-router/pkg/extproc/reason_mode_config_test.go‎
Lines changed: 98 additions & 19 deletions
@@ -1,4 +1,4 @@
-name: Run Test
+name: Test And Build
 
 on:
   schedule:
 
@@ -6,6 +6,7 @@
 [![Hugging Face](https://img.shields.io/badge/🤗%20Hugging%20Face-Community-yellow)](https://huggingface.co/LLM-Semantic-Router)
 [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE)
 [![Crates.io](https://img.shields.io/crates/v/candle-semantic-router.svg)](https://crates.io/crates/candle-semantic-router)
+![Test And Build](https://github.com/vllm-project/semantic-router/workflows/Test%20And%20Build/badge.svg)
 
 **📚 [Complete Documentation](https://vllm-semantic-router.com) | 🚀 [Quick Start](https://vllm-semantic-router.com/docs/getting-started/installation) | 📣 [Blog](https://vllm-semantic-router.com/blog/) | 📖 [API Reference](https://vllm-semantic-router.com/docs/api/router/)**
 
 
@@ -57,6 +57,28 @@ model_config:
       pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"]  # Only allow these specific PII types
     # Specify which endpoints can serve this model (optional - if not specified, uses all endpoints that list this model)
     preferred_endpoints: ["endpoint1", "endpoint3"]
+    # Reasoning family - phi4 doesn't support reasoning, so omit this field
+  
+  # Example: DeepSeek model with custom name
+  "ds-v31-custom":
+    reasoning_family: "deepseek"  # This model uses DeepSeek reasoning syntax
+    preferred_endpoints: ["endpoint1"]
+    pii_policy:
+      allow_by_default: true
+  
+  # Example: Qwen3 model with custom name  
+  "my-qwen3-model":
+    reasoning_family: "qwen3"     # This model uses Qwen3 reasoning syntax
+    preferred_endpoints: ["endpoint2"]
+    pii_policy:
+      allow_by_default: true
+  
+  # Example: GPT-OSS model with custom name
+  "custom-gpt-oss":
+    reasoning_family: "gpt-oss"   # This model uses GPT-OSS reasoning syntax
+    preferred_endpoints: ["endpoint1"]
+    pii_policy:
+      allow_by_default: true
   gemma3:27b:
     pricing:
       currency: USD
@@ -235,7 +257,6 @@ categories:
   - model: phi4
     score: 0.2
 default_model: mistral-small3.1
-default_reasoning_effort: medium  # Default reasoning effort level (low, medium, high)
 
 # API Configuration
 api:
@@ -252,4 +273,25 @@ api:
       sample_rate: 1.0                   # Collect metrics for all requests (1.0 = 100%, 0.5 = 50%)
       # Histogram buckets for metrics (directly configure what you need)
       duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
-      size_buckets: [1, 2, 5, 10, 20, 50, 100, 200] 
+      size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
+
+# Reasoning family configurations - define how different model families handle reasoning syntax
+reasoning_families:
+  deepseek:
+    type: "chat_template_kwargs"
+    parameter: "thinking"
+  
+  qwen3:
+    type: "chat_template_kwargs"
+    parameter: "enable_thinking"
+  
+  gpt-oss:
+    type: "reasoning_effort"
+    parameter: "reasoning_effort"
+  
+  gpt:
+    type: "reasoning_effort"
+    parameter: "reasoning_effort"
+
+# Global default reasoning effort level
+default_reasoning_effort: medium  # Default reasoning effort level (low, medium, high) 
@@ -4,6 +4,7 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
+	"slices"
 	"sync"
 
 	"gopkg.in/yaml.v3"
@@ -44,6 +45,9 @@ type RouterConfig struct {
 	// Default reasoning effort level (low, medium, high) when not specified per category
 	DefaultReasoningEffort string `yaml:"default_reasoning_effort,omitempty"`
 
+	// Reasoning family configurations to define how different model families handle reasoning syntax
+	ReasoningFamilies map[string]ReasoningFamilyConfig `yaml:"reasoning_families,omitempty"`
+
 	// Semantic cache configuration
 	SemanticCache SemanticCacheConfig `yaml:"semantic_cache"`
 
@@ -207,6 +211,16 @@ type ModelParams struct {
 
 	// Optional pricing used for cost computation
 	Pricing ModelPricing `yaml:"pricing,omitempty"`
+
+	// Reasoning family for this model (e.g., "deepseek", "qwen3", "gpt-oss")
+	// If empty, the model doesn't support reasoning mode
+	ReasoningFamily string `yaml:"reasoning_family,omitempty"`
+}
+
+// ReasoningFamilyConfig describes how one reasoning family expresses reasoning mode; entries are keyed by family name in RouterConfig.ReasoningFamilies.
+type ReasoningFamilyConfig struct {
+	Type      string `yaml:"type"`      // mechanism used by the family: "chat_template_kwargs" or "reasoning_effort"
+	Parameter string `yaml:"parameter"` // parameter name for that mechanism: "thinking", "enable_thinking", "reasoning_effort", etc.
 }
 
 // PIIPolicy represents the PII (Personally Identifiable Information) policy for a model
@@ -263,6 +277,41 @@ type Category struct {
 	ModelScores          []ModelScore `yaml:"model_scores"`
 }
 
+// Reasoning family lookup
+
+// GetModelReasoningFamily resolves modelName to its ReasoningFamilyConfig via ModelConfig[modelName].ReasoningFamily, returning nil when any step of that lookup is missing.
+func (rc *RouterConfig) GetModelReasoningFamily(modelName string) *ReasoningFamilyConfig {
+	if rc == nil || rc.ModelConfig == nil || rc.ReasoningFamilies == nil {
+		return nil // safe on a nil receiver or when either map is unset
+	}
+
+	// Step 1: find the model's entry; an absent model or an empty reasoning_family both yield nil.
+	modelParams, exists := rc.ModelConfig[modelName]
+	if !exists || modelParams.ReasoningFamily == "" {
+		return nil
+	}
+
+	// Step 2: resolve the family name; a name with no entry in ReasoningFamilies also yields nil.
+	familyConfig, exists := rc.ReasoningFamilies[modelParams.ReasoningFamily]
+	if !exists {
+		return nil
+	}
+
+	return &familyConfig // address of the local copy read from the map, not of the map entry itself
+}
+
+// Legacy functions - can be removed once migration is complete
+
+// contains reports whether item equals any element of slice. NOTE(review): other lookups in this change call slices.Contains; confirm whether this helper still has callers.
+func contains(slice []string, item string) bool {
+	for _, s := range slice {
+		if s == item {
+			return true
+		}
+	}
+	return false // also reached for a nil or empty slice
+}
+
 var (
 	config     *RouterConfig
 	configOnce sync.Once
@@ -390,14 +439,7 @@ func (c *RouterConfig) IsModelAllowedForPIIType(modelName string, piiType string
 	}
 
 	// If allow_by_default is false, only explicitly allowed PII types are permitted
-	for _, allowedPII := range policy.PIITypes {
-		if allowedPII == piiType {
-			return true
-		}
-	}
-
-	// PII type not found in allowed list and allow_by_default is false
-	return false
+	return slices.Contains(policy.PIITypes, piiType)
 }
 
 // IsModelAllowedForPIITypes checks if a model is allowed to process any of the given PII types
@@ -438,23 +480,17 @@ func (c *RouterConfig) GetEndpointsForModel(modelName string) []VLLMEndpoint {
 
 	// First, find all endpoints that can serve this model
 	for _, endpoint := range c.VLLMEndpoints {
-		for _, model := range endpoint.Models {
-			if model == modelName {
-				availableEndpoints = append(availableEndpoints, endpoint)
-				break
-			}
+		if slices.Contains(endpoint.Models, modelName) {
+			availableEndpoints = append(availableEndpoints, endpoint)
 		}
 	}
 
 	// Check if model has preferred endpoints configured
 	if modelConfig, ok := c.ModelConfig[modelName]; ok && len(modelConfig.PreferredEndpoints) > 0 {
 		var preferredEndpoints []VLLMEndpoint
 		for _, endpoint := range availableEndpoints {
-			for _, preferredName := range modelConfig.PreferredEndpoints {
-				if endpoint.Name == preferredName {
-					preferredEndpoints = append(preferredEndpoints, endpoint)
-					break
-				}
+			if slices.Contains(modelConfig.PreferredEndpoints, endpoint.Name) {
+				preferredEndpoints = append(preferredEndpoints, endpoint)
 			}
 		}
 		if len(preferredEndpoints) > 0 {
 
@@ -199,10 +199,42 @@ requestBody := buildRequestBody(model, messages, useReasoning, stream)
 func TestAddReasoningModeToRequestBody(t *testing.T) {
 	fmt.Println("=== Testing addReasoningModeToRequestBody Function ===")
 
-	// Create a mock router with minimal config
-	router := &OpenAIRouter{}
+	// Create a mock router with family-based reasoning config
+	router := &OpenAIRouter{
+		Config: &config.RouterConfig{
+			DefaultReasoningEffort: "medium",
+			ReasoningFamilies: map[string]config.ReasoningFamilyConfig{
+				"deepseek": {
+					Type:      "chat_template_kwargs",
+					Parameter: "thinking",
+				},
+				"qwen3": {
+					Type:      "chat_template_kwargs",
+					Parameter: "enable_thinking",
+				},
+				"gpt-oss": {
+					Type:      "reasoning_effort",
+					Parameter: "reasoning_effort",
+				},
+			},
+			ModelConfig: map[string]config.ModelParams{
+				"deepseek-v31": {
+					ReasoningFamily: "deepseek",
+				},
+				"qwen3-model": {
+					ReasoningFamily: "qwen3",
+				},
+				"gpt-oss-model": {
+					ReasoningFamily: "gpt-oss",
+				},
+				"phi4": {
+					// No reasoning family - doesn't support reasoning
+				},
+			},
+		},
+	}
 
-	// Test case 1: Basic request body
+	// Test case 1: Basic request body with model that has NO reasoning support (phi4)
 	originalRequest := map[string]interface{}{
 		"model": "phi4",
 		"messages": []map[string]interface{}{
@@ -235,29 +267,76 @@ func TestAddReasoningModeToRequestBody(t *testing.T) {
 		return
 	}
 
-	// Check if chat_template_kwargs was added
-	if chatTemplateKwargs, exists := modifiedRequest["chat_template_kwargs"]; exists {
+	// Check that chat_template_kwargs was NOT added for phi4 (since it has no reasoning_family)
+	if _, exists := modifiedRequest["chat_template_kwargs"]; exists {
+		fmt.Println("ERROR: chat_template_kwargs should not be added for phi4 (no reasoning family configured)")
+	} else {
+		fmt.Println("SUCCESS: chat_template_kwargs correctly not added for phi4 (no reasoning support)")
+	}
+
+	// Check that reasoning_effort was NOT added for phi4
+	if _, exists := modifiedRequest["reasoning_effort"]; exists {
+		fmt.Println("ERROR: reasoning_effort should not be added for phi4 (no reasoning family configured)")
+	} else {
+		fmt.Println("SUCCESS: reasoning_effort correctly not added for phi4 (no reasoning support)")
+	}
+
+	// Test case 2: Request with model that HAS reasoning support (deepseek-v31)
+	fmt.Println("\n--- Test Case 2: Model with reasoning support ---")
+	deepseekRequest := map[string]interface{}{
+		"model": "deepseek-v31",
+		"messages": []map[string]interface{}{
+			{"role": "user", "content": "What is 2 + 2?"},
+		},
+		"stream": false,
+	}
+
+	deepseekBody, err := json.Marshal(deepseekRequest)
+	if err != nil {
+		fmt.Printf("Error marshaling deepseek request: %v\n", err)
+		return
+	}
+
+	fmt.Printf("Original deepseek request:\n%s\n\n", string(deepseekBody))
+
+	// Add reasoning mode to DeepSeek model
+	modifiedDeepseekBody, err := router.setReasoningModeToRequestBody(deepseekBody, true, "math")
+	if err != nil {
+		fmt.Printf("Error adding reasoning mode to deepseek: %v\n", err)
+		return
+	}
+
+	fmt.Printf("Modified deepseek request with reasoning:\n%s\n\n", string(modifiedDeepseekBody))
+
+	var modifiedDeepseekRequest map[string]interface{}
+	if err := json.Unmarshal(modifiedDeepseekBody, &modifiedDeepseekRequest); err != nil {
+		fmt.Printf("Error unmarshaling modified deepseek request: %v\n", err)
+		return
+	}
+
+	// Check that chat_template_kwargs WAS added for deepseek-v31
+	if chatTemplateKwargs, exists := modifiedDeepseekRequest["chat_template_kwargs"]; exists {
 		if kwargs, ok := chatTemplateKwargs.(map[string]interface{}); ok {
 			if thinking, hasThinking := kwargs["thinking"]; hasThinking {
 				if thinkingBool, isBool := thinking.(bool); isBool && thinkingBool {
-					fmt.Println("✅ SUCCESS: chat_template_kwargs with thinking: true was correctly added")
+					fmt.Println("SUCCESS: chat_template_kwargs with thinking: true correctly added for deepseek-v31")
 				} else {
-					fmt.Printf("❌ ERROR: thinking value is not true, got: %v\n", thinking)
+					fmt.Printf("ERROR: thinking value is not true for deepseek-v31, got: %v\n", thinking)
 				}
 			} else {
-				fmt.Println("❌ ERROR: thinking field not found in chat_template_kwargs")
+				fmt.Println("ERROR: thinking field not found in chat_template_kwargs for deepseek-v31")
 			}
 		} else {
-			fmt.Printf("❌ ERROR: chat_template_kwargs is not a map, got: %T\n", chatTemplateKwargs)
+			fmt.Printf("ERROR: chat_template_kwargs is not a map for deepseek-v31, got: %T\n", chatTemplateKwargs)
 		}
 	} else {
-		fmt.Println("❌ ERROR: chat_template_kwargs not found in modified request")
+		fmt.Println("ERROR: chat_template_kwargs not found for deepseek-v31 (should be present)")
 	}
 
-	// Test case 2: Request with existing fields
-	fmt.Println("\n--- Test Case 2: Request with existing fields ---")
+	// Test case 3: Request with existing fields
+	fmt.Println("\n--- Test Case 3: Request with existing fields ---")
 	complexRequest := map[string]interface{}{
-		"model": "phi4",
+		"model": "deepseek-v31",
 		"messages": []map[string]interface{}{
 			{"role": "system", "content": "You are a helpful assistant"},
 			{"role": "user", "content": "Solve x^2 + 5x + 6 = 0"},
@@ -290,20 +369,20 @@ func TestAddReasoningModeToRequestBody(t *testing.T) {
 	allFieldsPreserved := true
 	for _, field := range originalFields {
 		if _, exists := modifiedComplexRequest[field]; !exists {
-			fmt.Printf("❌ ERROR: Original field '%s' was lost\n", field)
+			fmt.Printf("ERROR: Original field '%s' was lost\n", field)
 			allFieldsPreserved = false
 		}
 	}
 
 	if allFieldsPreserved {
-		fmt.Println("✅ SUCCESS: All original fields preserved")
+		fmt.Println("SUCCESS: All original fields preserved")
 	}
 
-	// Verify chat_template_kwargs was added
+	// Verify chat_template_kwargs was added for deepseek-v31
 	if _, exists := modifiedComplexRequest["chat_template_kwargs"]; exists {
-		fmt.Println("✅ SUCCESS: chat_template_kwargs added to complex request")
-		fmt.Printf("Final modified request:\n%s\n", string(modifiedComplexBody))
+		fmt.Println("SUCCESS: chat_template_kwargs added to complex deepseek request")
+		fmt.Printf("Final modified deepseek request:\n%s\n", string(modifiedComplexBody))
 	} else {
-		fmt.Println("❌ ERROR: chat_template_kwargs not added to complex request")
+		fmt.Println("ERROR: chat_template_kwargs not added to complex deepseek request")
 	}
 }
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-name: Run Test`
	`1`	`+name: Test And Build`
`2`	`2`
`3`	`3`	`on:`
`4`	`4`	`schedule:`