From 1f330469318813f53d71328aa747eb3fb4ee4410 Mon Sep 17 00:00:00 2001
From: Huamin Chen <hchen@redhat.com>
Date: Fri, 19 Sep 2025 16:19:36 +0000
Subject: [PATCH] refactor: move use_reasoning to the model level from the
 category level to support non-reasoning models

Signed-off-by: Huamin Chen <hchen@redhat.com>
---
 config/config.yaml                            | 285 +++++-------------
 src/semantic-router/pkg/config/config.go      |  70 ++++-
 src/semantic-router/pkg/config/config_test.go |  34 ++-
 .../pkg/extproc/reason_mode_config_test.go    |  57 +++-
 .../pkg/extproc/reason_mode_selector.go       |  50 +--
 .../pkg/extproc/reasoning_integration_test.go |  38 ++-
 .../pkg/utils/classification/classifier.go    |   8 +-
 .../utils/classification/classifier_test.go   |  12 +-
 website/docs/getting-started/configuration.md |  41 ++-
 9 files changed, 317 insertions(+), 278 deletions(-)

diff --git a/config/config.yaml b/config/config.yaml
index 184ddfa4..10094fd2 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -2,34 +2,22 @@ bert_model:
   model_id: sentence-transformers/all-MiniLM-L12-v2
   threshold: 0.6
   use_cpu: true
+
 semantic_cache:
   enabled: true
   backend_type: "memory"  # Options: "memory" or "milvus"
   similarity_threshold: 0.8
   max_entries: 1000  # Only applies to memory backend
   ttl_seconds: 3600
-  eviction_policy: "fifo"  # "fifo", "lru", "lfu", currently only supports memory backend
-
-  # For production environments, use Milvus for scalable caching:
-  # backend_type: "milvus"
-  # backend_config_path: "config/cache/milvus.yaml"
+  eviction_policy: "fifo"  # Options: "fifo", "lru", "lfu" (memory backend only)
 
-  # Development/Testing: Use in-memory cache (current configuration)
-  # - Fast startup and no external dependencies
-  # - Limited to single instance scaling
-  # - Data lost on restart
-
-  # Production: Use Milvus vector database
-  # - Horizontally scalable and persistent
-  # - Supports distributed deployments
-  # - Requires Milvus cluster setup
-  # - To enable: uncomment the lines above and install Milvus dependencies
 tools:
-  enabled: true  # Set to true to enable automatic tool selection
-  top_k: 3        # Number of most relevant tools to select
-  similarity_threshold: 0.2  # Threshold for tool similarity
+  enabled: true
+  top_k: 3
+  similarity_threshold: 0.2
   tools_db_path: "config/tools_db.json"
-  fallback_to_empty: true  # If true, return no tools on failure; if false, return error
+  fallback_to_empty: true
+
 prompt_guard:
   enabled: true
   use_modernbert: true
@@ -38,258 +26,114 @@ prompt_guard:
   use_cpu: true
   jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"
 
-# vLLM Endpoints Configuration - supports multiple endpoints, each can serve multiple models
+# vLLM Endpoints Configuration
 vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
-    port: 11434
-    models:
-      - "phi4"
-      - "gemma3:27b"
-    weight: 1  # Load balancing weight
-    health_check_path: "/health"  # Optional health check endpoint
-  - name: "endpoint2"
-    address: "127.0.0.1"
-    port: 11434
+    port: 8000
     models:
-      - "mistral-small3.1"
+      - "openai/gpt-oss-20b"
     weight: 1
     health_check_path: "/health"
-  - name: "endpoint3"
-    address: "127.0.0.1"
-    port: 11434
-    models:
-      - "phi4"  # Same model can be served by multiple endpoints for redundancy
-      - "mistral-small3.1"
-    weight: 2  # Higher weight for more powerful endpoint
 
 model_config:
-  phi4:
-    pricing:
-      currency: USD
-      prompt_per_1m: 0.07
-      completion_per_1m: 0.35
-    pii_policy:
-      allow_by_default: false  # Deny all PII by default
-      pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"]  # Only allow these specific PII types
-    # Specify which endpoints can serve this model (optional - if not specified, uses all endpoints that list this model)
-    preferred_endpoints: ["endpoint1", "endpoint3"]
-    # Reasoning family - phi4 doesn't support reasoning, so omit this field
-
-  # Example: DeepSeek model with custom name
-  "ds-v31-custom":
-    reasoning_family: "deepseek"  # This model uses DeepSeek reasoning syntax
+  "openai/gpt-oss-20b":
+    reasoning_family: "gpt-oss"  # This model uses GPT-OSS reasoning syntax
     preferred_endpoints: ["endpoint1"]
     pii_policy:
       allow_by_default: true
 
-  # Example: Qwen3 model with custom name
-  "my-qwen3-model":
-    reasoning_family: "qwen3"     # This model uses Qwen3 reasoning syntax
-    preferred_endpoints: ["endpoint2"]
-    pii_policy:
-      allow_by_default: true
-
-  # Example: GPT-OSS model with custom name
-  "custom-gpt-oss":
-    reasoning_family: "gpt-oss"   # This model uses GPT-OSS reasoning syntax
-    preferred_endpoints: ["endpoint1"]
-    pii_policy:
-      allow_by_default: true
-  gemma3:27b:
-    pricing:
-      currency: USD
-      prompt_per_1m: 0.067
-      completion_per_1m: 0.267
-    pii_policy:
-      allow_by_default: false  # Deny all PII by default
-      pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"]  # Only allow these specific PII types
-    preferred_endpoints: ["endpoint1"]
-  "mistral-small3.1":
-    pricing:
-      currency: USD
-      prompt_per_1m: 0.1
-      completion_per_1m: 0.3
-    pii_policy:
-      allow_by_default: false  # Deny all PII by default
-      pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"]  # Only allow these specific PII types
-    preferred_endpoints: ["endpoint2", "endpoint3"]
-
-# Classifier configuration for text classification
+# Classifier configuration
 classifier:
   category_model:
-    model_id: "models/category_classifier_modernbert-base_model"  # TODO: Use local model for now before the code can download the entire model from huggingface
+    model_id: "models/category_classifier_modernbert-base_model"
     use_modernbert: true
     threshold: 0.6
     use_cpu: true
     category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
   pii_model:
-    model_id: "models/pii_classifier_modernbert-base_presidio_token_model"  # TODO: Use local model for now before the code can download the entire model from huggingface
+    model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
     use_modernbert: true
     threshold: 0.7
     use_cpu: true
     pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
+
+# Categories: use_reasoning is set per model under model_scores and is required on every entry
 categories:
   - name: business
-    use_reasoning: false
-    reasoning_description: "Business content is typically conversational"
-    reasoning_effort: low  # Business conversations need low reasoning effort
     model_scores:
-      - model: phi4
-        score: 0.8
-      - model: gemma3:27b
-        score: 0.4
-      - model: mistral-small3.1
-        score: 0.2
+      - model: openai/gpt-oss-20b
+        score: 0.7
+        use_reasoning: false  # Business performs better without reasoning
   - name: law
-    use_reasoning: false
-    reasoning_description: "Legal content is typically explanatory"
     model_scores:
-      - model: gemma3:27b
-        score: 0.8
-      - model: phi4
-        score: 0.6
-      - model: mistral-small3.1
+      - model: openai/gpt-oss-20b
         score: 0.4
+        use_reasoning: false
   - name: psychology
-    use_reasoning: false
-    reasoning_description: "Psychology content is usually explanatory"
     model_scores:
-      - model: mistral-small3.1
+      - model: openai/gpt-oss-20b
         score: 0.6
-      - model: gemma3:27b
-        score: 0.4
-      - model: phi4
-        score: 0.4
+        use_reasoning: false
   - name: biology
-    use_reasoning: true
-    reasoning_description: "Biological processes benefit from structured analysis"
     model_scores:
-      - model: mistral-small3.1
-        score: 0.8
-      - model: gemma3:27b
-        score: 0.6
-      - model: phi4
-        score: 0.2
+      - model: openai/gpt-oss-20b
+        score: 0.9
+        use_reasoning: false
   - name: chemistry
-    use_reasoning: true
-    reasoning_description: "Chemical reactions and formulas require systematic thinking"
-    reasoning_effort: high  # Chemistry requires high reasoning effort
     model_scores:
-      - model: mistral-small3.1
-        score: 0.8
-      - model: gemma3:27b
-        score: 0.6
-      - model: phi4
+      - model: openai/gpt-oss-20b
         score: 0.6
+        use_reasoning: true  # Enable reasoning for complex chemistry
   - name: history
-    use_reasoning: false
-    reasoning_description: "Historical content is narrative-based"
     model_scores:
-      - model: mistral-small3.1
-        score: 0.8
-      - model: phi4
-        score: 0.6
-      - model: gemma3:27b
-        score: 0.4
+      - model: openai/gpt-oss-20b
+        score: 0.7
+        use_reasoning: false
   - name: other
-    use_reasoning: false
-    reasoning_description: "General content doesn't require reasoning"
     model_scores:
-      - model: gemma3:27b
-        score: 0.8
-      - model: phi4
-        score: 0.6
-      - model: mistral-small3.1
-        score: 0.6
+      - model: openai/gpt-oss-20b
+        score: 0.7
+        use_reasoning: false
   - name: health
-    use_reasoning: false
-    reasoning_description: "Health information is typically informational"
     model_scores:
-      - model: gemma3:27b
-        score: 0.8
-      - model: phi4
-        score: 0.8
-      - model: mistral-small3.1
-        score: 0.6
+      - model: openai/gpt-oss-20b
+        score: 0.5
+        use_reasoning: false
   - name: economics
-    use_reasoning: false
-    reasoning_description: "Economic discussions are usually explanatory"
     model_scores:
-      - model: gemma3:27b
-        score: 0.8
-      - model: mistral-small3.1
-        score: 0.8
-      - model: phi4
-        score: 0.0
+      - model: openai/gpt-oss-20b
+        score: 1.0
+        use_reasoning: false
   - name: math
-    use_reasoning: true
-    reasoning_description: "Mathematical problems require step-by-step reasoning"
-    reasoning_effort: high  # Math problems need high reasoning effort
     model_scores:
-      - model: phi4
+      - model: openai/gpt-oss-20b
         score: 1.0
-      - model: mistral-small3.1
-        score: 0.8
-      - model: gemma3:27b
-        score: 0.6
+        use_reasoning: true  # Enable reasoning for complex math
   - name: physics
-    use_reasoning: true
-    reasoning_description: "Physics concepts need logical analysis"
     model_scores:
-      - model: gemma3:27b
-        score: 0.4
-      - model: phi4
-        score: 0.4
-      - model: mistral-small3.1
-        score: 0.4
+      - model: openai/gpt-oss-20b
+        score: 0.7
+        use_reasoning: true  # Enable reasoning for physics
   - name: computer science
-    use_reasoning: true
-    reasoning_description: "Programming and algorithms need logical reasoning"
     model_scores:
-      - model: gemma3:27b
+      - model: openai/gpt-oss-20b
         score: 0.6
-      - model: mistral-small3.1
-        score: 0.6
-      - model: phi4
-        score: 0.0
+        use_reasoning: false
   - name: philosophy
-    use_reasoning: false
-    reasoning_description: "Philosophical discussions are conversational"
     model_scores:
-      - model: phi4
-        score: 0.6
-      - model: gemma3:27b
-        score: 0.2
-      - model: mistral-small3.1
-        score: 0.2
+      - model: openai/gpt-oss-20b
+        score: 0.5
+        use_reasoning: false
   - name: engineering
-    use_reasoning: true
-    reasoning_description: "Engineering problems require systematic problem-solving"
     model_scores:
-      - model: gemma3:27b
-        score: 0.6
-      - model: mistral-small3.1
-        score: 0.6
-      - model: phi4
-        score: 0.2
-
-default_model: mistral-small3.1
+      - model: openai/gpt-oss-20b
+        score: 0.7
+        use_reasoning: false
 
-# API Configuration
-api:
-  batch_classification:
-    # Metrics configuration for monitoring batch classification performance
-    metrics:
-      enabled: true              # Enable comprehensive metrics collection
-      detailed_goroutine_tracking: true  # Track individual goroutine lifecycle
-      high_resolution_timing: false      # Use nanosecond precision timing
-      sample_rate: 1.0                   # Collect metrics for all requests (1.0 = 100%, 0.5 = 50%)
-      # Histogram buckets for metrics (directly configure what you need)
-      duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
-      size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
+default_model: openai/gpt-oss-20b
 
-# Reasoning family configurations - define how different model families handle reasoning syntax
+# Reasoning family configurations
 reasoning_families:
   deepseek:
     type: "chat_template_kwargs"
@@ -302,10 +146,23 @@ reasoning_families:
   gpt-oss:
     type: "reasoning_effort"
     parameter: "reasoning_effort"
-
   gpt:
     type: "reasoning_effort"
     parameter: "reasoning_effort"
 
 # Global default reasoning effort level
-default_reasoning_effort: medium  # Default reasoning effort level (low, medium, high)
+default_reasoning_effort: high
+
+# API Configuration
+api:
+  batch_classification:
+    max_batch_size: 100
+    concurrency_threshold: 5
+    max_concurrency: 8
+    metrics:
+      enabled: true
+      detailed_goroutine_tracking: true
+      high_resolution_timing: false
+      sample_rate: 1.0
+      duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
+      size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
diff --git a/src/semantic-router/pkg/config/config.go b/src/semantic-router/pkg/config/config.go
index 9a3bfb70..cf4aafa9 100644
--- a/src/semantic-router/pkg/config/config.go
+++ b/src/semantic-router/pkg/config/config.go
@@ -261,14 +261,14 @@ func (c *RouterConfig) GetCacheSimilarityThreshold() float32 {
 
 // Category represents a category for routing queries
 type ModelScore struct {
-	Model string  `yaml:"model"`
-	Score float64 `yaml:"score"`
+	Model        string  `yaml:"model"`
+	Score        float64 `yaml:"score"`
+	UseReasoning *bool   `yaml:"use_reasoning"` // Pointer to detect missing field
 }
 
 type Category struct {
 	Name                 string       `yaml:"name"`
 	Description          string       `yaml:"description,omitempty"`
-	UseReasoning         bool         `yaml:"use_reasoning"`
 	ReasoningDescription string       `yaml:"reasoning_description,omitempty"`
 	ReasoningEffort      string       `yaml:"reasoning_effort,omitempty"` // Configurable reasoning effort level (low, medium, high)
 	ModelScores          []ModelScore `yaml:"model_scores"`
@@ -336,6 +336,36 @@ func LoadConfig(configPath string) (*RouterConfig, error) {
 	return config, nil
 }
 
+// BoolPtr returns a pointer to a copy of b, for filling optional *bool fields such as ModelScore.UseReasoning
+func BoolPtr(b bool) *bool {
+	return &b
+}
+
+// validateConfigStructure returns an error for the first category with no model_scores or with an entry missing a model name, a score > 0, or use_reasoning
+func validateConfigStructure(cfg *RouterConfig) error {
+	// Every category must list at least one model_scores entry
+	for _, category := range cfg.Categories {
+		if len(category.ModelScores) == 0 {
+			return fmt.Errorf("category '%s' has no model_scores defined - each category must have at least one model", category.Name)
+		}
+
+		// Each entry needs a non-empty model, a score > 0 (0 is rejected), and an explicit use_reasoning (nil means it was not set)
+		for i, modelScore := range category.ModelScores {
+			if modelScore.Model == "" {
+				return fmt.Errorf("category '%s', model_scores[%d]: model name cannot be empty", category.Name, i)
+			}
+			if modelScore.Score <= 0 {
+				return fmt.Errorf("category '%s', model '%s': score must be greater than 0, got %f", category.Name, modelScore.Model, modelScore.Score)
+			}
+			if modelScore.UseReasoning == nil {
+				return fmt.Errorf("category '%s', model '%s': missing required field 'use_reasoning'", category.Name, modelScore.Model)
+			}
+		}
+	}
+
+	return nil
+}
+
 // ParseConfigFile parses the YAML config file without touching the global cache.
 func ParseConfigFile(configPath string) (*RouterConfig, error) {
 	// Resolve symlinks to handle Kubernetes ConfigMap mounts
@@ -347,10 +377,17 @@ func ParseConfigFile(configPath string) (*RouterConfig, error) {
 	if err != nil {
 		return nil, fmt.Errorf("failed to read config file: %w", err)
 	}
+
 	cfg := &RouterConfig{}
 	if err := yaml.Unmarshal(data, cfg); err != nil {
 		return nil, fmt.Errorf("failed to parse config file: %w", err)
 	}
+
+	// Validation after parsing
+	if err := validateConfigStructure(cfg); err != nil {
+		return nil, err
+	}
+
 	return cfg, nil
 }
 
@@ -573,6 +610,33 @@ func (c *RouterConfig) SelectBestEndpointAddressForModel(modelName string) (stri
 	return fmt.Sprintf("%s:%d", bestEndpoint.Address, bestEndpoint.Port), true
 }
 
+// GetModelReasoningForCategory reports whether use_reasoning is true for modelName within categoryName (exact name match; false when unset)
+func (c *RouterConfig) GetModelReasoningForCategory(categoryName string, modelName string) bool {
+	for _, category := range c.Categories {
+		if category.Name == categoryName {
+			for _, modelScore := range category.ModelScores {
+				if modelScore.Model == modelName {
+					return modelScore.UseReasoning != nil && *modelScore.UseReasoning
+				}
+			}
+		}
+	}
+	return false // Default to false if category or model not found
+}
+
+// GetBestModelForCategory returns the first model_scores entry of the category (scores are not compared) and whether use_reasoning is enabled for it
+func (c *RouterConfig) GetBestModelForCategory(categoryName string) (string, bool) {
+	for _, category := range c.Categories {
+		if category.Name == categoryName {
+			if len(category.ModelScores) > 0 {
+				useReasoning := category.ModelScores[0].UseReasoning != nil && *category.ModelScores[0].UseReasoning
+				return category.ModelScores[0].Model, useReasoning
+			}
+		}
+	}
+	return "", false // Return empty string and false if category not found or has no models
+}
+
 // ValidateEndpoints validates that all configured models have at least one endpoint
 func (c *RouterConfig) ValidateEndpoints() error {
 	// Get all models from categories
diff --git a/src/semantic-router/pkg/config/config_test.go b/src/semantic-router/pkg/config/config_test.go
index 5a820a09..839fc682 100644
--- a/src/semantic-router/pkg/config/config_test.go
+++ b/src/semantic-router/pkg/config/config_test.go
@@ -67,8 +67,10 @@ categories:
     model_scores:
       - model: "model-a"
         score: 0.9
+        use_reasoning: true
       - model: "model-b"
         score: 0.8
+        use_reasoning: false
 
 default_model: "model-b"
 
@@ -339,12 +341,15 @@ categories:
     model_scores:
       - model: "model1"
         score: 0.9
+        use_reasoning: true
       - model: "model2"
         score: 0.8
+        use_reasoning: false
   - name: "category2"
     model_scores:
       - model: "model3"
         score: 0.95
+        use_reasoning: true
 default_model: "default-model"
 `
 			err := os.WriteFile(configFile, []byte(configContent), 0o644)
@@ -387,7 +392,10 @@ default_model: "default-model"
 				configContent := `
 categories:
   - name: "empty_category"
-    model_scores: []
+    model_scores:
+      - model: "fallback-model"
+        score: 0.5
+        use_reasoning: false
 default_model: "fallback-model"
 `
 				err := os.WriteFile(configFile, []byte(configContent), 0o644)
@@ -640,8 +648,16 @@ prompt_guard:
 categories:
   - name: "category1"
     description: "Description for category 1"
+    model_scores:
+      - model: "model1"
+        score: 0.9
+        use_reasoning: true
   - name: "category2"
     description: "Description for category 2"
+    model_scores:
+      - model: "model2"
+        score: 0.8
+        use_reasoning: false
 `
 				err := os.WriteFile(configFile, []byte(configContent), 0o644)
 				Expect(err).NotTo(HaveOccurred())
@@ -666,8 +682,16 @@ categories:
 categories:
   - name: "category1"
     description: "Has description"
+    model_scores:
+      - model: "model1"
+        score: 0.9
+        use_reasoning: true
   - name: "category2"
     # No description field
+    model_scores:
+      - model: "model2"
+        score: 0.8
+        use_reasoning: false
 `
 				err := os.WriteFile(configFile, []byte(configContent), 0o644)
 				Expect(err).NotTo(HaveOccurred())
@@ -743,6 +767,10 @@ default_model: "model-with-hyphens_and_underscores"
 categories:
   - name: "category with spaces"
     description: "Description with special chars: @#$%^&*()"
+    model_scores:
+      - model: "model-with-hyphens_and_underscores"
+        score: 0.9
+        use_reasoning: true
 `
 			err := os.WriteFile(configFile, []byte(configContent), 0o644)
 			Expect(err).NotTo(HaveOccurred())
@@ -794,8 +822,10 @@ categories:
     model_scores:
       - model: "model-a"
         score: 0.9
+        use_reasoning: true
       - model: "model-b"
         score: 0.8
+        use_reasoning: false
 
 default_model: "model-b"
 `
@@ -932,6 +962,7 @@ categories:
     model_scores:
       - model: "missing-model"
         score: 0.9
+        use_reasoning: true
 
 default_model: "existing-model"
 `
@@ -1210,6 +1241,7 @@ categories:
     model_scores:
       - model: "gpt-4"
         score: 0.95
+        use_reasoning: true
 
 default_model: "gpt-4"
 `
diff --git a/src/semantic-router/pkg/extproc/reason_mode_config_test.go b/src/semantic-router/pkg/extproc/reason_mode_config_test.go
index 3eccfa4b..99b5639c 100644
--- a/src/semantic-router/pkg/extproc/reason_mode_config_test.go
+++ b/src/semantic-router/pkg/extproc/reason_mode_config_test.go
@@ -18,18 +18,24 @@ func TestReasoningModeConfiguration(t *testing.T) {
 		Categories: []config.Category{
 			{
 				Name:                 "math",
-				UseReasoning:         true,
 				ReasoningDescription: "Mathematical problems require step-by-step reasoning",
+				ModelScores: []config.ModelScore{
+					{Model: "deepseek-v31", Score: 0.9, UseReasoning: config.BoolPtr(true)},
+				},
 			},
 			{
 				Name:                 "business",
-				UseReasoning:         false,
 				ReasoningDescription: "Business content is typically conversational",
+				ModelScores: []config.ModelScore{
+					{Model: "phi4", Score: 0.8, UseReasoning: config.BoolPtr(false)},
+				},
 			},
 			{
 				Name:                 "biology",
-				UseReasoning:         true,
 				ReasoningDescription: "Biological processes benefit from structured analysis",
+				ModelScores: []config.ModelScore{
+					{Model: "deepseek-v31", Score: 0.9, UseReasoning: config.BoolPtr(true)},
+				},
 			},
 		},
 	}
@@ -40,12 +46,16 @@ func TestReasoningModeConfiguration(t *testing.T) {
 	fmt.Println("--- Reasoning Mode Configuration ---")
 	for _, category := range cfg.Categories {
 		reasoningStatus := "DISABLED"
-		if category.UseReasoning {
-			reasoningStatus = "ENABLED"
+		bestModel := "no-model"
+		if len(category.ModelScores) > 0 {
+			bestModel = category.ModelScores[0].Model
+			if category.ModelScores[0].UseReasoning != nil && *category.ModelScores[0].UseReasoning {
+				reasoningStatus = "ENABLED"
+			}
 		}
 
-		fmt.Printf("Category: %-15s | Reasoning: %-8s | %s\n",
-			category.Name, reasoningStatus, category.ReasoningDescription)
+		fmt.Printf("Category: %-15s | Model: %-12s | Reasoning: %-8s | %s\n",
+			category.Name, bestModel, reasoningStatus, category.ReasoningDescription)
 	}
 
 	// Test queries with expected categories
@@ -72,7 +82,9 @@ func TestReasoningModeConfiguration(t *testing.T) {
 
 		for _, category := range cfg.Categories {
 			if strings.EqualFold(category.Name, test.category) {
-				useReasoning = category.UseReasoning
+				if len(category.ModelScores) > 0 && category.ModelScores[0].UseReasoning != nil {
+					useReasoning = *category.ModelScores[0].UseReasoning
+				}
 				reasoningDesc = category.ReasoningDescription
 				found = true
 				break
@@ -117,18 +129,21 @@ func TestReasoningModeConfiguration(t *testing.T) {
 	fmt.Print(`
 categories:
 - name: math
-  use_reasoning: true
   reasoning_description: "Mathematical problems require step-by-step reasoning"
   model_scores:
+  - model: deepseek-v31
+    score: 0.9
+    use_reasoning: true
   - model: phi4
-    score: 1.0
+    score: 0.7
+    use_reasoning: false
 
 - name: business
-  use_reasoning: false
   reasoning_description: "Business content is typically conversational"
   model_scores:
   - model: phi4
     score: 0.8
+    use_reasoning: false
 `)
 }
 
@@ -143,7 +158,12 @@ func GetReasoningConfigurationSummary(cfg *config.RouterConfig) map[string]inter
 	categoriesWithoutReasoning := []string{}
 
 	for _, category := range cfg.Categories {
-		if category.UseReasoning {
+		bestModelReasoning := false
+		if len(category.ModelScores) > 0 && category.ModelScores[0].UseReasoning != nil {
+			bestModelReasoning = *category.ModelScores[0].UseReasoning
+		}
+
+		if bestModelReasoning {
 			reasoningEnabled++
 			categoriesWithReasoning = append(categoriesWithReasoning, category.Name)
 		} else {
@@ -170,12 +190,21 @@ func DemonstrateConfigurationUsage() {
 	fmt.Print(`
 categories:
 - name: math
-  use_reasoning: true
   reasoning_description: "Mathematical problems require step-by-step reasoning"
+  model_scores:
+  - model: deepseek-v31
+    score: 0.9
+    use_reasoning: true
+  - model: phi4
+    score: 0.7
+    use_reasoning: false
 
 - name: creative_writing
-  use_reasoning: false
   reasoning_description: "Creative content flows better without structured reasoning"
+  model_scores:
+  - model: phi4
+    score: 0.8
+    use_reasoning: false
 `)
 
 	fmt.Println("\n2. Use in Go code:")
diff --git a/src/semantic-router/pkg/extproc/reason_mode_selector.go b/src/semantic-router/pkg/extproc/reason_mode_selector.go
index 58f880a3..f91cfad2 100644
--- a/src/semantic-router/pkg/extproc/reason_mode_selector.go
+++ b/src/semantic-router/pkg/extproc/reason_mode_selector.go
@@ -32,16 +32,24 @@ func (r *OpenAIRouter) getReasoningModeAndCategory(query string) (bool, string)
 	// Normalize category name for consistent lookup
 	normalizedCategory := strings.ToLower(strings.TrimSpace(categoryName))
 
-	// Look up the category in the configuration
+	// Look up the category in the configuration and get the best model for it
 	for _, category := range r.Config.Categories {
 		if strings.EqualFold(category.Name, normalizedCategory) {
-			reasoningStatus := "DISABLED"
-			if category.UseReasoning {
-				reasoningStatus = "ENABLED"
+			// Get the best model for this category (first in the list)
+			if len(category.ModelScores) > 0 {
+				bestModel := category.ModelScores[0]
+				useReasoning := bestModel.UseReasoning != nil && *bestModel.UseReasoning
+				reasoningStatus := "DISABLED"
+				if useReasoning {
+					reasoningStatus = "ENABLED"
+				}
+				observability.Infof("Reasoning mode decision: Category '%s', Model '%s' → %s",
+					categoryName, bestModel.Model, reasoningStatus)
+				return useReasoning, categoryName
+			} else {
+				observability.Infof("Category '%s' has no models configured, defaulting to no reasoning mode", categoryName)
+				return false, categoryName
 			}
-			observability.Infof("Reasoning mode decision: Category '%s' → %s",
-				categoryName, reasoningStatus)
-			return category.UseReasoning, categoryName
 		}
 	}
 
@@ -233,25 +241,30 @@ func (r *OpenAIRouter) logReasoningConfiguration() {
 		return
 	}
 
-	reasoningEnabled := []string{}
-	reasoningDisabled := []string{}
+	categoriesWithReasoning := []string{}
+	categoriesWithoutReasoning := []string{}
 
 	for _, category := range r.Config.Categories {
-		if category.UseReasoning {
-			reasoningEnabled = append(reasoningEnabled, category.Name)
+		// Check if the best model (first model) for this category supports reasoning
+		if len(category.ModelScores) > 0 && category.ModelScores[0].UseReasoning != nil && *category.ModelScores[0].UseReasoning {
+			categoriesWithReasoning = append(categoriesWithReasoning, fmt.Sprintf("%s(%s)", category.Name, category.ModelScores[0].Model))
 		} else {
-			reasoningDisabled = append(reasoningDisabled, category.Name)
+			modelName := "no-models"
+			if len(category.ModelScores) > 0 {
+				modelName = category.ModelScores[0].Model
+			}
+			categoriesWithoutReasoning = append(categoriesWithoutReasoning, fmt.Sprintf("%s(%s)", category.Name, modelName))
 		}
 	}
 
 	observability.Infof("Reasoning configuration - Total categories: %d", len(r.Config.Categories))
 
-	if len(reasoningEnabled) > 0 {
-		observability.Infof("Reasoning ENABLED for categories (%d): %v", len(reasoningEnabled), reasoningEnabled)
+	if len(categoriesWithReasoning) > 0 {
+		observability.Infof("Reasoning ENABLED for categories (%d): %v", len(categoriesWithReasoning), categoriesWithReasoning)
 	}
 
-	if len(reasoningDisabled) > 0 {
-		observability.Infof("Reasoning DISABLED for categories (%d): %v", len(reasoningDisabled), reasoningDisabled)
+	if len(categoriesWithoutReasoning) > 0 {
+		observability.Infof("Reasoning DISABLED for categories (%d): %v", len(categoriesWithoutReasoning), categoriesWithoutReasoning)
 	}
 }
 
@@ -280,12 +293,13 @@ func (r *OpenAIRouter) LogReasoningConfigurationSummary() {
 
 	enabledCount := 0
 	for _, category := range r.Config.Categories {
-		if category.UseReasoning {
+		// Check if the best model (first model) for this category supports reasoning
+		if len(category.ModelScores) > 0 && category.ModelScores[0].UseReasoning != nil && *category.ModelScores[0].UseReasoning {
 			enabledCount++
 		}
 	}
 
-	observability.Infof("Reasoning mode summary: %d/%d categories have reasoning enabled", enabledCount, len(r.Config.Categories))
+	observability.Infof("Reasoning mode summary: %d/%d categories have reasoning enabled (based on best model)", enabledCount, len(r.Config.Categories))
 }
 
 // getReasoningEffort returns the reasoning effort level for a given category
diff --git a/src/semantic-router/pkg/extproc/reasoning_integration_test.go b/src/semantic-router/pkg/extproc/reasoning_integration_test.go
index e4c9dbc7..a45bec07 100644
--- a/src/semantic-router/pkg/extproc/reasoning_integration_test.go
+++ b/src/semantic-router/pkg/extproc/reasoning_integration_test.go
@@ -15,13 +15,19 @@ func TestReasoningModeIntegration(t *testing.T) {
 		Categories: []config.Category{
 			{
 				Name:                 "math",
-				UseReasoning:         true,
 				ReasoningDescription: "Mathematical problems require step-by-step reasoning",
+				ModelScores: []config.ModelScore{
+					{Model: "deepseek-v31", Score: 0.9, UseReasoning: config.BoolPtr(true)},
+					{Model: "phi4", Score: 0.7, UseReasoning: config.BoolPtr(false)},
+				},
 			},
 			{
 				Name:                 "business",
-				UseReasoning:         false,
 				ReasoningDescription: "Business content is typically conversational",
+				ModelScores: []config.ModelScore{
+					{Model: "phi4", Score: 0.8, UseReasoning: config.BoolPtr(false)},
+					{Model: "deepseek-v31", Score: 0.6, UseReasoning: config.BoolPtr(false)},
+				},
 			},
 		},
 		ReasoningFamilies: map[string]config.ReasoningFamilyConfig{
@@ -75,8 +81,8 @@ func TestReasoningModeIntegration(t *testing.T) {
 
 		// Test the configuration logic directly
 		mathCategory := cfg.Categories[0] // math category
-		if !mathCategory.UseReasoning {
-			t.Error("Math category should have UseReasoning set to true in configuration")
+		if len(mathCategory.ModelScores) == 0 || mathCategory.ModelScores[0].UseReasoning == nil || !*mathCategory.ModelScores[0].UseReasoning {
+			t.Error("Math category's best model should have UseReasoning set to true in configuration")
 		}
 	})
 
@@ -281,8 +287,10 @@ func TestReasoningModeConfigurationValidation(t *testing.T) {
 			name: "Math category with reasoning enabled",
 			category: config.Category{
 				Name:                 "math",
-				UseReasoning:         true,
 				ReasoningDescription: "Mathematical problems require step-by-step reasoning",
+				ModelScores: []config.ModelScore{
+					{Model: "deepseek-v31", Score: 0.9, UseReasoning: config.BoolPtr(true)},
+				},
 			},
 			expected: true,
 		},
@@ -290,8 +298,10 @@ func TestReasoningModeConfigurationValidation(t *testing.T) {
 			name: "Business category with reasoning disabled",
 			category: config.Category{
 				Name:                 "business",
-				UseReasoning:         false,
 				ReasoningDescription: "Business content is typically conversational",
+				ModelScores: []config.ModelScore{
+					{Model: "phi4", Score: 0.8, UseReasoning: config.BoolPtr(false)},
+				},
 			},
 			expected: false,
 		},
@@ -299,8 +309,10 @@ func TestReasoningModeConfigurationValidation(t *testing.T) {
 			name: "Science category with reasoning enabled",
 			category: config.Category{
 				Name:                 "science",
-				UseReasoning:         true,
 				ReasoningDescription: "Scientific concepts benefit from structured analysis",
+				ModelScores: []config.ModelScore{
+					{Model: "deepseek-v31", Score: 0.9, UseReasoning: config.BoolPtr(true)},
+				},
 			},
 			expected: true,
 		},
@@ -308,9 +320,15 @@ func TestReasoningModeConfigurationValidation(t *testing.T) {
 
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
-			if tc.category.UseReasoning != tc.expected {
-				t.Errorf("Expected UseReasoning %v for %s, got %v",
-					tc.expected, tc.category.Name, tc.category.UseReasoning)
+			// Read UseReasoning from the first ModelScores entry; a missing entry or nil pointer counts as false
+			bestModelReasoning := false
+			if len(tc.category.ModelScores) > 0 && tc.category.ModelScores[0].UseReasoning != nil {
+				bestModelReasoning = *tc.category.ModelScores[0].UseReasoning
+			}
+
+			if bestModelReasoning != tc.expected {
+				t.Errorf("Expected best model UseReasoning %v for %s, got %v",
+					tc.expected, tc.category.Name, bestModelReasoning)
 			}
 
 			// Verify description is not empty
diff --git a/src/semantic-router/pkg/utils/classification/classifier.go b/src/semantic-router/pkg/utils/classification/classifier.go
index 2ae684de..bf7b55c0 100644
--- a/src/semantic-router/pkg/utils/classification/classifier.go
+++ b/src/semantic-router/pkg/utils/classification/classifier.go
@@ -496,9 +496,15 @@ func (c *Classifier) ClassifyCategoryWithEntropy(text string) (string, float64,
 	}
 
 	// Build category reasoning map from configuration
+	// Each category's flag is the UseReasoning of its first ModelScores entry; false when that entry is missing or unset
 	categoryReasoningMap := make(map[string]bool)
 	for _, category := range c.Config.Categories {
-		categoryReasoningMap[strings.ToLower(category.Name)] = category.UseReasoning
+		useReasoning := false
+		if len(category.ModelScores) > 0 && category.ModelScores[0].UseReasoning != nil {
+			// Use the first (best) model's reasoning capability
+			useReasoning = *category.ModelScores[0].UseReasoning
+		}
+		categoryReasoningMap[strings.ToLower(category.Name)] = useReasoning
 	}
 
 	// Make entropy-based reasoning decision
diff --git a/src/semantic-router/pkg/utils/classification/classifier_test.go b/src/semantic-router/pkg/utils/classification/classifier_test.go
index 9fc1e736..5606b3a1 100644
--- a/src/semantic-router/pkg/utils/classification/classifier_test.go
+++ b/src/semantic-router/pkg/utils/classification/classifier_test.go
@@ -212,9 +212,9 @@ var _ = Describe("category classification and model selection", func() {
 
 				// Add UseReasoning configuration for the categories
 				classifier.Config.Categories = []config.Category{
-					{Name: "technology", UseReasoning: false},
-					{Name: "sports", UseReasoning: false},
-					{Name: "politics", UseReasoning: true},
+					{Name: "technology", ModelScores: []config.ModelScore{{Model: "phi4", Score: 0.8, UseReasoning: config.BoolPtr(false)}}},
+					{Name: "sports", ModelScores: []config.ModelScore{{Model: "phi4", Score: 0.8, UseReasoning: config.BoolPtr(false)}}},
+					{Name: "politics", ModelScores: []config.ModelScore{{Model: "deepseek-v31", Score: 0.9, UseReasoning: config.BoolPtr(true)}}},
 				}
 
 				category, confidence, reasoningDecision, err := classifier.ClassifyCategoryWithEntropy("This is about politics")
@@ -237,9 +237,9 @@ var _ = Describe("category classification and model selection", func() {
 				}
 
 				classifier.Config.Categories = []config.Category{
-					{Name: "technology", UseReasoning: false},
-					{Name: "sports", UseReasoning: true},
-					{Name: "politics", UseReasoning: true},
+					{Name: "technology", ModelScores: []config.ModelScore{{Model: "phi4", Score: 0.8, UseReasoning: config.BoolPtr(false)}}},
+					{Name: "sports", ModelScores: []config.ModelScore{{Model: "deepseek-v31", Score: 0.9, UseReasoning: config.BoolPtr(true)}}},
+					{Name: "politics", ModelScores: []config.ModelScore{{Model: "deepseek-v31", Score: 0.9, UseReasoning: config.BoolPtr(true)}}},
 				}
 
 				category, confidence, reasoningDecision, err := classifier.ClassifyCategoryWithEntropy("Ambiguous text")
diff --git a/website/docs/getting-started/configuration.md b/website/docs/getting-started/configuration.md
index de224553..3433f277 100644
--- a/website/docs/getting-started/configuration.md
+++ b/website/docs/getting-started/configuration.md
@@ -73,20 +73,20 @@ classifier:
 # Categories and routing rules
 categories:
 - name: math
-  use_reasoning: true  # Enable reasoning for math
   model_scores:
   - model: your-model
     score: 1.0
+    use_reasoning: true  # Enable reasoning for math problems
 - name: computer science
-  use_reasoning: true  # Enable reasoning for code
   model_scores:
   - model: your-model
     score: 1.0
+    use_reasoning: true  # Enable reasoning for code
 - name: other
-  use_reasoning: false # No reasoning for general queries
   model_scores:
   - model: your-model
     score: 0.8
+    use_reasoning: false # No reasoning for general queries
 
 default_model: your-model
 
@@ -203,32 +203,50 @@ classifier:
 
 ### Categories and Routing
 
-Define how different query types are handled:
+Define how different query types are handled. Each category can have multiple models with individual reasoning settings:
 
 ```yaml
 categories:
 - name: math
-  use_reasoning: true              # Enable reasoning for math problems
-  reasoning_description: "Mathematical problems require step-by-step reasoning"
   model_scores:
   - model: your-model
     score: 1.0                     # Preference score for this model
+    use_reasoning: true            # Enable reasoning for this model on math problems
 
 - name: computer science
-  use_reasoning: true              # Enable reasoning for code
   model_scores:
   - model: your-model
     score: 1.0
+    use_reasoning: true            # Enable reasoning for code
 
 - name: other
-  use_reasoning: false             # No reasoning for general queries
   model_scores:
   - model: your-model
     score: 0.8
+    use_reasoning: false           # No reasoning for general queries
 
 default_model: your-model          # Fallback model
 ```
 
+### Model-Specific Reasoning
+
+The `use_reasoning` field is configured per model within each category, allowing fine-grained control. List models from highest to lowest score: the entropy-based category classifier takes the first entry's `use_reasoning` as the category's reasoning setting.
+
+```yaml
+categories:
+- name: math
+  model_scores:
+  - model: gpt-oss-120b
+    score: 1.0
+    use_reasoning: true            # gpt-oss-120b supports reasoning for math
+  - model: deepseek-v31
+    score: 0.9
+    use_reasoning: true            # deepseek-v31 supports reasoning for math
+  - model: phi4
+    score: 0.8
+    use_reasoning: false           # phi4 doesn't support reasoning mode
+```
+```
+
 ### Model Reasoning Configuration
 
 Configure how different models handle reasoning mode syntax. This allows you to add new models without code changes:
@@ -322,18 +340,18 @@ Override the default effort level per category:
 ```yaml
 categories:
 - name: math
-  use_reasoning: true
   reasoning_effort: "high"        # Use high effort for complex math
   model_scores:
   - model: your-model
     score: 1.0
+    use_reasoning: true           # Enable reasoning for this model
 
 - name: general
-  use_reasoning: true
   reasoning_effort: "low"         # Use low effort for general queries
   model_scores:
   - model: your-model
     score: 1.0
+    use_reasoning: true           # Enable reasoning for this model
 ```
 
 ### Security Features
@@ -610,10 +628,12 @@ categories:
   model_scores:
   - model: math-model
     score: 1.0
+    use_reasoning: true           # Enable reasoning for math
 - name: other
   model_scores:
   - model: general-model
     score: 1.0
+    use_reasoning: false          # No reasoning for general queries
 ```
 
 **Load Balancing:**
@@ -820,7 +840,6 @@ The generated configuration includes:
 - **Model Performance Rankings:** Models are ranked by performance for each category
 - **Reasoning Settings:** Automatically configures reasoning requirements per category:
   - `use_reasoning`: Whether to use step-by-step reasoning
-  - `reasoning_description`: Description of reasoning approach
   - `reasoning_effort`: Required effort level (low/medium/high)
 - **Default Model Selection:** Best overall performing model is set as default
 - **Security and Performance Settings:** Pre-configured optimal values for: