From 1f330469318813f53d71328aa747eb3fb4ee4410 Mon Sep 17 00:00:00 2001 From: Huamin Chen Date: Fri, 19 Sep 2025 16:19:36 +0000 Subject: [PATCH] refactor: move use_reasoning to the model level from the category level to support non-reasoning models Signed-off-by: Huamin Chen --- config/config.yaml | 285 +++++------------- src/semantic-router/pkg/config/config.go | 70 ++++- src/semantic-router/pkg/config/config_test.go | 34 ++- .../pkg/extproc/reason_mode_config_test.go | 57 +++- .../pkg/extproc/reason_mode_selector.go | 50 +-- .../pkg/extproc/reasoning_integration_test.go | 38 ++- .../pkg/utils/classification/classifier.go | 8 +- .../utils/classification/classifier_test.go | 12 +- website/docs/getting-started/configuration.md | 41 ++- 9 files changed, 317 insertions(+), 278 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 184ddfa4..10094fd2 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -2,34 +2,22 @@ bert_model: model_id: sentence-transformers/all-MiniLM-L12-v2 threshold: 0.6 use_cpu: true + semantic_cache: enabled: true backend_type: "memory" # Options: "memory" or "milvus" similarity_threshold: 0.8 max_entries: 1000 # Only applies to memory backend ttl_seconds: 3600 - eviction_policy: "fifo" # "fifo", "lru", "lfu", currently only supports memory backend - - # For production environments, use Milvus for scalable caching: - # backend_type: "milvus" - # backend_config_path: "config/cache/milvus.yaml" + eviction_policy: "fifo" - # Development/Testing: Use in-memory cache (current configuration) - # - Fast startup and no external dependencies - # - Limited to single instance scaling - # - Data lost on restart - - # Production: Use Milvus vector database - # - Horizontally scalable and persistent - # - Supports distributed deployments - # - Requires Milvus cluster setup - # - To enable: uncomment the lines above and install Milvus dependencies tools: - enabled: true # Set to true to enable automatic tool selection - top_k: 3 # Number 
of most relevant tools to select - similarity_threshold: 0.2 # Threshold for tool similarity + enabled: true + top_k: 3 + similarity_threshold: 0.2 tools_db_path: "config/tools_db.json" - fallback_to_empty: true # If true, return no tools on failure; if false, return error + fallback_to_empty: true + prompt_guard: enabled: true use_modernbert: true @@ -38,258 +26,114 @@ prompt_guard: use_cpu: true jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json" -# vLLM Endpoints Configuration - supports multiple endpoints, each can serve multiple models +# vLLM Endpoints Configuration vllm_endpoints: - name: "endpoint1" address: "127.0.0.1" - port: 11434 - models: - - "phi4" - - "gemma3:27b" - weight: 1 # Load balancing weight - health_check_path: "/health" # Optional health check endpoint - - name: "endpoint2" - address: "127.0.0.1" - port: 11434 + port: 8000 models: - - "mistral-small3.1" + - "openai/gpt-oss-20b" weight: 1 health_check_path: "/health" - - name: "endpoint3" - address: "127.0.0.1" - port: 11434 - models: - - "phi4" # Same model can be served by multiple endpoints for redundancy - - "mistral-small3.1" - weight: 2 # Higher weight for more powerful endpoint model_config: - phi4: - pricing: - currency: USD - prompt_per_1m: 0.07 - completion_per_1m: 0.35 - pii_policy: - allow_by_default: false # Deny all PII by default - pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types - # Specify which endpoints can serve this model (optional - if not specified, uses all endpoints that list this model) - preferred_endpoints: ["endpoint1", "endpoint3"] - # Reasoning family - phi4 doesn't support reasoning, so omit this field - - # Example: DeepSeek model with custom name - "ds-v31-custom": - reasoning_family: "deepseek" # This model uses DeepSeek reasoning syntax + "openai/gpt-oss-20b": + reasoning_family: "gpt-oss" # This model uses GPT-OSS reasoning syntax 
preferred_endpoints: ["endpoint1"] pii_policy: allow_by_default: true - # Example: Qwen3 model with custom name - "my-qwen3-model": - reasoning_family: "qwen3" # This model uses Qwen3 reasoning syntax - preferred_endpoints: ["endpoint2"] - pii_policy: - allow_by_default: true - - # Example: GPT-OSS model with custom name - "custom-gpt-oss": - reasoning_family: "gpt-oss" # This model uses GPT-OSS reasoning syntax - preferred_endpoints: ["endpoint1"] - pii_policy: - allow_by_default: true - gemma3:27b: - pricing: - currency: USD - prompt_per_1m: 0.067 - completion_per_1m: 0.267 - pii_policy: - allow_by_default: false # Deny all PII by default - pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types - preferred_endpoints: ["endpoint1"] - "mistral-small3.1": - pricing: - currency: USD - prompt_per_1m: 0.1 - completion_per_1m: 0.3 - pii_policy: - allow_by_default: false # Deny all PII by default - pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types - preferred_endpoints: ["endpoint2", "endpoint3"] - -# Classifier configuration for text classification +# Classifier configuration classifier: category_model: - model_id: "models/category_classifier_modernbert-base_model" # TODO: Use local model for now before the code can download the entire model from huggingface + model_id: "models/category_classifier_modernbert-base_model" use_modernbert: true threshold: 0.6 use_cpu: true category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json" pii_model: - model_id: "models/pii_classifier_modernbert-base_presidio_token_model" # TODO: Use local model for now before the code can download the entire model from huggingface + model_id: "models/pii_classifier_modernbert-base_presidio_token_model" use_modernbert: true threshold: 0.7 use_cpu: true pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json" + 
+# Categories with new use_reasoning field structure categories: - name: business - use_reasoning: false - reasoning_description: "Business content is typically conversational" - reasoning_effort: low # Business conversations need low reasoning effort model_scores: - - model: phi4 - score: 0.8 - - model: gemma3:27b - score: 0.4 - - model: mistral-small3.1 - score: 0.2 + - model: openai/gpt-oss-20b + score: 0.7 + use_reasoning: false # Business performs better without reasoning - name: law - use_reasoning: false - reasoning_description: "Legal content is typically explanatory" model_scores: - - model: gemma3:27b - score: 0.8 - - model: phi4 - score: 0.6 - - model: mistral-small3.1 + - model: openai/gpt-oss-20b score: 0.4 + use_reasoning: false - name: psychology - use_reasoning: false - reasoning_description: "Psychology content is usually explanatory" model_scores: - - model: mistral-small3.1 + - model: openai/gpt-oss-20b score: 0.6 - - model: gemma3:27b - score: 0.4 - - model: phi4 - score: 0.4 + use_reasoning: false - name: biology - use_reasoning: true - reasoning_description: "Biological processes benefit from structured analysis" model_scores: - - model: mistral-small3.1 - score: 0.8 - - model: gemma3:27b - score: 0.6 - - model: phi4 - score: 0.2 + - model: openai/gpt-oss-20b + score: 0.9 + use_reasoning: false - name: chemistry - use_reasoning: true - reasoning_description: "Chemical reactions and formulas require systematic thinking" - reasoning_effort: high # Chemistry requires high reasoning effort model_scores: - - model: mistral-small3.1 - score: 0.8 - - model: gemma3:27b - score: 0.6 - - model: phi4 + - model: openai/gpt-oss-20b score: 0.6 + use_reasoning: true # Enable reasoning for complex chemistry - name: history - use_reasoning: false - reasoning_description: "Historical content is narrative-based" model_scores: - - model: mistral-small3.1 - score: 0.8 - - model: phi4 - score: 0.6 - - model: gemma3:27b - score: 0.4 + - model: openai/gpt-oss-20b + 
score: 0.7 + use_reasoning: false - name: other - use_reasoning: false - reasoning_description: "General content doesn't require reasoning" model_scores: - - model: gemma3:27b - score: 0.8 - - model: phi4 - score: 0.6 - - model: mistral-small3.1 - score: 0.6 + - model: openai/gpt-oss-20b + score: 0.7 + use_reasoning: false - name: health - use_reasoning: false - reasoning_description: "Health information is typically informational" model_scores: - - model: gemma3:27b - score: 0.8 - - model: phi4 - score: 0.8 - - model: mistral-small3.1 - score: 0.6 + - model: openai/gpt-oss-20b + score: 0.5 + use_reasoning: false - name: economics - use_reasoning: false - reasoning_description: "Economic discussions are usually explanatory" model_scores: - - model: gemma3:27b - score: 0.8 - - model: mistral-small3.1 - score: 0.8 - - model: phi4 - score: 0.0 + - model: openai/gpt-oss-20b + score: 1.0 + use_reasoning: false - name: math - use_reasoning: true - reasoning_description: "Mathematical problems require step-by-step reasoning" - reasoning_effort: high # Math problems need high reasoning effort model_scores: - - model: phi4 + - model: openai/gpt-oss-20b score: 1.0 - - model: mistral-small3.1 - score: 0.8 - - model: gemma3:27b - score: 0.6 + use_reasoning: true # Enable reasoning for complex math - name: physics - use_reasoning: true - reasoning_description: "Physics concepts need logical analysis" model_scores: - - model: gemma3:27b - score: 0.4 - - model: phi4 - score: 0.4 - - model: mistral-small3.1 - score: 0.4 + - model: openai/gpt-oss-20b + score: 0.7 + use_reasoning: true # Enable reasoning for physics - name: computer science - use_reasoning: true - reasoning_description: "Programming and algorithms need logical reasoning" model_scores: - - model: gemma3:27b + - model: openai/gpt-oss-20b score: 0.6 - - model: mistral-small3.1 - score: 0.6 - - model: phi4 - score: 0.0 + use_reasoning: false - name: philosophy - use_reasoning: false - reasoning_description: 
"Philosophical discussions are conversational" model_scores: - - model: phi4 - score: 0.6 - - model: gemma3:27b - score: 0.2 - - model: mistral-small3.1 - score: 0.2 + - model: openai/gpt-oss-20b + score: 0.5 + use_reasoning: false - name: engineering - use_reasoning: true - reasoning_description: "Engineering problems require systematic problem-solving" model_scores: - - model: gemma3:27b - score: 0.6 - - model: mistral-small3.1 - score: 0.6 - - model: phi4 - score: 0.2 - -default_model: mistral-small3.1 + - model: openai/gpt-oss-20b + score: 0.7 + use_reasoning: false -# API Configuration -api: - batch_classification: - # Metrics configuration for monitoring batch classification performance - metrics: - enabled: true # Enable comprehensive metrics collection - detailed_goroutine_tracking: true # Track individual goroutine lifecycle - high_resolution_timing: false # Use nanosecond precision timing - sample_rate: 1.0 # Collect metrics for all requests (1.0 = 100%, 0.5 = 50%) - # Histogram buckets for metrics (directly configure what you need) - duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30] - size_buckets: [1, 2, 5, 10, 20, 50, 100, 200] +default_model: openai/gpt-oss-20b -# Reasoning family configurations - define how different model families handle reasoning syntax +# Reasoning family configurations reasoning_families: deepseek: type: "chat_template_kwargs" @@ -302,10 +146,23 @@ reasoning_families: gpt-oss: type: "reasoning_effort" parameter: "reasoning_effort" - gpt: type: "reasoning_effort" parameter: "reasoning_effort" # Global default reasoning effort level -default_reasoning_effort: medium # Default reasoning effort level (low, medium, high) +default_reasoning_effort: high + +# API Configuration +api: + batch_classification: + max_batch_size: 100 + concurrency_threshold: 5 + max_concurrency: 8 + metrics: + enabled: true + detailed_goroutine_tracking: true + high_resolution_timing: false + sample_rate: 1.0 + 
duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30] + size_buckets: [1, 2, 5, 10, 20, 50, 100, 200] diff --git a/src/semantic-router/pkg/config/config.go b/src/semantic-router/pkg/config/config.go index 9a3bfb70..cf4aafa9 100644 --- a/src/semantic-router/pkg/config/config.go +++ b/src/semantic-router/pkg/config/config.go @@ -261,14 +261,14 @@ func (c *RouterConfig) GetCacheSimilarityThreshold() float32 { // Category represents a category for routing queries type ModelScore struct { - Model string `yaml:"model"` - Score float64 `yaml:"score"` + Model string `yaml:"model"` + Score float64 `yaml:"score"` + UseReasoning *bool `yaml:"use_reasoning"` // Pointer to detect missing field } type Category struct { Name string `yaml:"name"` Description string `yaml:"description,omitempty"` - UseReasoning bool `yaml:"use_reasoning"` ReasoningDescription string `yaml:"reasoning_description,omitempty"` ReasoningEffort string `yaml:"reasoning_effort,omitempty"` // Configurable reasoning effort level (low, medium, high) ModelScores []ModelScore `yaml:"model_scores"` @@ -336,6 +336,36 @@ func LoadConfig(configPath string) (*RouterConfig, error) { return config, nil } +// BoolPtr returns a pointer to a bool value (helper for tests and config) +func BoolPtr(b bool) *bool { + return &b +} + +// validateConfigStructure performs additional validation on the parsed config +func validateConfigStructure(cfg *RouterConfig) error { + // Ensure all categories have at least one model with scores + for _, category := range cfg.Categories { + if len(category.ModelScores) == 0 { + return fmt.Errorf("category '%s' has no model_scores defined - each category must have at least one model", category.Name) + } + + // Validate each model score has the required fields + for i, modelScore := range category.ModelScores { + if modelScore.Model == "" { + return fmt.Errorf("category '%s', model_scores[%d]: model name cannot be empty", category.Name, i) + } + if 
modelScore.Score <= 0 { + return fmt.Errorf("category '%s', model '%s': score must be greater than 0, got %f", category.Name, modelScore.Model, modelScore.Score) + } + if modelScore.UseReasoning == nil { + return fmt.Errorf("category '%s', model '%s': missing required field 'use_reasoning'", category.Name, modelScore.Model) + } + } + } + + return nil +} + // ParseConfigFile parses the YAML config file without touching the global cache. func ParseConfigFile(configPath string) (*RouterConfig, error) { // Resolve symlinks to handle Kubernetes ConfigMap mounts @@ -347,10 +377,17 @@ func ParseConfigFile(configPath string) (*RouterConfig, error) { if err != nil { return nil, fmt.Errorf("failed to read config file: %w", err) } + cfg := &RouterConfig{} if err := yaml.Unmarshal(data, cfg); err != nil { return nil, fmt.Errorf("failed to parse config file: %w", err) } + + // Validation after parsing + if err := validateConfigStructure(cfg); err != nil { + return nil, err + } + return cfg, nil } @@ -573,6 +610,33 @@ func (c *RouterConfig) SelectBestEndpointAddressForModel(modelName string) (stri return fmt.Sprintf("%s:%d", bestEndpoint.Address, bestEndpoint.Port), true } +// GetModelReasoningForCategory returns whether a specific model supports reasoning in a given category +func (c *RouterConfig) GetModelReasoningForCategory(categoryName string, modelName string) bool { + for _, category := range c.Categories { + if category.Name == categoryName { + for _, modelScore := range category.ModelScores { + if modelScore.Model == modelName { + return modelScore.UseReasoning != nil && *modelScore.UseReasoning + } + } + } + } + return false // Default to false if category or model not found +} + +// GetBestModelForCategory returns the best scoring model for a given category +func (c *RouterConfig) GetBestModelForCategory(categoryName string) (string, bool) { + for _, category := range c.Categories { + if category.Name == categoryName { + if len(category.ModelScores) > 0 { + 
useReasoning := category.ModelScores[0].UseReasoning != nil && *category.ModelScores[0].UseReasoning + return category.ModelScores[0].Model, useReasoning + } + } + } + return "", false // Return empty string and false if category not found or has no models +} + // ValidateEndpoints validates that all configured models have at least one endpoint func (c *RouterConfig) ValidateEndpoints() error { // Get all models from categories diff --git a/src/semantic-router/pkg/config/config_test.go b/src/semantic-router/pkg/config/config_test.go index 5a820a09..839fc682 100644 --- a/src/semantic-router/pkg/config/config_test.go +++ b/src/semantic-router/pkg/config/config_test.go @@ -67,8 +67,10 @@ categories: model_scores: - model: "model-a" score: 0.9 + use_reasoning: true - model: "model-b" score: 0.8 + use_reasoning: false default_model: "model-b" @@ -339,12 +341,15 @@ categories: model_scores: - model: "model1" score: 0.9 + use_reasoning: true - model: "model2" score: 0.8 + use_reasoning: false - name: "category2" model_scores: - model: "model3" score: 0.95 + use_reasoning: true default_model: "default-model" ` err := os.WriteFile(configFile, []byte(configContent), 0o644) @@ -387,7 +392,10 @@ default_model: "default-model" configContent := ` categories: - name: "empty_category" - model_scores: [] + model_scores: + - model: "fallback-model" + score: 0.5 + use_reasoning: false default_model: "fallback-model" ` err := os.WriteFile(configFile, []byte(configContent), 0o644) @@ -640,8 +648,16 @@ prompt_guard: categories: - name: "category1" description: "Description for category 1" + model_scores: + - model: "model1" + score: 0.9 + use_reasoning: true - name: "category2" description: "Description for category 2" + model_scores: + - model: "model2" + score: 0.8 + use_reasoning: false ` err := os.WriteFile(configFile, []byte(configContent), 0o644) Expect(err).NotTo(HaveOccurred()) @@ -666,8 +682,16 @@ categories: categories: - name: "category1" description: "Has description" + 
model_scores: + - model: "model1" + score: 0.9 + use_reasoning: true - name: "category2" # No description field + model_scores: + - model: "model2" + score: 0.8 + use_reasoning: false ` err := os.WriteFile(configFile, []byte(configContent), 0o644) Expect(err).NotTo(HaveOccurred()) @@ -743,6 +767,10 @@ default_model: "model-with-hyphens_and_underscores" categories: - name: "category with spaces" description: "Description with special chars: @#$%^&*()" + model_scores: + - model: "model-with-hyphens_and_underscores" + score: 0.9 + use_reasoning: true ` err := os.WriteFile(configFile, []byte(configContent), 0o644) Expect(err).NotTo(HaveOccurred()) @@ -794,8 +822,10 @@ categories: model_scores: - model: "model-a" score: 0.9 + use_reasoning: true - model: "model-b" score: 0.8 + use_reasoning: false default_model: "model-b" ` @@ -932,6 +962,7 @@ categories: model_scores: - model: "missing-model" score: 0.9 + use_reasoning: true default_model: "existing-model" ` @@ -1210,6 +1241,7 @@ categories: model_scores: - model: "gpt-4" score: 0.95 + use_reasoning: true default_model: "gpt-4" ` diff --git a/src/semantic-router/pkg/extproc/reason_mode_config_test.go b/src/semantic-router/pkg/extproc/reason_mode_config_test.go index 3eccfa4b..99b5639c 100644 --- a/src/semantic-router/pkg/extproc/reason_mode_config_test.go +++ b/src/semantic-router/pkg/extproc/reason_mode_config_test.go @@ -18,18 +18,24 @@ func TestReasoningModeConfiguration(t *testing.T) { Categories: []config.Category{ { Name: "math", - UseReasoning: true, ReasoningDescription: "Mathematical problems require step-by-step reasoning", + ModelScores: []config.ModelScore{ + {Model: "deepseek-v31", Score: 0.9, UseReasoning: config.BoolPtr(true)}, + }, }, { Name: "business", - UseReasoning: false, ReasoningDescription: "Business content is typically conversational", + ModelScores: []config.ModelScore{ + {Model: "phi4", Score: 0.8, UseReasoning: config.BoolPtr(false)}, + }, }, { Name: "biology", - UseReasoning: true, 
ReasoningDescription: "Biological processes benefit from structured analysis", + ModelScores: []config.ModelScore{ + {Model: "deepseek-v31", Score: 0.9, UseReasoning: config.BoolPtr(true)}, + }, }, }, } @@ -40,12 +46,16 @@ func TestReasoningModeConfiguration(t *testing.T) { fmt.Println("--- Reasoning Mode Configuration ---") for _, category := range cfg.Categories { reasoningStatus := "DISABLED" - if category.UseReasoning { - reasoningStatus = "ENABLED" + bestModel := "no-model" + if len(category.ModelScores) > 0 { + bestModel = category.ModelScores[0].Model + if category.ModelScores[0].UseReasoning != nil && *category.ModelScores[0].UseReasoning { + reasoningStatus = "ENABLED" + } } - fmt.Printf("Category: %-15s | Reasoning: %-8s | %s\n", - category.Name, reasoningStatus, category.ReasoningDescription) + fmt.Printf("Category: %-15s | Model: %-12s | Reasoning: %-8s | %s\n", + category.Name, bestModel, reasoningStatus, category.ReasoningDescription) } // Test queries with expected categories @@ -72,7 +82,9 @@ func TestReasoningModeConfiguration(t *testing.T) { for _, category := range cfg.Categories { if strings.EqualFold(category.Name, test.category) { - useReasoning = category.UseReasoning + if len(category.ModelScores) > 0 && category.ModelScores[0].UseReasoning != nil { + useReasoning = *category.ModelScores[0].UseReasoning + } reasoningDesc = category.ReasoningDescription found = true break @@ -117,18 +129,21 @@ func TestReasoningModeConfiguration(t *testing.T) { fmt.Print(` categories: - name: math - use_reasoning: true reasoning_description: "Mathematical problems require step-by-step reasoning" model_scores: + - model: deepseek-v31 + score: 0.9 + use_reasoning: true - model: phi4 - score: 1.0 + score: 0.7 + use_reasoning: false - name: business - use_reasoning: false reasoning_description: "Business content is typically conversational" model_scores: - model: phi4 score: 0.8 + use_reasoning: false `) } @@ -143,7 +158,12 @@ func 
GetReasoningConfigurationSummary(cfg *config.RouterConfig) map[string]inter categoriesWithoutReasoning := []string{} for _, category := range cfg.Categories { - if category.UseReasoning { + bestModelReasoning := false + if len(category.ModelScores) > 0 && category.ModelScores[0].UseReasoning != nil { + bestModelReasoning = *category.ModelScores[0].UseReasoning + } + + if bestModelReasoning { reasoningEnabled++ categoriesWithReasoning = append(categoriesWithReasoning, category.Name) } else { @@ -170,12 +190,21 @@ func DemonstrateConfigurationUsage() { fmt.Print(` categories: - name: math - use_reasoning: true reasoning_description: "Mathematical problems require step-by-step reasoning" + model_scores: + - model: deepseek-v31 + score: 0.9 + use_reasoning: true + - model: phi4 + score: 0.7 + use_reasoning: false - name: creative_writing - use_reasoning: false reasoning_description: "Creative content flows better without structured reasoning" + model_scores: + - model: phi4 + score: 0.8 + use_reasoning: false `) fmt.Println("\n2. 
Use in Go code:") diff --git a/src/semantic-router/pkg/extproc/reason_mode_selector.go b/src/semantic-router/pkg/extproc/reason_mode_selector.go index 58f880a3..f91cfad2 100644 --- a/src/semantic-router/pkg/extproc/reason_mode_selector.go +++ b/src/semantic-router/pkg/extproc/reason_mode_selector.go @@ -32,16 +32,24 @@ func (r *OpenAIRouter) getReasoningModeAndCategory(query string) (bool, string) // Normalize category name for consistent lookup normalizedCategory := strings.ToLower(strings.TrimSpace(categoryName)) - // Look up the category in the configuration + // Look up the category in the configuration and get the best model for it for _, category := range r.Config.Categories { if strings.EqualFold(category.Name, normalizedCategory) { - reasoningStatus := "DISABLED" - if category.UseReasoning { - reasoningStatus = "ENABLED" + // Get the best model for this category (first in the list) + if len(category.ModelScores) > 0 { + bestModel := category.ModelScores[0] + useReasoning := bestModel.UseReasoning != nil && *bestModel.UseReasoning + reasoningStatus := "DISABLED" + if useReasoning { + reasoningStatus = "ENABLED" + } + observability.Infof("Reasoning mode decision: Category '%s', Model '%s' → %s", + categoryName, bestModel.Model, reasoningStatus) + return useReasoning, categoryName + } else { + observability.Infof("Category '%s' has no models configured, defaulting to no reasoning mode", categoryName) + return false, categoryName } - observability.Infof("Reasoning mode decision: Category '%s' → %s", - categoryName, reasoningStatus) - return category.UseReasoning, categoryName } } @@ -233,25 +241,30 @@ func (r *OpenAIRouter) logReasoningConfiguration() { return } - reasoningEnabled := []string{} - reasoningDisabled := []string{} + categoriesWithReasoning := []string{} + categoriesWithoutReasoning := []string{} for _, category := range r.Config.Categories { - if category.UseReasoning { - reasoningEnabled = append(reasoningEnabled, category.Name) + // Check if the 
best model (first model) for this category supports reasoning + if len(category.ModelScores) > 0 && category.ModelScores[0].UseReasoning != nil && *category.ModelScores[0].UseReasoning { + categoriesWithReasoning = append(categoriesWithReasoning, fmt.Sprintf("%s(%s)", category.Name, category.ModelScores[0].Model)) } else { - reasoningDisabled = append(reasoningDisabled, category.Name) + modelName := "no-models" + if len(category.ModelScores) > 0 { + modelName = category.ModelScores[0].Model + } + categoriesWithoutReasoning = append(categoriesWithoutReasoning, fmt.Sprintf("%s(%s)", category.Name, modelName)) } } observability.Infof("Reasoning configuration - Total categories: %d", len(r.Config.Categories)) - if len(reasoningEnabled) > 0 { - observability.Infof("Reasoning ENABLED for categories (%d): %v", len(reasoningEnabled), reasoningEnabled) + if len(categoriesWithReasoning) > 0 { + observability.Infof("Reasoning ENABLED for categories (%d): %v", len(categoriesWithReasoning), categoriesWithReasoning) } - if len(reasoningDisabled) > 0 { - observability.Infof("Reasoning DISABLED for categories (%d): %v", len(reasoningDisabled), reasoningDisabled) + if len(categoriesWithoutReasoning) > 0 { + observability.Infof("Reasoning DISABLED for categories (%d): %v", len(categoriesWithoutReasoning), categoriesWithoutReasoning) } } @@ -280,12 +293,13 @@ func (r *OpenAIRouter) LogReasoningConfigurationSummary() { enabledCount := 0 for _, category := range r.Config.Categories { - if category.UseReasoning { + // Check if the best model (first model) for this category supports reasoning + if len(category.ModelScores) > 0 && category.ModelScores[0].UseReasoning != nil && *category.ModelScores[0].UseReasoning { enabledCount++ } } - observability.Infof("Reasoning mode summary: %d/%d categories have reasoning enabled", enabledCount, len(r.Config.Categories)) + observability.Infof("Reasoning mode summary: %d/%d categories have reasoning enabled (based on best model)", enabledCount, 
len(r.Config.Categories)) } // getReasoningEffort returns the reasoning effort level for a given category diff --git a/src/semantic-router/pkg/extproc/reasoning_integration_test.go b/src/semantic-router/pkg/extproc/reasoning_integration_test.go index e4c9dbc7..a45bec07 100644 --- a/src/semantic-router/pkg/extproc/reasoning_integration_test.go +++ b/src/semantic-router/pkg/extproc/reasoning_integration_test.go @@ -15,13 +15,19 @@ func TestReasoningModeIntegration(t *testing.T) { Categories: []config.Category{ { Name: "math", - UseReasoning: true, ReasoningDescription: "Mathematical problems require step-by-step reasoning", + ModelScores: []config.ModelScore{ + {Model: "deepseek-v31", Score: 0.9, UseReasoning: config.BoolPtr(true)}, + {Model: "phi4", Score: 0.7, UseReasoning: config.BoolPtr(false)}, + }, }, { Name: "business", - UseReasoning: false, ReasoningDescription: "Business content is typically conversational", + ModelScores: []config.ModelScore{ + {Model: "phi4", Score: 0.8, UseReasoning: config.BoolPtr(false)}, + {Model: "deepseek-v31", Score: 0.6, UseReasoning: config.BoolPtr(false)}, + }, }, }, ReasoningFamilies: map[string]config.ReasoningFamilyConfig{ @@ -75,8 +81,8 @@ func TestReasoningModeIntegration(t *testing.T) { // Test the configuration logic directly mathCategory := cfg.Categories[0] // math category - if !mathCategory.UseReasoning { - t.Error("Math category should have UseReasoning set to true in configuration") + if len(mathCategory.ModelScores) == 0 || mathCategory.ModelScores[0].UseReasoning == nil || !*mathCategory.ModelScores[0].UseReasoning { + t.Error("Math category's best model should have UseReasoning set to true in configuration") } }) @@ -281,8 +287,10 @@ func TestReasoningModeConfigurationValidation(t *testing.T) { name: "Math category with reasoning enabled", category: config.Category{ Name: "math", - UseReasoning: true, ReasoningDescription: "Mathematical problems require step-by-step reasoning", + ModelScores: []config.ModelScore{ 
+ {Model: "deepseek-v31", Score: 0.9, UseReasoning: config.BoolPtr(true)}, + }, }, expected: true, }, @@ -290,8 +298,10 @@ func TestReasoningModeConfigurationValidation(t *testing.T) { name: "Business category with reasoning disabled", category: config.Category{ Name: "business", - UseReasoning: false, ReasoningDescription: "Business content is typically conversational", + ModelScores: []config.ModelScore{ + {Model: "phi4", Score: 0.8, UseReasoning: config.BoolPtr(false)}, + }, }, expected: false, }, @@ -299,8 +309,10 @@ func TestReasoningModeConfigurationValidation(t *testing.T) { name: "Science category with reasoning enabled", category: config.Category{ Name: "science", - UseReasoning: true, ReasoningDescription: "Scientific concepts benefit from structured analysis", + ModelScores: []config.ModelScore{ + {Model: "deepseek-v31", Score: 0.9, UseReasoning: config.BoolPtr(true)}, + }, }, expected: true, }, @@ -308,9 +320,15 @@ func TestReasoningModeConfigurationValidation(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - if tc.category.UseReasoning != tc.expected { - t.Errorf("Expected UseReasoning %v for %s, got %v", - tc.expected, tc.category.Name, tc.category.UseReasoning) + // Check the best model's reasoning capability + bestModelReasoning := false + if len(tc.category.ModelScores) > 0 && tc.category.ModelScores[0].UseReasoning != nil { + bestModelReasoning = *tc.category.ModelScores[0].UseReasoning + } + + if bestModelReasoning != tc.expected { + t.Errorf("Expected best model UseReasoning %v for %s, got %v", + tc.expected, tc.category.Name, bestModelReasoning) } // Verify description is not empty diff --git a/src/semantic-router/pkg/utils/classification/classifier.go b/src/semantic-router/pkg/utils/classification/classifier.go index 2ae684de..bf7b55c0 100644 --- a/src/semantic-router/pkg/utils/classification/classifier.go +++ b/src/semantic-router/pkg/utils/classification/classifier.go @@ -496,9 +496,15 @@ func (c 
*Classifier) ClassifyCategoryWithEntropy(text string) (string, float64, } // Build category reasoning map from configuration + // Use the best model's reasoning capability for each category categoryReasoningMap := make(map[string]bool) for _, category := range c.Config.Categories { - categoryReasoningMap[strings.ToLower(category.Name)] = category.UseReasoning + useReasoning := false + if len(category.ModelScores) > 0 && category.ModelScores[0].UseReasoning != nil { + // Use the first (best) model's reasoning capability + useReasoning = *category.ModelScores[0].UseReasoning + } + categoryReasoningMap[strings.ToLower(category.Name)] = useReasoning } // Make entropy-based reasoning decision diff --git a/src/semantic-router/pkg/utils/classification/classifier_test.go b/src/semantic-router/pkg/utils/classification/classifier_test.go index 9fc1e736..5606b3a1 100644 --- a/src/semantic-router/pkg/utils/classification/classifier_test.go +++ b/src/semantic-router/pkg/utils/classification/classifier_test.go @@ -212,9 +212,9 @@ var _ = Describe("category classification and model selection", func() { // Add UseReasoning configuration for the categories classifier.Config.Categories = []config.Category{ - {Name: "technology", UseReasoning: false}, - {Name: "sports", UseReasoning: false}, - {Name: "politics", UseReasoning: true}, + {Name: "technology", ModelScores: []config.ModelScore{{Model: "phi4", Score: 0.8, UseReasoning: config.BoolPtr(false)}}}, + {Name: "sports", ModelScores: []config.ModelScore{{Model: "phi4", Score: 0.8, UseReasoning: config.BoolPtr(false)}}}, + {Name: "politics", ModelScores: []config.ModelScore{{Model: "deepseek-v31", Score: 0.9, UseReasoning: config.BoolPtr(true)}}}, } category, confidence, reasoningDecision, err := classifier.ClassifyCategoryWithEntropy("This is about politics") @@ -237,9 +237,9 @@ var _ = Describe("category classification and model selection", func() { } classifier.Config.Categories = []config.Category{ - {Name: "technology", 
UseReasoning: false}, - {Name: "sports", UseReasoning: true}, - {Name: "politics", UseReasoning: true}, + {Name: "technology", ModelScores: []config.ModelScore{{Model: "phi4", Score: 0.8, UseReasoning: config.BoolPtr(false)}}}, + {Name: "sports", ModelScores: []config.ModelScore{{Model: "deepseek-v31", Score: 0.9, UseReasoning: config.BoolPtr(true)}}}, + {Name: "politics", ModelScores: []config.ModelScore{{Model: "deepseek-v31", Score: 0.9, UseReasoning: config.BoolPtr(true)}}}, } category, confidence, reasoningDecision, err := classifier.ClassifyCategoryWithEntropy("Ambiguous text") diff --git a/website/docs/getting-started/configuration.md b/website/docs/getting-started/configuration.md index de224553..3433f277 100644 --- a/website/docs/getting-started/configuration.md +++ b/website/docs/getting-started/configuration.md @@ -73,20 +73,20 @@ classifier: # Categories and routing rules categories: - name: math - use_reasoning: true # Enable reasoning for math model_scores: - model: your-model score: 1.0 + use_reasoning: true # Enable reasoning for math problems - name: computer science - use_reasoning: true # Enable reasoning for code model_scores: - model: your-model score: 1.0 + use_reasoning: true # Enable reasoning for code - name: other - use_reasoning: false # No reasoning for general queries model_scores: - model: your-model score: 0.8 + use_reasoning: false # No reasoning for general queries default_model: your-model @@ -203,32 +203,50 @@ classifier: ### Categories and Routing -Define how different query types are handled: +Define how different query types are handled. 
Each category can have multiple models with individual reasoning settings: ```yaml categories: - name: math - use_reasoning: true # Enable reasoning for math problems - reasoning_description: "Mathematical problems require step-by-step reasoning" model_scores: - model: your-model score: 1.0 # Preference score for this model + use_reasoning: true # Enable reasoning for this model on math problems - name: computer science - use_reasoning: true # Enable reasoning for code model_scores: - model: your-model score: 1.0 + use_reasoning: true # Enable reasoning for code - name: other - use_reasoning: false # No reasoning for general queries model_scores: - model: your-model score: 0.8 + use_reasoning: false # No reasoning for general queries default_model: your-model # Fallback model ``` +### Model-Specific Reasoning + +The `use_reasoning` field is configured per model within each category, so reasoning can be enabled for models that support it and disabled for models that do not: + +```yaml +categories: +- name: math + model_scores: + - model: gpt-oss-120b + score: 1.0 + use_reasoning: true # gpt-oss-120b supports reasoning for math + - model: phi4 + score: 0.8 + use_reasoning: false # phi4 doesn't support reasoning mode + - model: deepseek-v31 + score: 0.9 + use_reasoning: true # deepseek-v31 supports reasoning for math +``` + ### Model Reasoning Configuration Configure how different models handle reasoning mode syntax.
This allows you to add new models without code changes: @@ -322,18 +340,18 @@ Override the default effort level per category: ```yaml categories: - name: math - use_reasoning: true reasoning_effort: "high" # Use high effort for complex math model_scores: - model: your-model score: 1.0 + use_reasoning: true # Enable reasoning for this model - name: general - use_reasoning: true reasoning_effort: "low" # Use low effort for general queries model_scores: - model: your-model score: 1.0 + use_reasoning: true # Enable reasoning for this model ``` ### Security Features @@ -610,10 +628,12 @@ categories: model_scores: - model: math-model score: 1.0 + use_reasoning: true # Enable reasoning for math - name: other model_scores: - model: general-model score: 1.0 + use_reasoning: false # No reasoning for general queries ``` **Load Balancing:** @@ -820,7 +840,6 @@ The generated configuration includes: - **Model Performance Rankings:** Models are ranked by performance for each category - **Reasoning Settings:** Automatically configures reasoning requirements per category: - `use_reasoning`: Whether to use step-by-step reasoning - - `reasoning_description`: Description of reasoning approach - `reasoning_effort`: Required effort level (low/medium/high) - **Default Model Selection:** Best overall performing model is set as default - **Security and Performance Settings:** Pre-configured optimal values for: