Skip to content

Commit 215340f

Browse files
committed
fix: don't set reasoning effort for non-reasoning models
Signed-off-by: Huamin Chen <[email protected]>
1 parent 464ed6c commit 215340f

File tree

5 files changed

+591
-153
lines changed

5 files changed

+591
-153
lines changed

config/config.yaml

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,4 +253,32 @@ api:
253253
sample_rate: 1.0 # Collect metrics for all requests (1.0 = 100%, 0.5 = 50%)
254254
# Histogram buckets for metrics (directly configure what you need)
255255
duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
256-
size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
256+
size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
257+
258+
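# Model reasoning configurations - define how different models handle reasoning syntax.
# Entries are tried in order and the first entry with a matching pattern wins,
# so list specific families (e.g. "gpt-oss") before broader ones (e.g. "gpt").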
259+
model_reasoning_configs:
260+
- name: "deepseek"
261+
patterns: ["deepseek", "ds-", "ds_", "ds:", "ds "]
262+
reasoning_syntax:
263+
type: "chat_template_kwargs"
264+
parameter: "thinking"
265+
266+
- name: "qwen3"
267+
patterns: ["qwen3"]
268+
reasoning_syntax:
269+
type: "chat_template_kwargs"
270+
parameter: "enable_thinking"
271+
272+
- name: "gpt-oss"
273+
patterns: ["gpt-oss", "gpt_oss"]
274+
reasoning_syntax:
275+
type: "reasoning_effort"
276+
parameter: "reasoning_effort"
277+
278+
- name: "gpt"
279+
patterns: ["gpt"]
280+
reasoning_syntax:
281+
type: "reasoning_effort"
282+
parameter: "reasoning_effort"
283+
284+
# No default fallback - unknown models should have no reasoning syntax applied

src/semantic-router/pkg/config/config.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"fmt"
55
"os"
66
"path/filepath"
7+
"strings"
78
"sync"
89

910
"gopkg.in/yaml.v3"
@@ -45,6 +46,9 @@ type RouterConfig struct {
4546
// Default reasoning effort level (low, medium, high) when not specified per category
4647
DefaultReasoningEffort string `yaml:"default_reasoning_effort,omitempty"`
4748

49+
// Model reasoning configurations to define how different models handle reasoning syntax
50+
ModelReasoningConfigs []ModelReasoningConfig `yaml:"model_reasoning_configs,omitempty"`
51+
4852
// Semantic cache configuration
4953
SemanticCache SemanticCacheConfig `yaml:"semantic_cache"`
5054

@@ -264,6 +268,73 @@ type Category struct {
264268
ModelScores []ModelScore `yaml:"model_scores"`
265269
}
266270

271+
// ModelReasoningSyntax describes which request field is used to turn reasoning on for a model family.
272+
type ModelReasoningSyntax struct {
273+
Type string `yaml:"type"` // syntax kind: "chat_template_kwargs" or "reasoning_effort"; any other value results in no reasoning fields being added
274+
Parameter string `yaml:"parameter"` // key placed inside chat_template_kwargs, e.g. "thinking" or "enable_thinking"; for the "reasoning_effort" type the request key is always "reasoning_effort"
275+
}
276+
277+
// ModelReasoningConfig binds a model family, identified by name patterns, to the reasoning syntax used for its requests.
278+
type ModelReasoningConfig struct {
279+
Name string `yaml:"name"` // family name, also used as the metrics label; "default" marks a fallback entry and "gpt-oss" is special-cased when reasoning is disabled
280+
Patterns []string `yaml:"patterns"` // matched case-insensitively against the model name: "*" matches anything, a trailing '-', '_', ':' or ' ' means prefix match, anything else is a substring match
281+
ReasoningSyntax ModelReasoningSyntax `yaml:"reasoning_syntax"` // request field that switches reasoning on for this family
282+
}
283+
284+
// FindModelReasoningConfig finds the appropriate reasoning configuration for a given model name
285+
func (rc *RouterConfig) FindModelReasoningConfig(modelName string) *ModelReasoningConfig {
286+
if rc == nil || len(rc.ModelReasoningConfigs) == 0 {
287+
return nil
288+
}
289+
290+
modelLower := strings.ToLower(strings.TrimSpace(modelName))
291+
292+
// Look for exact pattern matches first
293+
for _, config := range rc.ModelReasoningConfigs {
294+
for _, pattern := range config.Patterns {
295+
if matchesPattern(modelLower, strings.ToLower(pattern)) {
296+
return &config
297+
}
298+
}
299+
}
300+
301+
// Look for a default/fallback configuration
302+
for _, config := range rc.ModelReasoningConfigs {
303+
if config.Name == "default" || contains(config.Patterns, "*") {
304+
return &config
305+
}
306+
}
307+
308+
return nil
309+
}
310+
311+
// matchesPattern reports whether modelName matches pattern.
//
// Matching rules, checked in this order:
//   - an empty pattern never matches (strings.Contains(s, "") is always true,
//     so without this guard a blank entry in the configuration would match
//     every model);
//   - "*" matches every model name;
//   - a pattern ending in '-', '_', ':' or ' ' is an alias prefix: it matches
//     when modelName starts with the whole pattern, or equals the pattern
//     without its trailing separator (e.g. "ds-" matches "ds-v3" and "ds", but
//     not "foo-ds-1b"); a pattern that is only a separator has no bare alias
//     and therefore does not match the empty model name;
//   - any other pattern matches when it occurs anywhere in modelName.
//
// The comparison is case-sensitive; this function performs no case folding or
// trimming itself.
func matchesPattern(modelName, pattern string) bool {
	if pattern == "" {
		return false
	}
	if pattern == "*" {
		return true
	}

	switch pattern[len(pattern)-1] {
	case '-', '_', ':', ' ':
		alias := pattern[:len(pattern)-1]
		return strings.HasPrefix(modelName, pattern) || (alias != "" && modelName == alias)
	}

	return strings.Contains(modelName, pattern)
}
327+
328+
// contains reports whether item is present in slice, using exact (case-sensitive) string equality; a nil or empty slice yields false.
329+
func contains(slice []string, item string) bool {
330+
for _, s := range slice {
331+
if s == item {
332+
return true
333+
}
334+
}
335+
return false
336+
}
337+
267338
var (
268339
config *RouterConfig
269340
configOnce sync.Once

src/semantic-router/pkg/extproc/reason_mode_selector.go

Lines changed: 70 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"log"
77
"strings"
88

9+
"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
910
"github.com/vllm-project/semantic-router/semantic-router/pkg/metrics"
1011
)
1112

@@ -47,61 +48,42 @@ func (r *OpenAIRouter) getReasoningModeAndCategory(query string) (bool, string)
4748
return false, categoryName
4849
}
4950

50-
// hasDeepSeekAlias returns true if the model uses a short alias for DeepSeek (e.g., "ds-*")
51-
// Rules:
52-
// - Accept only when the model string starts with: "ds-", "ds_", "ds:", "ds " or exactly equals "ds"
53-
// - Do NOT match occurrences of "ds" in the middle of the model name (e.g., "foo-ds-1b")
54-
func hasDeepSeekAlias(lower string) bool {
55-
lower = strings.TrimSpace(lower)
56-
if strings.HasPrefix(lower, "ds") {
57-
if len(lower) == 2 { // exactly "ds"
58-
return true
59-
}
60-
sep := lower[2]
61-
return sep == '-' || sep == '_' || sep == ':' || sep == ' '
51+
// getModelReasoningConfig returns the reasoning configuration matching model from the router's config, or nil when the router has no config or no entry matches
52+
func (r *OpenAIRouter) getModelReasoningConfig(model string) *config.ModelReasoningConfig {
53+
if r.Config == nil {
54+
return nil
6255
}
63-
return false
56+
return r.Config.FindModelReasoningConfig(model)
6457
}
6558

66-
// getModelFamilyAndTemplateParam returns a normalized model family name and the template param to be used (if any)
67-
func getModelFamilyAndTemplateParam(model string) (string, string) {
68-
lower := strings.ToLower(strings.TrimSpace(model))
69-
if strings.Contains(lower, "qwen3") {
70-
return "qwen3", "enable_thinking"
71-
}
72-
if strings.Contains(lower, "deepseek") || hasDeepSeekAlias(lower) {
73-
return "deepseek", "thinking"
59+
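// buildReasoningRequestFields returns the request fields that enable reasoning for model, together with the
// applied reasoning effort ("N/A" when no effort was set). The returned map is nil when the model has no
// reasoning configuration, when useReasoning is false, or when the configured syntax type is not recognized.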
60+
func (r *OpenAIRouter) buildReasoningRequestFields(model string, useReasoning bool, categoryName string) (map[string]interface{}, string) {
61+
modelConfig := r.getModelReasoningConfig(model)
62+
if modelConfig == nil {
63+
// No configuration found for this model - don't apply any reasoning syntax
64+
// Unknown models should not have reasoning fields added
65+
return nil, "N/A"
7466
}
75-
// GPT-OSS family and generic GPT fall back to using reasoning_effort (OpenAI-compatible field)
76-
if strings.Contains(lower, "gpt-oss") || strings.Contains(lower, "gpt_oss") {
77-
return "gpt-oss", "reasoning_effort"
78-
}
79-
if strings.Contains(lower, "gpt") {
80-
return "gpt", "reasoning_effort"
81-
}
82-
return "unknown", ""
83-
}
8467

85-
// getChatTemplateKwargs returns the appropriate chat template kwargs based on model and reasoning mode
86-
func getChatTemplateKwargs(model string, useReasoning bool) map[string]interface{} {
87-
lower := strings.ToLower(strings.TrimSpace(model))
88-
89-
// Qwen3: use enable_thinking true/false
90-
if strings.Contains(lower, "qwen3") {
91-
return map[string]interface{}{
92-
"enable_thinking": useReasoning,
93-
}
68+
if !useReasoning {
69+
// When reasoning is disabled, don't add any reasoning fields
70+
return nil, "N/A"
9471
}
9572

96-
// DeepSeek v3 family: use thinking true/false
97-
if strings.Contains(lower, "deepseek") || strings.Contains(lower, "ds") {
98-
return map[string]interface{}{
99-
"thinking": useReasoning,
73+
// When reasoning is enabled, use the configured syntax
74+
switch modelConfig.ReasoningSyntax.Type {
75+
case "chat_template_kwargs":
76+
kwargs := map[string]interface{}{
77+
modelConfig.ReasoningSyntax.Parameter: useReasoning,
10078
}
79+
return map[string]interface{}{"chat_template_kwargs": kwargs}, "N/A"
80+
case "reasoning_effort":
81+
effort := r.getReasoningEffort(categoryName)
82+
return map[string]interface{}{"reasoning_effort": effort}, effort
83+
default:
84+
// Unknown reasoning syntax type - don't apply anything
85+
return nil, "N/A"
10186
}
102-
103-
// Default: no chat template kwargs for unknown models
104-
return nil
10587
}
10688

10789
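// setReasoningModeToRequestBody clears any existing chat_template_kwargs / reasoning_effort fields in the JSON request body and re-adds the ones dictated by the model's reasoning configuration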
@@ -120,49 +102,60 @@ func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled
120102
}
121103
}
122104

123-
family, param := getModelFamilyAndTemplateParam(model)
124-
125-
// Add chat_template_kwargs for reasoning mode
126-
kwargs := getChatTemplateKwargs(model, enabled)
127-
if kwargs != nil {
128-
requestMap["chat_template_kwargs"] = kwargs
129-
} else {
130-
delete(requestMap, "chat_template_kwargs")
131-
}
132-
// Also set Reasoning-Effort in openai request
133-
// This is a hack to get the reasoning mode for openai/gpt-oss-20b to work
134-
originalReasoningEffort, ok := requestMap["reasoning_effort"]
135-
if !ok {
136-
// This seems to be the default for openai/gpt-oss models
137-
originalReasoningEffort = "low"
105+
// Get original reasoning effort for potential preservation
106+
originalReasoningEffort, hasOriginalEffort := requestMap["reasoning_effort"]
107+
if !hasOriginalEffort {
108+
originalReasoningEffort = "low" // Default for compatibility
138109
}
139-
var appliedEffort string
110+
111+
// Clear existing reasoning-related fields
112+
delete(requestMap, "chat_template_kwargs")
113+
delete(requestMap, "reasoning_effort")
114+
115+
var appliedEffort string = "N/A"
116+
140117
if enabled {
141-
// Use configurable reasoning effort based on category
142-
effort := r.getReasoningEffort(categoryName)
143-
requestMap["reasoning_effort"] = effort
118+
// When reasoning is enabled, build the appropriate fields
119+
reasoningFields, effort := r.buildReasoningRequestFields(model, enabled, categoryName)
120+
for key, value := range reasoningFields {
121+
requestMap[key] = value
122+
}
144123
appliedEffort = effort
145124
} else {
146-
requestMap["reasoning_effort"] = originalReasoningEffort
147-
if s, ok := originalReasoningEffort.(string); ok {
148-
appliedEffort = s
125+
// When reasoning is disabled, only preserve reasoning_effort for gpt-oss models
126+
modelConfig := r.getModelReasoningConfig(model)
127+
if modelConfig != nil && modelConfig.Name == "gpt-oss" {
128+
requestMap["reasoning_effort"] = originalReasoningEffort
129+
if s, ok := originalReasoningEffort.(string); ok {
130+
appliedEffort = s
131+
}
149132
}
133+
// For all other models, reasoning fields remain cleared
150134
}
151135

152136
log.Printf("Original reasoning effort: %s", originalReasoningEffort)
153-
log.Printf("Added reasoning mode (enabled: %v) and reasoning effort (%s) to request for model: %s", enabled, requestMap["reasoning_effort"], model)
137+
log.Printf("Added reasoning mode (enabled: %v) and reasoning effort (%s) to request for model: %s", enabled, appliedEffort, model)
154138

155139
// Record metrics for template usage and effort when enabled
156140
if enabled {
157-
// If we applied a known template param, record its usage
158-
if kwargs != nil && param != "" {
159-
metrics.RecordReasoningTemplateUsage(family, param)
160-
} else if kwargs == nil && param == "reasoning_effort" {
161-
// For GPT/GPT-OSS, we only set reasoning_effort
162-
metrics.RecordReasoningTemplateUsage(family, param)
141+
modelConfig := r.getModelReasoningConfig(model)
142+
modelFamily := "unknown"
143+
templateParam := "reasoning_effort" // default fallback
144+
145+
if modelConfig != nil {
146+
modelFamily = modelConfig.Name
147+
if modelConfig.ReasoningSyntax.Type == "chat_template_kwargs" {
148+
templateParam = modelConfig.ReasoningSyntax.Parameter
149+
} else {
150+
templateParam = "reasoning_effort"
151+
}
152+
}
153+
154+
// Record template usage and effort
155+
metrics.RecordReasoningTemplateUsage(modelFamily, templateParam)
156+
if appliedEffort != "N/A" {
157+
metrics.RecordReasoningEffortUsage(modelFamily, appliedEffort)
163158
}
164-
// Record which effort level was used for this family
165-
metrics.RecordReasoningEffortUsage(family, appliedEffort)
166159
}
167160

168161
// Serialize back to JSON

0 commit comments

Comments
 (0)