Skip to content

Commit 446dfe6

Browse files
authored
fix: don't set reasoning effort for non-reasoning models (#97)
* fix: don't set reasoning effort for non-reasoning models
  Signed-off-by: Huamin Chen <[email protected]>
* review feedback
  Signed-off-by: Huamin Chen <[email protected]>
* update configuration doc
  Signed-off-by: Huamin Chen <[email protected]>
* review feedback
  Signed-off-by: Huamin Chen <[email protected]>
---------
Signed-off-by: Huamin Chen <[email protected]>
1 parent f7fb0da commit 446dfe6

File tree

7 files changed

+868
-176
lines changed

7 files changed

+868
-176
lines changed

config/config.yaml

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,28 @@ model_config:
5757
pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
5858
# Specify which endpoints can serve this model (optional - if not specified, uses all endpoints that list this model)
5959
preferred_endpoints: ["endpoint1", "endpoint3"]
60+
# Reasoning family - phi4 doesn't support reasoning, so omit this field
61+
62+
# Example: DeepSeek model with custom name
63+
"ds-v31-custom":
64+
reasoning_family: "deepseek" # This model uses DeepSeek reasoning syntax
65+
preferred_endpoints: ["endpoint1"]
66+
pii_policy:
67+
allow_by_default: true
68+
69+
# Example: Qwen3 model with custom name
70+
"my-qwen3-model":
71+
reasoning_family: "qwen3" # This model uses Qwen3 reasoning syntax
72+
preferred_endpoints: ["endpoint2"]
73+
pii_policy:
74+
allow_by_default: true
75+
76+
# Example: GPT-OSS model with custom name
77+
"custom-gpt-oss":
78+
reasoning_family: "gpt-oss" # This model uses GPT-OSS reasoning syntax
79+
preferred_endpoints: ["endpoint1"]
80+
pii_policy:
81+
allow_by_default: true
6082
gemma3:27b:
6183
pricing:
6284
currency: USD
@@ -236,7 +258,6 @@ categories:
236258
- model: phi4
237259
score: 0.2
238260
default_model: mistral-small3.1
239-
default_reasoning_effort: medium # Default reasoning effort level (low, medium, high)
240261

241262
# API Configuration
242263
api:
@@ -253,4 +274,25 @@ api:
253274
sample_rate: 1.0 # Collect metrics for all requests (1.0 = 100%, 0.5 = 50%)
254275
# Histogram buckets for metrics (directly configure what you need)
255276
duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
256-
size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
277+
size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
278+
279+
# Reasoning family configurations - define how different model families handle reasoning syntax
280+
reasoning_families:
281+
deepseek:
282+
type: "chat_template_kwargs"
283+
parameter: "thinking"
284+
285+
qwen3:
286+
type: "chat_template_kwargs"
287+
parameter: "enable_thinking"
288+
289+
gpt-oss:
290+
type: "reasoning_effort"
291+
parameter: "reasoning_effort"
292+
293+
gpt:
294+
type: "reasoning_effort"
295+
parameter: "reasoning_effort"
296+
297+
# Global default reasoning effort level
298+
default_reasoning_effort: medium # Default reasoning effort level (low, medium, high)

src/semantic-router/pkg/config/config.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ type RouterConfig struct {
4545
// Default reasoning effort level (low, medium, high) when not specified per category
4646
DefaultReasoningEffort string `yaml:"default_reasoning_effort,omitempty"`
4747

48+
// Reasoning family configurations to define how different model families handle reasoning syntax
49+
ReasoningFamilies map[string]ReasoningFamilyConfig `yaml:"reasoning_families,omitempty"`
50+
4851
// Semantic cache configuration
4952
SemanticCache SemanticCacheConfig `yaml:"semantic_cache"`
5053

@@ -208,6 +211,16 @@ type ModelParams struct {
208211

209212
// Optional pricing used for cost computation
210213
Pricing ModelPricing `yaml:"pricing,omitempty"`
214+
215+
// Reasoning family for this model (e.g., "deepseek", "qwen3", "gpt-oss")
216+
// If empty, the model doesn't support reasoning mode
217+
ReasoningFamily string `yaml:"reasoning_family,omitempty"`
218+
}
219+
220+
// ReasoningFamilyConfig defines how a reasoning family handles reasoning mode.
//
// Values of this type are the entries of the reasoning_families map in the
// router configuration; a model refers to one of them by name through its
// reasoning_family setting.
type ReasoningFamilyConfig struct {
	// Type is the kind of reasoning syntax the family uses:
	// "chat_template_kwargs" or "reasoning_effort".
	Type string `yaml:"type"`

	// Parameter is the parameter name used with that syntax, e.g.
	// "thinking", "enable_thinking", or "reasoning_effort".
	Parameter string `yaml:"parameter"`
}
212225

213226
// PIIPolicy represents the PII (Personally Identifiable Information) policy for a model
@@ -264,6 +277,41 @@ type Category struct {
264277
ModelScores []ModelScore `yaml:"model_scores"`
265278
}
266279

280+
// Legacy types - can be removed once migration is complete
281+
282+
// GetModelReasoningFamily returns the reasoning family configuration for a given model name
283+
func (rc *RouterConfig) GetModelReasoningFamily(modelName string) *ReasoningFamilyConfig {
284+
if rc == nil || rc.ModelConfig == nil || rc.ReasoningFamilies == nil {
285+
return nil
286+
}
287+
288+
// Look up the model in model_config
289+
modelParams, exists := rc.ModelConfig[modelName]
290+
if !exists || modelParams.ReasoningFamily == "" {
291+
return nil
292+
}
293+
294+
// Look up the reasoning family configuration
295+
familyConfig, exists := rc.ReasoningFamilies[modelParams.ReasoningFamily]
296+
if !exists {
297+
return nil
298+
}
299+
300+
return &familyConfig
301+
}
302+
303+
// Legacy functions - can be removed once migration is complete
304+
305+
// contains checks if a slice contains a string.
//
// It reports whether item is equal to at least one element of slice. A nil or
// empty slice never contains anything, so the result is false.
func contains(slice []string, item string) bool {
	for _, s := range slice {
		if s == item {
			return true
		}
	}
	return false
}
314+
267315
var (
268316
config *RouterConfig
269317
configOnce sync.Once

src/semantic-router/pkg/extproc/reason_mode_config_test.go

Lines changed: 98 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -199,10 +199,42 @@ requestBody := buildRequestBody(model, messages, useReasoning, stream)
199199
func TestAddReasoningModeToRequestBody(t *testing.T) {
200200
fmt.Println("=== Testing addReasoningModeToRequestBody Function ===")
201201

202-
// Create a mock router with minimal config
203-
router := &OpenAIRouter{}
202+
// Create a mock router with family-based reasoning config
203+
router := &OpenAIRouter{
204+
Config: &config.RouterConfig{
205+
DefaultReasoningEffort: "medium",
206+
ReasoningFamilies: map[string]config.ReasoningFamilyConfig{
207+
"deepseek": {
208+
Type: "chat_template_kwargs",
209+
Parameter: "thinking",
210+
},
211+
"qwen3": {
212+
Type: "chat_template_kwargs",
213+
Parameter: "enable_thinking",
214+
},
215+
"gpt-oss": {
216+
Type: "reasoning_effort",
217+
Parameter: "reasoning_effort",
218+
},
219+
},
220+
ModelConfig: map[string]config.ModelParams{
221+
"deepseek-v31": {
222+
ReasoningFamily: "deepseek",
223+
},
224+
"qwen3-model": {
225+
ReasoningFamily: "qwen3",
226+
},
227+
"gpt-oss-model": {
228+
ReasoningFamily: "gpt-oss",
229+
},
230+
"phi4": {
231+
// No reasoning family - doesn't support reasoning
232+
},
233+
},
234+
},
235+
}
204236

205-
// Test case 1: Basic request body
237+
// Test case 1: Basic request body with model that has NO reasoning support (phi4)
206238
originalRequest := map[string]interface{}{
207239
"model": "phi4",
208240
"messages": []map[string]interface{}{
@@ -235,29 +267,76 @@ func TestAddReasoningModeToRequestBody(t *testing.T) {
235267
return
236268
}
237269

238-
// Check if chat_template_kwargs was added
239-
if chatTemplateKwargs, exists := modifiedRequest["chat_template_kwargs"]; exists {
270+
// Check that chat_template_kwargs was NOT added for phi4 (since it has no reasoning_family)
271+
if _, exists := modifiedRequest["chat_template_kwargs"]; exists {
272+
fmt.Println("ERROR: chat_template_kwargs should not be added for phi4 (no reasoning family configured)")
273+
} else {
274+
fmt.Println("SUCCESS: chat_template_kwargs correctly not added for phi4 (no reasoning support)")
275+
}
276+
277+
// Check that reasoning_effort was NOT added for phi4
278+
if _, exists := modifiedRequest["reasoning_effort"]; exists {
279+
fmt.Println("ERROR: reasoning_effort should not be added for phi4 (no reasoning family configured)")
280+
} else {
281+
fmt.Println("SUCCESS: reasoning_effort correctly not added for phi4 (no reasoning support)")
282+
}
283+
284+
// Test case 2: Request with model that HAS reasoning support (deepseek-v31)
285+
fmt.Println("\n--- Test Case 2: Model with reasoning support ---")
286+
deepseekRequest := map[string]interface{}{
287+
"model": "deepseek-v31",
288+
"messages": []map[string]interface{}{
289+
{"role": "user", "content": "What is 2 + 2?"},
290+
},
291+
"stream": false,
292+
}
293+
294+
deepseekBody, err := json.Marshal(deepseekRequest)
295+
if err != nil {
296+
fmt.Printf("Error marshaling deepseek request: %v\n", err)
297+
return
298+
}
299+
300+
fmt.Printf("Original deepseek request:\n%s\n\n", string(deepseekBody))
301+
302+
// Add reasoning mode to DeepSeek model
303+
modifiedDeepseekBody, err := router.setReasoningModeToRequestBody(deepseekBody, true, "math")
304+
if err != nil {
305+
fmt.Printf("Error adding reasoning mode to deepseek: %v\n", err)
306+
return
307+
}
308+
309+
fmt.Printf("Modified deepseek request with reasoning:\n%s\n\n", string(modifiedDeepseekBody))
310+
311+
var modifiedDeepseekRequest map[string]interface{}
312+
if err := json.Unmarshal(modifiedDeepseekBody, &modifiedDeepseekRequest); err != nil {
313+
fmt.Printf("Error unmarshaling modified deepseek request: %v\n", err)
314+
return
315+
}
316+
317+
// Check that chat_template_kwargs WAS added for deepseek-v31
318+
if chatTemplateKwargs, exists := modifiedDeepseekRequest["chat_template_kwargs"]; exists {
240319
if kwargs, ok := chatTemplateKwargs.(map[string]interface{}); ok {
241320
if thinking, hasThinking := kwargs["thinking"]; hasThinking {
242321
if thinkingBool, isBool := thinking.(bool); isBool && thinkingBool {
243-
fmt.Println("SUCCESS: chat_template_kwargs with thinking: true was correctly added")
322+
fmt.Println("SUCCESS: chat_template_kwargs with thinking: true correctly added for deepseek-v31")
244323
} else {
245-
fmt.Printf("ERROR: thinking value is not true, got: %v\n", thinking)
324+
fmt.Printf("ERROR: thinking value is not true for deepseek-v31, got: %v\n", thinking)
246325
}
247326
} else {
248-
fmt.Println("ERROR: thinking field not found in chat_template_kwargs")
327+
fmt.Println("ERROR: thinking field not found in chat_template_kwargs for deepseek-v31")
249328
}
250329
} else {
251-
fmt.Printf("ERROR: chat_template_kwargs is not a map, got: %T\n", chatTemplateKwargs)
330+
fmt.Printf("ERROR: chat_template_kwargs is not a map for deepseek-v31, got: %T\n", chatTemplateKwargs)
252331
}
253332
} else {
254-
fmt.Println("ERROR: chat_template_kwargs not found in modified request")
333+
fmt.Println("ERROR: chat_template_kwargs not found for deepseek-v31 (should be present)")
255334
}
256335

257-
// Test case 2: Request with existing fields
258-
fmt.Println("\n--- Test Case 2: Request with existing fields ---")
336+
// Test case 3: Request with existing fields
337+
fmt.Println("\n--- Test Case 3: Request with existing fields ---")
259338
complexRequest := map[string]interface{}{
260-
"model": "phi4",
339+
"model": "deepseek-v31",
261340
"messages": []map[string]interface{}{
262341
{"role": "system", "content": "You are a helpful assistant"},
263342
{"role": "user", "content": "Solve x^2 + 5x + 6 = 0"},
@@ -290,20 +369,20 @@ func TestAddReasoningModeToRequestBody(t *testing.T) {
290369
allFieldsPreserved := true
291370
for _, field := range originalFields {
292371
if _, exists := modifiedComplexRequest[field]; !exists {
293-
fmt.Printf("ERROR: Original field '%s' was lost\n", field)
372+
fmt.Printf("ERROR: Original field '%s' was lost\n", field)
294373
allFieldsPreserved = false
295374
}
296375
}
297376

298377
if allFieldsPreserved {
299-
fmt.Println("SUCCESS: All original fields preserved")
378+
fmt.Println("SUCCESS: All original fields preserved")
300379
}
301380

302-
// Verify chat_template_kwargs was added
381+
// Verify chat_template_kwargs was added for deepseek-v31
303382
if _, exists := modifiedComplexRequest["chat_template_kwargs"]; exists {
304-
fmt.Println("SUCCESS: chat_template_kwargs added to complex request")
305-
fmt.Printf("Final modified request:\n%s\n", string(modifiedComplexBody))
383+
fmt.Println("SUCCESS: chat_template_kwargs added to complex deepseek request")
384+
fmt.Printf("Final modified deepseek request:\n%s\n", string(modifiedComplexBody))
306385
} else {
307-
fmt.Println("ERROR: chat_template_kwargs not added to complex request")
386+
fmt.Println("ERROR: chat_template_kwargs not added to complex deepseek request")
308387
}
309388
}

0 commit comments

Comments
 (0)