Skip to content

Commit 4b45157

Browse files
authored
Merge pull request #21 from OneZero-Y/fix/configurable-reasoning-effort
fix: make reasoning effort configurable
2 parents 7b4eb2c + 2bdfc9d commit 4b45157

File tree

6 files changed

+58
-16
lines changed

6 files changed

+58
-16
lines changed

config/config.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ categories:
9696
- name: business
9797
use_reasoning: false
9898
reasoning_description: "Business content is typically conversational"
99+
reasoning_effort: low # Business conversations need low reasoning effort (only applied when use_reasoning is true)
99100
model_scores:
100101
- model: phi4
101102
score: 0.8
@@ -136,6 +137,7 @@ categories:
136137
- name: chemistry
137138
use_reasoning: true
138139
reasoning_description: "Chemical reactions and formulas require systematic thinking"
140+
reasoning_effort: high # Chemistry requires high reasoning effort
139141
model_scores:
140142
- model: mistral-small3.1
141143
score: 0.8
@@ -186,6 +188,7 @@ categories:
186188
- name: math
187189
use_reasoning: true
188190
reasoning_description: "Mathematical problems require step-by-step reasoning"
191+
reasoning_effort: high # Math problems need high reasoning effort
189192
model_scores:
190193
- model: phi4
191194
score: 1.0
@@ -233,4 +236,5 @@ categories:
233236
score: 0.6
234237
- model: phi4
235238
score: 0.2
236-
default_model: mistral-small3.1
239+
default_model: mistral-small3.1
240+
default_reasoning_effort: medium # Default reasoning effort level (low, medium, high)

src/semantic-router/pkg/config/config.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ type RouterConfig struct {
4141
// Default LLM model to use if no match is found
4242
DefaultModel string `yaml:"default_model"`
4343

44+
// Default reasoning effort level (low, medium, high) when not specified per category
45+
DefaultReasoningEffort string `yaml:"default_reasoning_effort,omitempty"`
46+
4447
// Semantic cache configuration
4548
SemanticCache SemanticCacheConfig `yaml:"semantic_cache"`
4649

@@ -217,6 +220,7 @@ type Category struct {
217220
Description string `yaml:"description,omitempty"`
218221
UseReasoning bool `yaml:"use_reasoning"`
219222
ReasoningDescription string `yaml:"reasoning_description,omitempty"`
223+
ReasoningEffort string `yaml:"reasoning_effort,omitempty"` // Configurable reasoning effort level (low, medium, high)
220224
ModelScores []ModelScore `yaml:"model_scores"`
221225
}
222226

src/semantic-router/pkg/extproc/reason_mode_config_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ func TestAddReasoningModeToRequestBody(t *testing.T) {
220220
fmt.Printf("Original request body:\n%s\n\n", string(originalBody))
221221

222222
// Add reasoning mode
223-
modifiedBody, err := router.setReasoningModeToRequestBody(originalBody, true)
223+
modifiedBody, err := router.setReasoningModeToRequestBody(originalBody, true, "math")
224224
if err != nil {
225225
fmt.Printf("Error adding reasoning mode: %v\n", err)
226226
return
@@ -273,7 +273,7 @@ func TestAddReasoningModeToRequestBody(t *testing.T) {
273273
return
274274
}
275275

276-
modifiedComplexBody, err := router.setReasoningModeToRequestBody(complexBody, true)
276+
modifiedComplexBody, err := router.setReasoningModeToRequestBody(complexBody, true, "chemistry")
277277
if err != nil {
278278
fmt.Printf("Error adding reasoning mode to complex request: %v\n", err)
279279
return

src/semantic-router/pkg/extproc/reason_mode_selector.go

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,19 @@ import (
99

1010
// shouldUseReasoningMode determines if reasoning mode should be enabled based on the query category
1111
func (r *OpenAIRouter) shouldUseReasoningMode(query string) bool {
12+
enabled, _ := r.getReasoningModeAndCategory(query)
13+
return enabled
14+
}
15+
16+
// getReasoningModeAndCategory determines if reasoning mode should be enabled and returns the category name
17+
func (r *OpenAIRouter) getReasoningModeAndCategory(query string) (bool, string) {
1218
// Get the category for this query using the existing classification system
1319
categoryName := r.findCategoryForClassification(query)
1420

1521
// If no category was determined (empty string), default to no reasoning
1622
if categoryName == "" {
1723
log.Printf("No category determined for query, defaulting to no reasoning mode")
18-
return false
24+
return false, ""
1925
}
2026

2127
// Normalize category name for consistent lookup
@@ -30,13 +36,13 @@ func (r *OpenAIRouter) shouldUseReasoningMode(query string) bool {
3036
}
3137
log.Printf("Reasoning mode decision: Category '%s' → %s",
3238
categoryName, reasoningStatus)
33-
return category.UseReasoning
39+
return category.UseReasoning, categoryName
3440
}
3541
}
3642

3743
// If category not found in config, default to no reasoning
3844
log.Printf("Category '%s' not found in configuration, defaulting to no reasoning mode", categoryName)
39-
return false
45+
return false, categoryName
4046
}
4147

4248
// getChatTemplateKwargs returns the appropriate chat template kwargs based on model and reasoning mode
@@ -57,12 +63,12 @@ func getChatTemplateKwargs(model string, useReasoning bool) map[string]interface
5763
}
5864
}
5965

60-
// Default: no chat template kwargs
66+
// Default: no chat template kwargs for unknown models
6167
return nil
6268
}
6369

6470
// setReasoningModeToRequestBody adds chat_template_kwargs to the JSON request body
65-
func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled bool) ([]byte, error) {
71+
func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled bool, categoryName string) ([]byte, error) {
6672
// Parse the JSON request body
6773
var requestMap map[string]interface{}
6874
if err := json.Unmarshal(requestBody, &requestMap); err != nil {
@@ -91,8 +97,9 @@ func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled
9197
originalReasoningEffort = "low"
9298
}
9399
if enabled {
94-
// TODO: make this configurable
95-
requestMap["reasoning_effort"] = "high"
100+
// Use configurable reasoning effort based on category
101+
effort := r.getReasoningEffort(categoryName)
102+
requestMap["reasoning_effort"] = effort
96103
} else {
97104
requestMap["reasoning_effort"] = originalReasoningEffort
98105
}
@@ -170,3 +177,30 @@ func (r *OpenAIRouter) LogReasoningConfigurationSummary() {
170177

171178
log.Printf("Reasoning mode summary: %d/%d categories have reasoning enabled", enabledCount, len(r.Config.Categories))
172179
}
180+
181+
// getReasoningEffort returns the reasoning effort level for a given category
182+
func (r *OpenAIRouter) getReasoningEffort(categoryName string) string {
183+
// Handle case where Config is nil (e.g., in tests)
184+
if r.Config == nil {
185+
return "medium"
186+
}
187+
188+
// Find the category configuration
189+
for _, category := range r.Config.Categories {
190+
if category.Name == categoryName {
191+
// Use category-specific effort if configured
192+
if category.ReasoningEffort != "" {
193+
return category.ReasoningEffort
194+
}
195+
break
196+
}
197+
}
198+
199+
// Fall back to global default if configured
200+
if r.Config.DefaultReasoningEffort != "" {
201+
return r.Config.DefaultReasoningEffort
202+
}
203+
204+
// Final fallback to "medium" as a reasonable default
205+
return "medium"
206+
}

src/semantic-router/pkg/extproc/reasoning_integration_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ func TestReasoningModeIntegration(t *testing.T) {
7979
t.Fatalf("Failed to marshal original request: %v", err)
8080
}
8181

82-
modifiedBody, err := router.setReasoningModeToRequestBody(originalBody, true)
82+
modifiedBody, err := router.setReasoningModeToRequestBody(originalBody, true, "math")
8383
if err != nil {
8484
t.Fatalf("Failed to add reasoning mode: %v", err)
8585
}
@@ -130,7 +130,7 @@ func TestReasoningModeIntegration(t *testing.T) {
130130
t.Fatalf("Failed to marshal phi4 request: %v", err)
131131
}
132132

133-
modifiedBodyPhi4, err := router.setReasoningModeToRequestBody(originalBodyPhi4, true)
133+
modifiedBodyPhi4, err := router.setReasoningModeToRequestBody(originalBodyPhi4, true, "math")
134134
if err != nil {
135135
t.Fatalf("Failed to process phi4 request: %v", err)
136136
}
@@ -148,8 +148,8 @@ func TestReasoningModeIntegration(t *testing.T) {
148148
// But reasoning_effort should still be set
149149
if reasoningEffort, exists := modifiedRequestPhi4["reasoning_effort"]; !exists {
150150
t.Error("reasoning_effort should be set for phi4 model")
151-
} else if reasoningEffort != "high" {
152-
t.Errorf("Expected reasoning_effort: high for phi4 model, got %v", reasoningEffort)
151+
} else if reasoningEffort != "medium" {
152+
t.Errorf("Expected reasoning_effort: medium for phi4 model (default), got %v", reasoningEffort)
153153
}
154154
})
155155

src/semantic-router/pkg/extproc/request_handler.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
335335
log.Printf("Routing to model: %s", matchedModel)
336336

337337
// Check reasoning mode for this category
338-
useReasoning := r.shouldUseReasoningMode(userContent)
338+
useReasoning, categoryName := r.getReasoningModeAndCategory(userContent)
339339
log.Printf("Reasoning mode decision for this query: %v on [%s] model", useReasoning, matchedModel)
340340

341341
// Track the model load for the selected model
@@ -366,7 +366,7 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
366366
return nil, status.Errorf(codes.Internal, "error serializing modified request: %v", err)
367367
}
368368

369-
modifiedBody, err = r.setReasoningModeToRequestBody(modifiedBody, useReasoning)
369+
modifiedBody, err = r.setReasoningModeToRequestBody(modifiedBody, useReasoning, categoryName)
370370
if err != nil {
371371
log.Printf("Error setting reasoning mode %v to request: %v", useReasoning, err)
372372
return nil, status.Errorf(codes.Internal, "error setting reasoning mode: %v", err)

0 commit comments

Comments
 (0)