Skip to content

Commit 47b83d2

Browse files
committed
feat: support qwen3 reasoning
Signed-off-by: bitliu <[email protected]>
1 parent 9ee8ad4 commit 47b83d2

File tree

4 files changed

+130
-35
lines changed

4 files changed

+130
-35
lines changed

bench/router_reason_bench.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,14 @@ def build_extra_body_for_model(
         # Base: do not set thinking for DeepSeek
         return None

+    # Qwen3 family
+    if "qwen3" in lower:
+        if reasoning is True:
+            return {"chat_template_kwargs": {"enable_thinking": True}}
+        if reasoning is False:
+            return {"chat_template_kwargs": {"enable_thinking": False}}
+        return None
+
     # GPT OSS family
     if "gpt-oss" in lower or "openai/gpt-oss" in lower or "gpt_oss" in lower:
         # Base -> low effort, On -> provided effort (e.g., high)
@@ -527,9 +535,7 @@ def run_variants(q: Dict[str, Any]) -> List[Dict[str, Any]]:
527535

528536
with ThreadPoolExecutor(max_workers=concurrent_requests) as executor:
529537
futures = [executor.submit(run_variants, q) for q in questions_data]
530-
for future in tqdm(
531-
futures, total=len(futures), desc=f"Evaluating {model} (vLLM modes)"
532-
):
538+
for future in tqdm(futures, total=len(futures), desc=f"Evaluating {model} (vLLM modes)"):
533539
results.extend(future.result())
534540

535541
return pd.DataFrame(results)
@@ -578,9 +584,7 @@ def run_all_modes(q: Dict[str, Any]) -> List[Dict[str, Any]]:
578584

579585
with ThreadPoolExecutor(max_workers=concurrent_requests) as executor:
580586
futures = [executor.submit(run_all_modes, q) for q in questions]
581-
for future in tqdm(
582-
futures, total=len(futures), desc=f"Evaluating {model} (policies)"
583-
):
587+
for future in tqdm(futures, total=len(futures), desc=f"Evaluating {model} (policies)"):
584588
per_call_records.extend(future.result())
585589

586590
calls_df = pd.DataFrame(per_call_records)

src/semantic-router/pkg/extproc/endpoint_selection_test.go

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,22 @@ var _ = Describe("Endpoint Selection", func() {
7878
if header.Header.Key == "x-semantic-destination-endpoint" {
7979
endpointHeaderFound = true
8080
// Should be one of the configured endpoint addresses
81-
Expect(header.Header.Value).To(BeElementOf("127.0.0.1:8000", "127.0.0.1:8001"))
81+
// Check both Value and RawValue since implementation uses RawValue
82+
headerValue := header.Header.Value
83+
if headerValue == "" && len(header.Header.RawValue) > 0 {
84+
headerValue = string(header.Header.RawValue)
85+
}
86+
Expect(headerValue).To(BeElementOf("127.0.0.1:8000", "127.0.0.1:8001"))
8287
}
8388
if header.Header.Key == "x-selected-model" {
8489
modelHeaderFound = true
8590
// Should be one of the configured models
86-
Expect(header.Header.Value).To(BeElementOf("model-a", "model-b"))
91+
// Check both Value and RawValue since implementation may use either
92+
headerValue := header.Header.Value
93+
if headerValue == "" && len(header.Header.RawValue) > 0 {
94+
headerValue = string(header.Header.RawValue)
95+
}
96+
Expect(headerValue).To(BeElementOf("model-a", "model-b"))
8797
}
8898
}
8999

@@ -141,7 +151,11 @@ var _ = Describe("Endpoint Selection", func() {
141151
for _, header := range headerMutation.SetHeaders {
142152
if header.Header.Key == "x-semantic-destination-endpoint" {
143153
endpointHeaderFound = true
154+
// Check both Value and RawValue since implementation uses RawValue
144155
selectedEndpoint = header.Header.Value
156+
if selectedEndpoint == "" && len(header.Header.RawValue) > 0 {
157+
selectedEndpoint = string(header.Header.RawValue)
158+
}
145159
break
146160
}
147161
}
@@ -200,7 +214,11 @@ var _ = Describe("Endpoint Selection", func() {
200214
for _, header := range headerMutation.SetHeaders {
201215
if header.Header.Key == "x-semantic-destination-endpoint" {
202216
endpointHeaderFound = true
217+
// Check both Value and RawValue since implementation uses RawValue
203218
selectedEndpoint = header.Header.Value
219+
if selectedEndpoint == "" && len(header.Header.RawValue) > 0 {
220+
selectedEndpoint = string(header.Header.RawValue)
221+
}
204222
break
205223
}
206224
}

src/semantic-router/pkg/extproc/reason_mode_selector.go

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,25 @@ func (r *OpenAIRouter) shouldUseReasoningMode(query string) bool {
 	return false
 }

-// getChatTemplateKwargs returns the appropriate chat template kwargs based on reasoning mode and streaming
-func getChatTemplateKwargs(useReasoning bool) map[string]interface{} {
-	if useReasoning {
+// getChatTemplateKwargs returns the appropriate chat template kwargs based on model and reasoning mode
+func getChatTemplateKwargs(model string, useReasoning bool) map[string]interface{} {
+	lower := strings.ToLower(strings.TrimSpace(model))
+
+	// Qwen3: use enable_thinking true/false
+	if strings.Contains(lower, "qwen3") {
+		return map[string]interface{}{
+			"enable_thinking": useReasoning,
+		}
+	}
+
+	// DeepSeek v3 family: use thinking true/false
+	if strings.Contains(lower, "deepseek") || strings.Contains(lower, "ds") {
 		return map[string]interface{}{
 			"thinking": useReasoning,
 		}
 	}
+
+	// Default: no chat template kwargs
 	return nil
 }

@@ -57,8 +69,20 @@ func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled
 		return nil, fmt.Errorf("failed to parse request body: %w", err)
 	}

+	// Determine model for kwargs and logging
+	model := "unknown"
+	if modelValue, ok := requestMap["model"]; ok {
+		if modelStr, ok := modelValue.(string); ok {
+			model = modelStr
+		}
+	}
+
 	// Add chat_template_kwargs for reasoning mode
-	requestMap["chat_template_kwargs"] = getChatTemplateKwargs(enabled)
+	if kwargs := getChatTemplateKwargs(model, enabled); kwargs != nil {
+		requestMap["chat_template_kwargs"] = kwargs
+	} else {
+		delete(requestMap, "chat_template_kwargs")
+	}
 	// Also set Reasoning-Effort in openai request
 	// This is a hack to get the reasoning mode for openai/gpt-oss-20b to work
 	originalReasoningEffort, ok := requestMap["reasoning_effort"]
@@ -73,16 +97,8 @@ func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled
 		requestMap["reasoning_effort"] = originalReasoningEffort
 	}

-	// Get the model name for logging
-	model := "unknown"
-	if modelValue, ok := requestMap["model"]; ok {
-		if modelStr, ok := modelValue.(string); ok {
-			model = modelStr
-		}
-	}
-
 	log.Printf("Original reasoning effort: %s", originalReasoningEffort)
-	log.Printf("Added reasoning mode (thinking: %v) and reasoning effort (%s) to request for model: %s", enabled, requestMap["reasoning_effort"], model)
+	log.Printf("Added reasoning mode (enabled: %v) and reasoning effort (%s) to request for model: %s", enabled, requestMap["reasoning_effort"], model)

 	// Serialize back to JSON
 	modifiedBody, err := json.Marshal(requestMap)

src/semantic-router/pkg/extproc/reasoning_integration_test.go

Lines changed: 71 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,9 @@ func TestReasoningModeIntegration(t *testing.T) {
6565

6666
// Test case 3: Test addReasoningModeToRequestBody function
6767
t.Run("addReasoningModeToRequestBody adds correct fields", func(t *testing.T) {
68+
// Test with DeepSeek model (which supports chat_template_kwargs)
6869
originalRequest := map[string]interface{}{
69-
"model": "phi4",
70+
"model": "deepseek-v31",
7071
"messages": []map[string]interface{}{
7172
{"role": "user", "content": "What is 2 + 2?"},
7273
},
@@ -88,23 +89,23 @@ func TestReasoningModeIntegration(t *testing.T) {
8889
t.Fatalf("Failed to unmarshal modified request: %v", err)
8990
}
9091

91-
// Check if chat_template_kwargs was added
92+
// Check if chat_template_kwargs was added for DeepSeek model
9293
chatTemplateKwargs, exists := modifiedRequest["chat_template_kwargs"]
9394
if !exists {
94-
t.Error("chat_template_kwargs not found in modified request")
95+
t.Error("chat_template_kwargs not found in modified request for DeepSeek model")
9596
}
9697

97-
// Check if thinking: true was set
98+
// Check if thinking: true was set for DeepSeek model
9899
if kwargs, ok := chatTemplateKwargs.(map[string]interface{}); ok {
99100
if thinking, hasThinking := kwargs["thinking"]; hasThinking {
100101
if thinkingBool, isBool := thinking.(bool); !isBool || !thinkingBool {
101-
t.Errorf("Expected thinking: true, got %v", thinking)
102+
t.Errorf("Expected thinking: true for DeepSeek model, got %v", thinking)
102103
}
103104
} else {
104-
t.Error("thinking field not found in chat_template_kwargs")
105+
t.Error("thinking field not found in chat_template_kwargs for DeepSeek model")
105106
}
106107
} else {
107-
t.Errorf("chat_template_kwargs is not a map, got %T", chatTemplateKwargs)
108+
t.Errorf("chat_template_kwargs is not a map for DeepSeek model, got %T", chatTemplateKwargs)
108109
}
109110

110111
// Verify original fields are preserved
@@ -114,24 +115,80 @@ func TestReasoningModeIntegration(t *testing.T) {
114115
t.Errorf("Original field '%s' was lost", field)
115116
}
116117
}
118+
119+
// Test with unsupported model (phi4) - should not add chat_template_kwargs
120+
originalRequestPhi4 := map[string]interface{}{
121+
"model": "phi4",
122+
"messages": []map[string]interface{}{
123+
{"role": "user", "content": "What is 2 + 2?"},
124+
},
125+
"stream": false,
126+
}
127+
128+
originalBodyPhi4, err := json.Marshal(originalRequestPhi4)
129+
if err != nil {
130+
t.Fatalf("Failed to marshal phi4 request: %v", err)
131+
}
132+
133+
modifiedBodyPhi4, err := router.setReasoningModeToRequestBody(originalBodyPhi4, true)
134+
if err != nil {
135+
t.Fatalf("Failed to process phi4 request: %v", err)
136+
}
137+
138+
var modifiedRequestPhi4 map[string]interface{}
139+
if err := json.Unmarshal(modifiedBodyPhi4, &modifiedRequestPhi4); err != nil {
140+
t.Fatalf("Failed to unmarshal phi4 request: %v", err)
141+
}
142+
143+
// For phi4, chat_template_kwargs should not be added (since it's not supported)
144+
if _, exists := modifiedRequestPhi4["chat_template_kwargs"]; exists {
145+
t.Error("chat_template_kwargs should not be added for unsupported model phi4")
146+
}
147+
148+
// But reasoning_effort should still be set
149+
if reasoningEffort, exists := modifiedRequestPhi4["reasoning_effort"]; !exists {
150+
t.Error("reasoning_effort should be set for phi4 model")
151+
} else if reasoningEffort != "high" {
152+
t.Errorf("Expected reasoning_effort: high for phi4 model, got %v", reasoningEffort)
153+
}
117154
})
118155

119156
// Test case 4: Test getChatTemplateKwargs function
120157
t.Run("getChatTemplateKwargs returns correct values", func(t *testing.T) {
121-
// Test with reasoning enabled
122-
kwargs := getChatTemplateKwargs(true)
158+
// Test with DeepSeek model and reasoning enabled
159+
kwargs := getChatTemplateKwargs("deepseek-v31", true)
123160
if kwargs == nil {
124-
t.Error("Expected non-nil kwargs for reasoning enabled")
161+
t.Error("Expected non-nil kwargs for DeepSeek model with reasoning enabled")
125162
}
126163

127164
if thinking, ok := kwargs["thinking"]; !ok || thinking != true {
128-
t.Errorf("Expected thinking: true, got %v", thinking)
165+
t.Errorf("Expected thinking: true for DeepSeek model, got %v", thinking)
166+
}
167+
168+
// Test with DeepSeek model and reasoning disabled
169+
kwargs = getChatTemplateKwargs("deepseek-v31", false)
170+
if kwargs == nil {
171+
t.Error("Expected non-nil kwargs for DeepSeek model with reasoning disabled")
172+
}
173+
174+
if thinking, ok := kwargs["thinking"]; !ok || thinking != false {
175+
t.Errorf("Expected thinking: false for DeepSeek model, got %v", thinking)
176+
}
177+
178+
// Test with Qwen3 model and reasoning enabled
179+
kwargs = getChatTemplateKwargs("qwen3-7b", true)
180+
if kwargs == nil {
181+
t.Error("Expected non-nil kwargs for Qwen3 model with reasoning enabled")
182+
}
183+
184+
if enableThinking, ok := kwargs["enable_thinking"]; !ok || enableThinking != true {
185+
t.Errorf("Expected enable_thinking: true for Qwen3 model, got %v", enableThinking)
129186
}
130187

131-
// Test with reasoning disabled
132-
kwargs = getChatTemplateKwargs(false)
188+
// Test with unknown model (should return nil)
189+
kwargs = getChatTemplateKwargs("unknown-model", true)
133190
if kwargs != nil {
134-
t.Errorf("Expected nil kwargs for reasoning disabled, got %v", kwargs)
191+
t.Errorf("Expected nil kwargs for unknown model, got %v", kwargs)
135192
}
136193
})
137194

0 commit comments

Comments (0)