Skip to content

Commit 76d48e7

Browse files
authored
Merge branch 'main' into test/classifier-add-tests
2 parents ad93419 + 7104b72 commit 76d48e7

File tree

10 files changed

+878
-203
lines changed

10 files changed

+878
-203
lines changed

.github/workflows/test-and-build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Run Test
1+
name: Test And Build
22

33
on:
44
schedule:

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
[![Hugging Face](https://img.shields.io/badge/🤗%20Hugging%20Face-Community-yellow)](https://huggingface.co/LLM-Semantic-Router)
77
[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE)
88
[![Crates.io](https://img.shields.io/crates/v/candle-semantic-router.svg)](https://crates.io/crates/candle-semantic-router)
9+
![Test And Build](https://github.com/vllm-project/semantic-router/workflows/Test%20And%20Build/badge.svg)
910

1011
**📚 [Complete Documentation](https://vllm-semantic-router.com) | 🚀 [Quick Start](https://vllm-semantic-router.com/docs/getting-started/installation) | 📣 [Blog](https://vllm-semantic-router.com/blog/) | 📖 [API Reference](https://vllm-semantic-router.com/docs/api/router/)**
1112

config/config.yaml

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,28 @@ model_config:
5757
pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
5858
# Specify which endpoints can serve this model (optional - if not specified, uses all endpoints that list this model)
5959
preferred_endpoints: ["endpoint1", "endpoint3"]
60+
# reasoning_family is intentionally omitted here: phi4 doesn't support reasoning
61+
62+
# Example: DeepSeek model with custom name
63+
"ds-v31-custom":
64+
reasoning_family: "deepseek" # This model uses DeepSeek reasoning syntax
65+
preferred_endpoints: ["endpoint1"]
66+
pii_policy:
67+
allow_by_default: true
68+
69+
# Example: Qwen3 model with custom name
70+
"my-qwen3-model":
71+
reasoning_family: "qwen3" # This model uses Qwen3 reasoning syntax
72+
preferred_endpoints: ["endpoint2"]
73+
pii_policy:
74+
allow_by_default: true
75+
76+
# Example: GPT-OSS model with custom name
77+
"custom-gpt-oss":
78+
reasoning_family: "gpt-oss" # This model uses GPT-OSS reasoning syntax
79+
preferred_endpoints: ["endpoint1"]
80+
pii_policy:
81+
allow_by_default: true
6082
gemma3:27b:
6183
pricing:
6284
currency: USD
@@ -235,7 +257,6 @@ categories:
235257
- model: phi4
236258
score: 0.2
237259
default_model: mistral-small3.1
238-
default_reasoning_effort: medium # Default reasoning effort level (low, medium, high)
239260

240261
# API Configuration
241262
api:
@@ -252,4 +273,25 @@ api:
252273
sample_rate: 1.0 # Collect metrics for all requests (1.0 = 100%, 0.5 = 50%)
253274
# Histogram buckets for metrics (directly configure what you need)
254275
duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
255-
size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
276+
size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
277+
278+
# Reasoning family configurations - define how different model families handle reasoning syntax
279+
reasoning_families:
280+
deepseek:
281+
type: "chat_template_kwargs"
282+
parameter: "thinking"
283+
284+
qwen3:
285+
type: "chat_template_kwargs"
286+
parameter: "enable_thinking"
287+
288+
gpt-oss:
289+
type: "reasoning_effort"
290+
parameter: "reasoning_effort"
291+
292+
gpt:
293+
type: "reasoning_effort"
294+
parameter: "reasoning_effort"
295+
296+
# Global default reasoning effort level
297+
default_reasoning_effort: medium # Default reasoning effort level (low, medium, high)

src/semantic-router/pkg/config/config.go

Lines changed: 54 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"fmt"
55
"os"
66
"path/filepath"
7+
"slices"
78
"sync"
89

910
"gopkg.in/yaml.v3"
@@ -44,6 +45,9 @@ type RouterConfig struct {
4445
// Default reasoning effort level (low, medium, high) when not specified per category
4546
DefaultReasoningEffort string `yaml:"default_reasoning_effort,omitempty"`
4647

48+
// Reasoning family configurations, keyed by family name, defining how each model family handles reasoning syntax
49+
ReasoningFamilies map[string]ReasoningFamilyConfig `yaml:"reasoning_families,omitempty"`
50+
4751
// Semantic cache configuration
4852
SemanticCache SemanticCacheConfig `yaml:"semantic_cache"`
4953

@@ -207,6 +211,16 @@ type ModelParams struct {
207211

208212
// Optional pricing used for cost computation
209213
Pricing ModelPricing `yaml:"pricing,omitempty"`
214+
215+
// Reasoning family for this model (e.g., "deepseek", "qwen3", "gpt-oss")
216+
// If empty, the model doesn't support reasoning mode
217+
ReasoningFamily string `yaml:"reasoning_family,omitempty"`
218+
}
219+
220+
// ReasoningFamilyConfig defines how a reasoning family handles reasoning mode
221+
type ReasoningFamilyConfig struct {
222+
Type string `yaml:"type"` // "chat_template_kwargs" or "reasoning_effort"
223+
Parameter string `yaml:"parameter"` // "thinking", "enable_thinking", "reasoning_effort", etc.
210224
}
211225

212226
// PIIPolicy represents the PII (Personally Identifiable Information) policy for a model
@@ -263,6 +277,41 @@ type Category struct {
263277
ModelScores []ModelScore `yaml:"model_scores"`
264278
}
265279

280+
// Legacy types - can be removed once migration is complete
281+
282+
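// GetModelReasoningFamily returns the reasoning family configuration for a given model name, or nil if the model is not in model_config, has no reasoning_family set, or references a family that is not defined in reasoning_families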
283+
func (rc *RouterConfig) GetModelReasoningFamily(modelName string) *ReasoningFamilyConfig {
284+
if rc == nil || rc.ModelConfig == nil || rc.ReasoningFamilies == nil {
285+
return nil
286+
}
287+
288+
// Look up the model in model_config
289+
modelParams, exists := rc.ModelConfig[modelName]
290+
if !exists || modelParams.ReasoningFamily == "" {
291+
return nil
292+
}
293+
294+
// Look up the reasoning family configuration
295+
familyConfig, exists := rc.ReasoningFamilies[modelParams.ReasoningFamily]
296+
if !exists {
297+
return nil
298+
}
299+
300+
return &familyConfig
301+
}
302+
303+
// Legacy functions - can be removed once migration is complete
304+
305+
// contains checks if a slice contains a string
306+
func contains(slice []string, item string) bool {
307+
for _, s := range slice {
308+
if s == item {
309+
return true
310+
}
311+
}
312+
return false
313+
}
314+
266315
var (
267316
config *RouterConfig
268317
configOnce sync.Once
@@ -390,14 +439,7 @@ func (c *RouterConfig) IsModelAllowedForPIIType(modelName string, piiType string
390439
}
391440

392441
// If allow_by_default is false, only explicitly allowed PII types are permitted
393-
for _, allowedPII := range policy.PIITypes {
394-
if allowedPII == piiType {
395-
return true
396-
}
397-
}
398-
399-
// PII type not found in allowed list and allow_by_default is false
400-
return false
442+
return slices.Contains(policy.PIITypes, piiType)
401443
}
402444

403445
// IsModelAllowedForPIITypes checks if a model is allowed to process any of the given PII types
@@ -438,23 +480,17 @@ func (c *RouterConfig) GetEndpointsForModel(modelName string) []VLLMEndpoint {
438480

439481
// First, find all endpoints that can serve this model
440482
for _, endpoint := range c.VLLMEndpoints {
441-
for _, model := range endpoint.Models {
442-
if model == modelName {
443-
availableEndpoints = append(availableEndpoints, endpoint)
444-
break
445-
}
483+
if slices.Contains(endpoint.Models, modelName) {
484+
availableEndpoints = append(availableEndpoints, endpoint)
446485
}
447486
}
448487

449488
// Check if model has preferred endpoints configured
450489
if modelConfig, ok := c.ModelConfig[modelName]; ok && len(modelConfig.PreferredEndpoints) > 0 {
451490
var preferredEndpoints []VLLMEndpoint
452491
for _, endpoint := range availableEndpoints {
453-
for _, preferredName := range modelConfig.PreferredEndpoints {
454-
if endpoint.Name == preferredName {
455-
preferredEndpoints = append(preferredEndpoints, endpoint)
456-
break
457-
}
492+
if slices.Contains(modelConfig.PreferredEndpoints, endpoint.Name) {
493+
preferredEndpoints = append(preferredEndpoints, endpoint)
458494
}
459495
}
460496
if len(preferredEndpoints) > 0 {

src/semantic-router/pkg/extproc/reason_mode_config_test.go

Lines changed: 98 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -199,10 +199,42 @@ requestBody := buildRequestBody(model, messages, useReasoning, stream)
199199
func TestAddReasoningModeToRequestBody(t *testing.T) {
200200
fmt.Println("=== Testing addReasoningModeToRequestBody Function ===")
201201

202-
// Create a mock router with minimal config
203-
router := &OpenAIRouter{}
202+
// Create a mock router with family-based reasoning config
203+
router := &OpenAIRouter{
204+
Config: &config.RouterConfig{
205+
DefaultReasoningEffort: "medium",
206+
ReasoningFamilies: map[string]config.ReasoningFamilyConfig{
207+
"deepseek": {
208+
Type: "chat_template_kwargs",
209+
Parameter: "thinking",
210+
},
211+
"qwen3": {
212+
Type: "chat_template_kwargs",
213+
Parameter: "enable_thinking",
214+
},
215+
"gpt-oss": {
216+
Type: "reasoning_effort",
217+
Parameter: "reasoning_effort",
218+
},
219+
},
220+
ModelConfig: map[string]config.ModelParams{
221+
"deepseek-v31": {
222+
ReasoningFamily: "deepseek",
223+
},
224+
"qwen3-model": {
225+
ReasoningFamily: "qwen3",
226+
},
227+
"gpt-oss-model": {
228+
ReasoningFamily: "gpt-oss",
229+
},
230+
"phi4": {
231+
// No reasoning family - doesn't support reasoning
232+
},
233+
},
234+
},
235+
}
204236

205-
// Test case 1: Basic request body
237+
// Test case 1: Basic request body with model that has NO reasoning support (phi4)
206238
originalRequest := map[string]interface{}{
207239
"model": "phi4",
208240
"messages": []map[string]interface{}{
@@ -235,29 +267,76 @@ func TestAddReasoningModeToRequestBody(t *testing.T) {
235267
return
236268
}
237269

238-
// Check if chat_template_kwargs was added
239-
if chatTemplateKwargs, exists := modifiedRequest["chat_template_kwargs"]; exists {
270+
// Check that chat_template_kwargs was NOT added for phi4 (since it has no reasoning_family)
271+
if _, exists := modifiedRequest["chat_template_kwargs"]; exists {
272+
fmt.Println("ERROR: chat_template_kwargs should not be added for phi4 (no reasoning family configured)")
273+
} else {
274+
fmt.Println("SUCCESS: chat_template_kwargs correctly not added for phi4 (no reasoning support)")
275+
}
276+
277+
// Check that reasoning_effort was NOT added for phi4
278+
if _, exists := modifiedRequest["reasoning_effort"]; exists {
279+
fmt.Println("ERROR: reasoning_effort should not be added for phi4 (no reasoning family configured)")
280+
} else {
281+
fmt.Println("SUCCESS: reasoning_effort correctly not added for phi4 (no reasoning support)")
282+
}
283+
284+
// Test case 2: Request with model that HAS reasoning support (deepseek-v31)
285+
fmt.Println("\n--- Test Case 2: Model with reasoning support ---")
286+
deepseekRequest := map[string]interface{}{
287+
"model": "deepseek-v31",
288+
"messages": []map[string]interface{}{
289+
{"role": "user", "content": "What is 2 + 2?"},
290+
},
291+
"stream": false,
292+
}
293+
294+
deepseekBody, err := json.Marshal(deepseekRequest)
295+
if err != nil {
296+
fmt.Printf("Error marshaling deepseek request: %v\n", err)
297+
return
298+
}
299+
300+
fmt.Printf("Original deepseek request:\n%s\n\n", string(deepseekBody))
301+
302+
// Add reasoning mode to DeepSeek model
303+
modifiedDeepseekBody, err := router.setReasoningModeToRequestBody(deepseekBody, true, "math")
304+
if err != nil {
305+
fmt.Printf("Error adding reasoning mode to deepseek: %v\n", err)
306+
return
307+
}
308+
309+
fmt.Printf("Modified deepseek request with reasoning:\n%s\n\n", string(modifiedDeepseekBody))
310+
311+
var modifiedDeepseekRequest map[string]interface{}
312+
if err := json.Unmarshal(modifiedDeepseekBody, &modifiedDeepseekRequest); err != nil {
313+
fmt.Printf("Error unmarshaling modified deepseek request: %v\n", err)
314+
return
315+
}
316+
317+
// Check that chat_template_kwargs WAS added for deepseek-v31
318+
if chatTemplateKwargs, exists := modifiedDeepseekRequest["chat_template_kwargs"]; exists {
240319
if kwargs, ok := chatTemplateKwargs.(map[string]interface{}); ok {
241320
if thinking, hasThinking := kwargs["thinking"]; hasThinking {
242321
if thinkingBool, isBool := thinking.(bool); isBool && thinkingBool {
243-
fmt.Println("SUCCESS: chat_template_kwargs with thinking: true was correctly added")
322+
fmt.Println("SUCCESS: chat_template_kwargs with thinking: true correctly added for deepseek-v31")
244323
} else {
245-
fmt.Printf("ERROR: thinking value is not true, got: %v\n", thinking)
324+
fmt.Printf("ERROR: thinking value is not true for deepseek-v31, got: %v\n", thinking)
246325
}
247326
} else {
248-
fmt.Println("ERROR: thinking field not found in chat_template_kwargs")
327+
fmt.Println("ERROR: thinking field not found in chat_template_kwargs for deepseek-v31")
249328
}
250329
} else {
251-
fmt.Printf("ERROR: chat_template_kwargs is not a map, got: %T\n", chatTemplateKwargs)
330+
fmt.Printf("ERROR: chat_template_kwargs is not a map for deepseek-v31, got: %T\n", chatTemplateKwargs)
252331
}
253332
} else {
254-
fmt.Println("ERROR: chat_template_kwargs not found in modified request")
333+
fmt.Println("ERROR: chat_template_kwargs not found for deepseek-v31 (should be present)")
255334
}
256335

257-
// Test case 2: Request with existing fields
258-
fmt.Println("\n--- Test Case 2: Request with existing fields ---")
336+
// Test case 3: Request with existing fields
337+
fmt.Println("\n--- Test Case 3: Request with existing fields ---")
259338
complexRequest := map[string]interface{}{
260-
"model": "phi4",
339+
"model": "deepseek-v31",
261340
"messages": []map[string]interface{}{
262341
{"role": "system", "content": "You are a helpful assistant"},
263342
{"role": "user", "content": "Solve x^2 + 5x + 6 = 0"},
@@ -290,20 +369,20 @@ func TestAddReasoningModeToRequestBody(t *testing.T) {
290369
allFieldsPreserved := true
291370
for _, field := range originalFields {
292371
if _, exists := modifiedComplexRequest[field]; !exists {
293-
fmt.Printf("ERROR: Original field '%s' was lost\n", field)
372+
fmt.Printf("ERROR: Original field '%s' was lost\n", field)
294373
allFieldsPreserved = false
295374
}
296375
}
297376

298377
if allFieldsPreserved {
299-
fmt.Println("SUCCESS: All original fields preserved")
378+
fmt.Println("SUCCESS: All original fields preserved")
300379
}
301380

302-
// Verify chat_template_kwargs was added
381+
// Verify chat_template_kwargs was added for deepseek-v31
303382
if _, exists := modifiedComplexRequest["chat_template_kwargs"]; exists {
304-
fmt.Println("SUCCESS: chat_template_kwargs added to complex request")
305-
fmt.Printf("Final modified request:\n%s\n", string(modifiedComplexBody))
383+
fmt.Println("SUCCESS: chat_template_kwargs added to complex deepseek request")
384+
fmt.Printf("Final modified deepseek request:\n%s\n", string(modifiedComplexBody))
306385
} else {
307-
fmt.Println("ERROR: chat_template_kwargs not added to complex request")
386+
fmt.Println("ERROR: chat_template_kwargs not added to complex deepseek request")
308387
}
309388
}

0 commit comments

Comments
 (0)