66 "log"
77 "strings"
88
9+ "github.com/vllm-project/semantic-router/semantic-router/pkg/config"
910 "github.com/vllm-project/semantic-router/semantic-router/pkg/metrics"
1011)
1112
@@ -47,61 +48,42 @@ func (r *OpenAIRouter) getReasoningModeAndCategory(query string) (bool, string)
4748 return false , categoryName
4849}
4950
50- // hasDeepSeekAlias returns true if the model uses a short alias for DeepSeek (e.g., "ds-*")
51- // Rules:
52- // - Accept only when the model string starts with: "ds-", "ds_", "ds:", "ds " or exactly equals "ds"
53- // - Do NOT match occurrences of "ds" in the middle of the model name (e.g., "foo-ds-1b")
54- func hasDeepSeekAlias (lower string ) bool {
55- lower = strings .TrimSpace (lower )
56- if strings .HasPrefix (lower , "ds" ) {
57- if len (lower ) == 2 { // exactly "ds"
58- return true
59- }
60- sep := lower [2 ]
61- return sep == '-' || sep == '_' || sep == ':' || sep == ' '
51+ // getModelReasoningConfig looks up the reasoning configuration for model by delegating to r.Config.FindModelReasoningConfig; it returns nil when r.Config is nil
52+ func (r * OpenAIRouter ) getModelReasoningConfig (model string ) * config.ModelReasoningConfig {
53+ if r .Config == nil { // guard: without a router config there is nothing to look up
54+ return nil
6255 }
63- return false
56+ return r . Config . FindModelReasoningConfig ( model ) // NOTE(review): presumably nil when no entry matches the model - confirm in pkg/config
6457}
6558
66- // getModelFamilyAndTemplateParam returns a normalized model family name and the template param to be used (if any)
67- func getModelFamilyAndTemplateParam (model string ) (string , string ) {
68- lower := strings .ToLower (strings .TrimSpace (model ))
69- if strings .Contains (lower , "qwen3" ) {
70- return "qwen3" , "enable_thinking"
71- }
72- if strings .Contains (lower , "deepseek" ) || hasDeepSeekAlias (lower ) {
73- return "deepseek" , "thinking"
59+ // buildReasoningRequestFields returns the request fields to add for model together with the applied reasoning effort; it returns (nil, "N/A") when the model has no reasoning config, when useReasoning is false, or when the configured syntax type is not recognized
60+ func (r * OpenAIRouter ) buildReasoningRequestFields (model string , useReasoning bool , categoryName string ) (map [string ]interface {}, string ) {
61+ modelConfig := r .getModelReasoningConfig (model )
62+ if modelConfig == nil {
63+ // No configuration found for this model - don't apply any reasoning syntax
64+ // Unknown models should not have reasoning fields added
65+ return nil , "N/A"
7466 }
75- // GPT-OSS family and generic GPT fall back to using reasoning_effort (OpenAI-compatible field)
76- if strings .Contains (lower , "gpt-oss" ) || strings .Contains (lower , "gpt_oss" ) {
77- return "gpt-oss" , "reasoning_effort"
78- }
79- if strings .Contains (lower , "gpt" ) {
80- return "gpt" , "reasoning_effort"
81- }
82- return "unknown" , ""
83- }
8467
85- // getChatTemplateKwargs returns the appropriate chat template kwargs based on model and reasoning mode
86- func getChatTemplateKwargs (model string , useReasoning bool ) map [string ]interface {} {
87- lower := strings .ToLower (strings .TrimSpace (model ))
88-
89- // Qwen3: use enable_thinking true/false
90- if strings .Contains (lower , "qwen3" ) {
91- return map [string ]interface {}{
92- "enable_thinking" : useReasoning ,
93- }
68+ if ! useReasoning {
69+ // When reasoning is disabled, don't add any reasoning fields
70+ return nil , "N/A"
9471 }
9572
96- // DeepSeek v3 family: use thinking true/false
97- if strings .Contains (lower , "deepseek" ) || strings .Contains (lower , "ds" ) {
98- return map [string ]interface {}{
99- "thinking" : useReasoning ,
73+ // Reasoning is enabled from here on: emit the field shape selected by the model's configured ReasoningSyntax.Type
74+ switch modelConfig .ReasoningSyntax .Type {
75+ case "chat_template_kwargs" :
76+ kwargs := map [string ]interface {}{
77+ modelConfig .ReasoningSyntax .Parameter : useReasoning , // always true here: the !useReasoning case returned above
10078 }
79+ return map [string ]interface {}{"chat_template_kwargs" : kwargs }, "N/A" // no effort level is chosen on this path, hence "N/A"
80+ case "reasoning_effort" :
81+ effort := r .getReasoningEffort (categoryName ) // effort level is looked up per category
82+ return map [string ]interface {}{"reasoning_effort" : effort }, effort // second value reports the effort that was applied
83+ default :
84+ // Unknown reasoning syntax type - don't apply anything
85+ return nil , "N/A"
10186 }
102-
103- // Default: no chat template kwargs for unknown models
104- return nil
10587}
10688
10789// setReasoningModeToRequestBody sets the model-specific reasoning fields (chat_template_kwargs or reasoning_effort) in the JSON request body
@@ -120,49 +102,60 @@ func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled
120102 }
121103 }
122104
123- family , param := getModelFamilyAndTemplateParam (model )
124-
125- // Add chat_template_kwargs for reasoning mode
126- kwargs := getChatTemplateKwargs (model , enabled )
127- if kwargs != nil {
128- requestMap ["chat_template_kwargs" ] = kwargs
129- } else {
130- delete (requestMap , "chat_template_kwargs" )
131- }
132- // Also set Reasoning-Effort in openai request
133- // This is a hack to get the reasoning mode for openai/gpt-oss-20b to work
134- originalReasoningEffort , ok := requestMap ["reasoning_effort" ]
135- if ! ok {
136- // This seems to be the default for openai/gpt-oss models
137- originalReasoningEffort = "low"
105+ // Get original reasoning effort for potential preservation
106+ originalReasoningEffort , hasOriginalEffort := requestMap ["reasoning_effort" ]
107+ if ! hasOriginalEffort {
108+ originalReasoningEffort = "low" // Default for compatibility
138109 }
139- var appliedEffort string
110+
111+ // Clear existing reasoning-related fields
112+ delete (requestMap , "chat_template_kwargs" )
113+ delete (requestMap , "reasoning_effort" )
114+
115+ var appliedEffort string = "N/A"
116+
140117 if enabled {
141- // Use configurable reasoning effort based on category
142- effort := r .getReasoningEffort (categoryName )
143- requestMap ["reasoning_effort" ] = effort
118+ // When reasoning is enabled, build the appropriate fields
119+ reasoningFields , effort := r .buildReasoningRequestFields (model , enabled , categoryName )
120+ for key , value := range reasoningFields {
121+ requestMap [key ] = value
122+ }
144123 appliedEffort = effort
145124 } else {
146- requestMap ["reasoning_effort" ] = originalReasoningEffort
147- if s , ok := originalReasoningEffort .(string ); ok {
148- appliedEffort = s
125+ // When reasoning is disabled, only preserve reasoning_effort for gpt-oss models
126+ modelConfig := r .getModelReasoningConfig (model )
127+ if modelConfig != nil && modelConfig .Name == "gpt-oss" {
128+ requestMap ["reasoning_effort" ] = originalReasoningEffort
129+ if s , ok := originalReasoningEffort .(string ); ok {
130+ appliedEffort = s
131+ }
149132 }
133+ // For all other models, reasoning fields remain cleared
150134 }
151135
152136 log .Printf ("Original reasoning effort: %s" , originalReasoningEffort )
153- log .Printf ("Added reasoning mode (enabled: %v) and reasoning effort (%s) to request for model: %s" , enabled , requestMap [ "reasoning_effort" ] , model )
137+ log .Printf ("Added reasoning mode (enabled: %v) and reasoning effort (%s) to request for model: %s" , enabled , appliedEffort , model )
154138
155139 // Record metrics for template usage and effort when enabled
156140 if enabled {
157- // If we applied a known template param, record its usage
158- if kwargs != nil && param != "" {
159- metrics .RecordReasoningTemplateUsage (family , param )
160- } else if kwargs == nil && param == "reasoning_effort" {
161- // For GPT/GPT-OSS, we only set reasoning_effort
162- metrics .RecordReasoningTemplateUsage (family , param )
141+ modelConfig := r .getModelReasoningConfig (model )
142+ modelFamily := "unknown"
143+ templateParam := "reasoning_effort" // default fallback
144+
145+ if modelConfig != nil {
146+ modelFamily = modelConfig .Name
147+ if modelConfig .ReasoningSyntax .Type == "chat_template_kwargs" {
148+ templateParam = modelConfig .ReasoningSyntax .Parameter
149+ } else {
150+ templateParam = "reasoning_effort"
151+ }
152+ }
153+
154+ // Record template usage and effort
155+ metrics .RecordReasoningTemplateUsage (modelFamily , templateParam )
156+ if appliedEffort != "N/A" {
157+ metrics .RecordReasoningEffortUsage (modelFamily , appliedEffort )
163158 }
164- // Record which effort level was used for this family
165- metrics .RecordReasoningEffortUsage (family , appliedEffort )
166159 }
167160
168161 // Serialize back to JSON
0 commit comments