-
Notifications
You must be signed in to change notification settings - Fork 256
feat: reasoning model controller #56
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,8 @@ import ( | |
| "fmt" | ||
| "log" | ||
| "strings" | ||
|
|
||
| "github.com/vllm-project/semantic-router/semantic-router/pkg/metrics" | ||
| ) | ||
|
|
||
| // shouldUseReasoningMode determines if reasoning mode should be enabled based on the query category | ||
|
|
@@ -45,6 +47,25 @@ func (r *OpenAIRouter) getReasoningModeAndCategory(query string) (bool, string) | |
| return false, categoryName | ||
| } | ||
|
|
||
// getModelFamilyAndTemplateParam returns a normalized model family name and the
// request parameter used to toggle reasoning for that family (if any).
//
// Matching is case-insensitive and ignores surrounding whitespace. Families are
// checked in this order, and the first match wins:
//   - "qwen3"    -> param "enable_thinking"
//   - "deepseek" -> param "thinking"
//   - "gpt-oss"  -> param "reasoning_effort"
//   - "gpt"      -> param "reasoning_effort"
//
// When nothing matches it returns ("unknown", "").
func getModelFamilyAndTemplateParam(model string) (string, string) {
	lower := strings.ToLower(strings.TrimSpace(model))
	switch {
	case strings.Contains(lower, "qwen3"):
		return "qwen3", "enable_thinking"
	case strings.Contains(lower, "deepseek") || hasDeepSeekAlias(lower):
		// "ds" is a widely used shorthand for DeepSeek (for example a deployment
		// named "ds-v3"), so it is accepted, but only as its own name segment:
		// a bare substring test would also match names such as "llama-3-words".
		// Review note: a more robust model-name filter is tracked in issue #61.
		return "deepseek", "thinking"
	// GPT-OSS family and generic GPT fall back to using reasoning_effort
	// (OpenAI-compatible field).
	case strings.Contains(lower, "gpt-oss") || strings.Contains(lower, "gpt_oss"):
		return "gpt-oss", "reasoning_effort"
	case strings.Contains(lower, "gpt"):
		return "gpt", "reasoning_effort"
	default:
		return "unknown", ""
	}
}

// hasDeepSeekAlias reports whether the already-lowercased model name contains
// the shorthand "ds" as a separate name segment. Segments are runs of ASCII
// letters and digits; anything else ("-", "_", "/", ".", ...) is a separator.
// A segment qualifies when it is exactly "ds", or "ds" followed by a version
// suffix: digits only ("ds3") or one letter plus digits ("dsv3", "dsr1").
func hasDeepSeekAlias(lower string) bool {
	isDigit := func(b byte) bool { return b >= '0' && b <= '9' }
	isLetter := func(b byte) bool { return b >= 'a' && b <= 'z' }

	segments := strings.FieldsFunc(lower, func(r rune) bool {
		return !(r >= 'a' && r <= 'z') && !(r >= '0' && r <= '9')
	})
	for _, seg := range segments {
		if !strings.HasPrefix(seg, "ds") {
			continue
		}
		suffix := seg[len("ds"):]
		if suffix == "" {
			return true
		}
		// Allow a single leading version letter, which must be followed by digits.
		if isLetter(suffix[0]) {
			suffix = suffix[1:]
			if suffix == "" {
				continue
			}
		}
		allDigits := true
		for i := 0; i < len(suffix); i++ {
			if !isDigit(suffix[i]) {
				allDigits = false
				break
			}
		}
		if allDigits {
			return true
		}
	}
	return false
}
|
|
||
| // getChatTemplateKwargs returns the appropriate chat template kwargs based on model and reasoning mode | ||
| func getChatTemplateKwargs(model string, useReasoning bool) map[string]interface{} { | ||
| lower := strings.ToLower(strings.TrimSpace(model)) | ||
|
|
@@ -83,8 +104,11 @@ func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled | |
| } | ||
| } | ||
|
|
||
| family, param := getModelFamilyAndTemplateParam(model) | ||
|
|
||
| // Add chat_template_kwargs for reasoning mode | ||
| if kwargs := getChatTemplateKwargs(model, enabled); kwargs != nil { | ||
| kwargs := getChatTemplateKwargs(model, enabled) | ||
| if kwargs != nil { | ||
| requestMap["chat_template_kwargs"] = kwargs | ||
| } else { | ||
| delete(requestMap, "chat_template_kwargs") | ||
|
|
@@ -96,17 +120,35 @@ func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled | |
| // This seems to be the default for openai/gpt-oss models | ||
| originalReasoningEffort = "low" | ||
| } | ||
| var appliedEffort string | ||
| if enabled { | ||
| // Use configurable reasoning effort based on category | ||
| effort := r.getReasoningEffort(categoryName) | ||
| requestMap["reasoning_effort"] = effort | ||
| appliedEffort = effort | ||
| } else { | ||
| requestMap["reasoning_effort"] = originalReasoningEffort | ||
| if s, ok := originalReasoningEffort.(string); ok { | ||
| appliedEffort = s | ||
| } | ||
| } | ||
|
|
||
| log.Printf("Original reasoning effort: %s", originalReasoningEffort) | ||
| log.Printf("Added reasoning mode (enabled: %v) and reasoning effort (%s) to request for model: %s", enabled, requestMap["reasoning_effort"], model) | ||
|
|
||
| // Record metrics for template usage and effort when enabled | ||
| if enabled { | ||
| // If we applied a known template param, record its usage | ||
| if kwargs != nil && param != "" { | ||
| metrics.RecordReasoningTemplateUsage(family, param) | ||
| } else if kwargs == nil && param == "reasoning_effort" { | ||
| // For GPT/GPT-OSS, we only set reasoning_effort | ||
| metrics.RecordReasoningTemplateUsage(family, param) | ||
| } | ||
| // Record which effort level was used for this family | ||
| metrics.RecordReasoningEffortUsage(family, appliedEffort) | ||
| } | ||
|
|
||
| // Serialize back to JSON | ||
| modifiedBody, err := json.Marshal(requestMap) | ||
| if err != nil { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -192,6 +192,33 @@ var ( | |
| }, | ||
| []string{"model", "pii_type"}, | ||
| ) | ||
|
|
||
| // ReasoningDecisions tracks the reasoning mode decision outcome by category, model, and effort | ||
| ReasoningDecisions = promauto.NewCounterVec( | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please come up with a follow-up PR to add them to the doc. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added an issue for tracking: #62 |
||
| prometheus.CounterOpts{ | ||
| Name: "llm_reasoning_decisions_total", | ||
| Help: "The total number of reasoning mode decisions by category, model, and effort", | ||
| }, | ||
| []string{"category", "model", "enabled", "effort"}, | ||
| ) | ||
|
|
||
| // ReasoningTemplateUsage tracks usage of model-family-specific template parameters | ||
| ReasoningTemplateUsage = promauto.NewCounterVec( | ||
| prometheus.CounterOpts{ | ||
| Name: "llm_reasoning_template_usage_total", | ||
| Help: "The total number of times a model family template parameter was applied", | ||
| }, | ||
| []string{"family", "param"}, | ||
| ) | ||
|
|
||
| // ReasoningEffortUsage tracks the distribution of reasoning efforts by model family | ||
| ReasoningEffortUsage = promauto.NewCounterVec( | ||
| prometheus.CounterOpts{ | ||
| Name: "llm_reasoning_effort_usage_total", | ||
| Help: "The total number of times a reasoning effort level was set per model family", | ||
| }, | ||
| []string{"family", "effort"}, | ||
| ) | ||
| ) | ||
|
|
||
| // RecordModelRequest increments the counter for requests to a specific model | ||
|
|
@@ -463,3 +490,34 @@ func InitializeBatchMetrics(config BatchMetricsConfig) { | |
| ) | ||
| }) | ||
| } | ||
|
|
||
| // RecordReasoningDecision records a reasoning-mode decision for a category, model and effort | ||
| func RecordReasoningDecision(category, model string, enabled bool, effort string) { | ||
| status := "false" | ||
| if enabled { | ||
| status = "true" | ||
| } | ||
| ReasoningDecisions.WithLabelValues(category, model, status, effort).Inc() | ||
| } | ||
|
|
||
| // RecordReasoningTemplateUsage records usage of a model-family-specific template parameter | ||
| func RecordReasoningTemplateUsage(family, param string) { | ||
| if family == "" { | ||
| family = "unknown" | ||
| } | ||
| if param == "" { | ||
| param = "none" | ||
| } | ||
| ReasoningTemplateUsage.WithLabelValues(family, param).Inc() | ||
| } | ||
|
|
||
| // RecordReasoningEffortUsage records the effort usage by model family | ||
| func RecordReasoningEffortUsage(family, effort string) { | ||
| if family == "" { | ||
| family = "unknown" | ||
| } | ||
| if effort == "" { | ||
| effort = "unspecified" | ||
| } | ||
| ReasoningEffortUsage.WithLabelValues(family, effort).Inc() | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a unit test for this? You can do it in a follow-up PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should add a unit test in this PR; let's make sure it does not break the strategy for modifying the body.