Commit e7a57ea

Expose reasoning effort option for reasoning models on OpenRouter (#2483)
* Specify reasoning effort for OpenRouter reasoning models
* Add ReasoningEffort type
* Fix ReasoningEffort props
* Remove copypasta
* Set reasoning effort for Grok 3 Mini
* Use translations
* Add translations
* Remove this check
1 parent 51bcade commit e7a57ea

21 files changed: +172 −5 lines changed

src/api/index.ts

Lines changed: 5 additions & 1 deletion
@@ -88,21 +88,25 @@ export function getModelParams({
 	model,
 	defaultMaxTokens,
 	defaultTemperature = 0,
+	defaultReasoningEffort,
 }: {
 	options: ApiHandlerOptions
 	model: ModelInfo
 	defaultMaxTokens?: number
 	defaultTemperature?: number
+	defaultReasoningEffort?: "low" | "medium" | "high"
 }) {
 	const {
 		modelMaxTokens: customMaxTokens,
 		modelMaxThinkingTokens: customMaxThinkingTokens,
 		modelTemperature: customTemperature,
+		reasoningEffort: customReasoningEffort,
 	} = options

 	let maxTokens = model.maxTokens ?? defaultMaxTokens
 	let thinking: BetaThinkingConfigParam | undefined = undefined
 	let temperature = customTemperature ?? defaultTemperature
+	const reasoningEffort = customReasoningEffort ?? defaultReasoningEffort

 	if (model.thinking) {
 		// Only honor `customMaxTokens` for thinking models.
@@ -118,5 +122,5 @@ export function getModelParams({
 		temperature = 1.0
 	}

-	return { maxTokens, thinking, temperature }
+	return { maxTokens, thinking, temperature, reasoningEffort }
 }
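
The net effect is a simple precedence chain: a user-configured `reasoningEffort` in the handler options overrides the provider's `defaultReasoningEffort`, and both stay undefined when neither is set. A minimal sketch of that resolution (the option values here are hypothetical, and `apiHandlerOptions`/`modelInfo` are assumed to be otherwise-complete objects):

// Sketch: the user setting wins; the per-model default is the fallback.
const { reasoningEffort } = getModelParams({
	options: { ...apiHandlerOptions, reasoningEffort: "high" }, // user setting
	model: modelInfo,
	defaultReasoningEffort: "low", // per-model default passed by the provider
})
// reasoningEffort === "high"; with no user setting it falls back to "low",
// and with neither set it remains undefined.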

src/api/providers/openai.ts

Lines changed: 15 additions & 0 deletions
@@ -82,6 +82,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {
 		const urlHost = this._getUrlHost(modelUrl)
 		const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format
 		const ark = modelUrl.includes(".volces.com")
+
 		if (modelId.startsWith("o3-mini")) {
 			yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages)
 			return
@@ -94,6 +95,7 @@
 		}

 		let convertedMessages
+
 		if (deepseekReasoner) {
 			convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
 		} else if (ark || enabledLegacyFormat) {
@@ -112,16 +114,20 @@
 					],
 				}
 			}
+
 			convertedMessages = [systemMessage, ...convertToOpenAiMessages(messages)]
+
 			if (modelInfo.supportsPromptCache) {
 				// Note: the following logic is copied from openrouter:
 				// Add cache_control to the last two user messages
 				// (note: this works because we only ever add one user message at a time, but if we added multiple we'd need to mark the user message before the last assistant message)
 				const lastTwoUserMessages = convertedMessages.filter((msg) => msg.role === "user").slice(-2)
+
 				lastTwoUserMessages.forEach((msg) => {
 					if (typeof msg.content === "string") {
 						msg.content = [{ type: "text", text: msg.content }]
 					}
+
 					if (Array.isArray(msg.content)) {
 						// NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end.
 						let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
@@ -130,6 +136,7 @@
 							lastTextPart = { type: "text", text: "..." }
 							msg.content.push(lastTextPart)
 						}
+
 						// @ts-ignore-next-line
 						lastTextPart["cache_control"] = { type: "ephemeral" }
 					}
@@ -145,7 +152,9 @@
 			messages: convertedMessages,
 			stream: true as const,
 			...(isGrokXAI ? {} : { stream_options: { include_usage: true } }),
+			reasoning_effort: this.getModel().info.reasoningEffort,
 		}
+
 		if (this.options.includeMaxTokens) {
 			requestOptions.max_tokens = modelInfo.maxTokens
 		}
@@ -185,6 +194,7 @@
 				lastUsage = chunk.usage
 			}
 		}
+
 		for (const chunk of matcher.final()) {
 			yield chunk
 		}
@@ -217,6 +227,7 @@
 				type: "text",
 				text: response.choices[0]?.message.content || "",
 			}
+
 			yield this.processUsageMetrics(response.usage, modelInfo)
 		}
 	}
@@ -241,6 +252,7 @@
 	async completePrompt(prompt: string): Promise<string> {
 		try {
 			const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl)
+
 			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
 				model: this.getModel().id,
 				messages: [{ role: "user", content: prompt }],
@@ -250,11 +262,13 @@
 				requestOptions,
 				isAzureAiInference ? { path: AZURE_AI_INFERENCE_PATH } : {},
 			)
+
 			return response.choices[0]?.message.content || ""
 		} catch (error) {
 			if (error instanceof Error) {
 				throw new Error(`OpenAI completion error: ${error.message}`)
 			}
+
 			throw error
 		}
 	}
@@ -333,6 +347,7 @@
 			}
 		}
 	}
+
 	private _getUrlHost(baseUrl?: string): string {
 		try {
 			return new URL(baseUrl ?? "").host
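
One behavioral note on the new `reasoning_effort` line: it is assigned unconditionally, so for models without a configured effort the field is `undefined` and simply serializes away (JSON.stringify drops undefined properties), leaving the request unchanged. A sketch of the resulting streaming params — the model id and message are illustrative, and the assumption is an openai-node version whose chat-completions params already include `reasoning_effort`:

import OpenAI from "openai"

// Illustrative shape of the streaming request after this change.
// reasoning_effort mirrors ModelInfo.reasoningEffort and is dropped
// from the wire body when undefined.
const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
	model: "o3-mini", // hypothetical model id
	messages: [{ role: "user", content: "Summarize this diff." }],
	stream: true,
	stream_options: { include_usage: true },
	reasoning_effort: "medium",
}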

src/api/providers/openrouter.ts

Lines changed: 12 additions & 3 deletions
@@ -1,8 +1,7 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta"
-import axios, { AxiosRequestConfig } from "axios"
+import axios from "axios"
 import OpenAI from "openai"
-import delay from "delay"

 import { ApiHandlerOptions, ModelInfo, openRouterDefaultModelId, openRouterDefaultModelInfo } from "../../shared/api"
 import { parseApiPrice } from "../../utils/cost"
@@ -22,6 +21,12 @@ type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {
 	transforms?: string[]
 	include_reasoning?: boolean
 	thinking?: BetaThinkingConfigParam
+	// https://openrouter.ai/docs/use-cases/reasoning-tokens
+	reasoning?: {
+		effort?: "high" | "medium" | "low"
+		max_tokens?: number
+		exclude?: boolean
+	}
 }

 export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler {
@@ -42,7 +47,7 @@
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 	): AsyncGenerator<ApiStreamChunk> {
-		let { id: modelId, maxTokens, thinking, temperature, topP } = this.getModel()
+		let { id: modelId, maxTokens, thinking, temperature, topP, reasoningEffort } = this.getModel()

 		// Convert Anthropic messages to OpenAI format.
 		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
@@ -70,13 +75,16 @@
 				},
 			],
 		}
+
 		// Add cache_control to the last two user messages
 		// (note: this works because we only ever add one user message at a time, but if we added multiple we'd need to mark the user message before the last assistant message)
 		const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
+
 		lastTwoUserMessages.forEach((msg) => {
 			if (typeof msg.content === "string") {
 				msg.content = [{ type: "text", text: msg.content }]
 			}
+
 			if (Array.isArray(msg.content)) {
 				// NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end.
 				let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
@@ -113,6 +121,7 @@
 			}),
 			// This way, the transforms field will only be included in the parameters when openRouterUseMiddleOutTransform is true.
 			...((this.options.openRouterUseMiddleOutTransform ?? true) && { transforms: ["middle-out"] }),
+			...(reasoningEffort && { reasoning: { effort: reasoningEffort } }),
 		}

 		const stream = await this.client.chat.completions.create(completionParams)
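
Because the `reasoning` field is added with a conditional spread, it only reaches the wire when an effort is actually configured; otherwise the request body is identical to what it was before this change. A sketch of the resulting params for a Grok 3 Mini request, per the OpenRouter reasoning-tokens docs linked in the hunk (message content is illustrative):

// Sketch of completionParams with an effort set. With reasoningEffort
// undefined, the conditional spread omits `reasoning` entirely.
const completionParams: OpenRouterChatCompletionParams = {
	model: "x-ai/grok-3-mini-beta",
	messages: [{ role: "user", content: "Hello" }],
	stream: true,
	transforms: ["middle-out"],
	reasoning: { effort: "high" },
}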

webview-ui/src/components/settings/ApiOptions.tsx

Lines changed: 11 additions & 1 deletion
@@ -46,7 +46,7 @@ import {
 	OPENROUTER_DEFAULT_PROVIDER_NAME,
 } from "@/components/ui/hooks/useOpenRouterModelProviders"
 import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue, SelectSeparator, Button } from "@/components/ui"
-import { MODELS_BY_PROVIDER, PROVIDERS, VERTEX_REGIONS } from "./constants"
+import { MODELS_BY_PROVIDER, PROVIDERS, VERTEX_REGIONS, REASONING_MODELS } from "./constants"
 import { AWS_REGIONS } from "../../../../src/shared/aws_regions"
 import { VSCodeButtonLink } from "../common/VSCodeButtonLink"
 import { ModelInfoView } from "./ModelInfoView"
@@ -59,6 +59,7 @@ import { ThinkingBudget } from "./ThinkingBudget"
 import { R1FormatSetting } from "./R1FormatSetting"
 import { OpenRouterBalanceDisplay } from "./OpenRouterBalanceDisplay"
 import { RequestyBalanceDisplay } from "./RequestyBalanceDisplay"
+import { ReasoningEffort } from "./ReasoningEffort"

 interface ApiOptionsProps {
 	uriScheme: string | undefined
@@ -1538,6 +1539,13 @@ const ApiOptions = ({
 				</div>
 			)}

+			{selectedProvider === "openrouter" && REASONING_MODELS.has(selectedModelId) && (
+				<ReasoningEffort
+					apiConfiguration={apiConfiguration}
+					setApiConfigurationField={setApiConfigurationField}
+				/>
+			)}
+
 			{selectedProvider === "glama" && (
 				<ModelPicker
 					apiConfiguration={apiConfiguration}
@@ -1665,12 +1673,14 @@ const ApiOptions = ({
 				})()}
 			</>
 		)}
+
 		<ModelInfoView
 			selectedModelId={selectedModelId}
 			modelInfo={selectedModelInfo}
 			isDescriptionExpanded={isDescriptionExpanded}
 			setIsDescriptionExpanded={setIsDescriptionExpanded}
 		/>
+
 		<ThinkingBudget
 			key={`${selectedProvider}-${selectedModelId}`}
 			apiConfiguration={apiConfiguration}
webview-ui/src/components/settings/ReasoningEffort.tsx

Lines changed: 37 additions & 0 deletions

@@ -0,0 +1,37 @@
+import { useAppTranslation } from "@/i18n/TranslationContext"
+
+import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui"
+
+import { ApiConfiguration } from "../../../../src/shared/api"
+import { reasoningEfforts, ReasoningEffort as ReasoningEffortType } from "../../../../src/schemas"
+
+interface ReasoningEffortProps {
+	apiConfiguration: ApiConfiguration
+	setApiConfigurationField: <K extends keyof ApiConfiguration>(field: K, value: ApiConfiguration[K]) => void
+}
+
+export const ReasoningEffort = ({ apiConfiguration, setApiConfigurationField }: ReasoningEffortProps) => {
+	const { t } = useAppTranslation()
+
+	return (
+		<div className="flex flex-col gap-1">
+			<div className="flex justify-between items-center">
+				<label className="block font-medium mb-1">{t("settings:providers.reasoningEffort.label")}</label>
+			</div>
+			<Select
+				value={apiConfiguration.reasoningEffort}
+				onValueChange={(value) => setApiConfigurationField("reasoningEffort", value as ReasoningEffortType)}>
+				<SelectTrigger className="w-full">
+					<SelectValue placeholder={t("settings:common.select")} />
+				</SelectTrigger>
+				<SelectContent>
+					{reasoningEfforts.map((value) => (
+						<SelectItem key={value} value={value}>
+							{t(`settings:providers.reasoningEffort.${value}`)}
+						</SelectItem>
+					))}
+				</SelectContent>
+			</Select>
+		</div>
+	)
+}
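
The `reasoningEfforts.map(...)` loop and the `as ReasoningEffortType` cast imply that `src/schemas` exports both a value list and a type. That file isn't part of this diff, but a shape like the following sketch — an assumption, shown with zod since the commit notes "Add ReasoningEffort type" — would satisfy the component:

import { z } from "zod"

// Assumed shape of the src/schemas export (not shown in this commit):
// a value tuple, a schema over it, and the inferred union type.
export const reasoningEfforts = ["low", "medium", "high"] as const
export const reasoningEffortsSchema = z.enum(reasoningEfforts)
export type ReasoningEffort = z.infer<typeof reasoningEffortsSchema>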

webview-ui/src/components/settings/constants.ts

Lines changed: 2 additions & 0 deletions
@@ -46,3 +46,5 @@ export const VERTEX_REGIONS = [
 	{ value: "europe-west4", label: "europe-west4" },
 	{ value: "asia-southeast1", label: "asia-southeast1" },
 ]
+
+export const REASONING_MODELS = new Set(["x-ai/grok-3-mini-beta"])
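
For reference, this set is consumed in ApiOptions.tsx as a plain membership check, so the effort dropdown is scoped to OpenRouter models known to accept a reasoning effort — currently just Grok 3 Mini:

// From the ApiOptions.tsx hunk above: the dropdown renders only for
// OpenRouter models present in REASONING_MODELS.
const showReasoningEffort = selectedProvider === "openrouter" && REASONING_MODELS.has(selectedModelId)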

webview-ui/src/i18n/locales/ca/settings.json

Lines changed: 6 additions & 0 deletions
@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "Límit de freqüència",
 			"description": "Temps mínim entre sol·licituds d'API."
+		},
+		"reasoningEffort": {
+			"label": "Esforç de raonament del model",
+			"high": "Alt",
+			"medium": "Mitjà",
+			"low": "Baix"
 		}
 	},
 	"browser": {

webview-ui/src/i18n/locales/de/settings.json

Lines changed: 6 additions & 0 deletions
@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "Ratenbegrenzung",
 			"description": "Minimale Zeit zwischen API-Anfragen."
+		},
+		"reasoningEffort": {
+			"label": "Modell-Denkaufwand",
+			"high": "Hoch",
+			"medium": "Mittel",
+			"low": "Niedrig"
 		}
 	},
 	"browser": {

webview-ui/src/i18n/locales/en/settings.json

Lines changed: 6 additions & 0 deletions
@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "Rate limit",
 			"description": "Minimum time between API requests."
+		},
+		"reasoningEffort": {
+			"label": "Model Reasoning Effort",
+			"high": "High",
+			"medium": "Medium",
+			"low": "Low"
 		}
 	},
 	"browser": {

webview-ui/src/i18n/locales/es/settings.json

Lines changed: 6 additions & 0 deletions
@@ -225,6 +225,12 @@
 		"rateLimitSeconds": {
 			"label": "Límite de tasa",
 			"description": "Tiempo mínimo entre solicitudes de API."
+		},
+		"reasoningEffort": {
+			"label": "Esfuerzo de razonamiento del modelo",
+			"high": "Alto",
+			"medium": "Medio",
+			"low": "Bajo"
 		}
 	},
 	"browser": {
