Merged
6 changes: 5 additions & 1 deletion src/api/index.ts
@@ -88,21 +88,25 @@ export function getModelParams({
model,
defaultMaxTokens,
defaultTemperature = 0,
defaultReasoningEffort,
}: {
options: ApiHandlerOptions
model: ModelInfo
defaultMaxTokens?: number
defaultTemperature?: number
defaultReasoningEffort?: "low" | "medium" | "high"
}) {
const {
modelMaxTokens: customMaxTokens,
modelMaxThinkingTokens: customMaxThinkingTokens,
modelTemperature: customTemperature,
reasoningEffort: customReasoningEffort,
} = options

let maxTokens = model.maxTokens ?? defaultMaxTokens
let thinking: BetaThinkingConfigParam | undefined = undefined
let temperature = customTemperature ?? defaultTemperature
const reasoningEffort = customReasoningEffort ?? defaultReasoningEffort

if (model.thinking) {
// Only honor `customMaxTokens` for thinking models.
@@ -118,5 +122,5 @@ export function getModelParams({
temperature = 1.0
}

return { maxTokens, thinking, temperature }
return { maxTokens, thinking, temperature, reasoningEffort }
}
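For reference, the new parameter follows the same nullish-coalescing precedence the function already uses for temperature: a user-configured `reasoningEffort` wins over the provider's `defaultReasoningEffort`. A minimal standalone sketch of just that rule (names simplified; not the full function):

```typescript
type ReasoningEffort = "low" | "medium" | "high"

// Minimal sketch of the precedence added above: a user-configured value,
// when present, overrides the provider default; otherwise the default
// (possibly undefined) flows through to the returned params.
function resolveReasoningEffort(
	customReasoningEffort?: ReasoningEffort,
	defaultReasoningEffort?: ReasoningEffort,
): ReasoningEffort | undefined {
	return customReasoningEffort ?? defaultReasoningEffort
}

console.log(resolveReasoningEffort(undefined, "medium")) // "medium"
console.log(resolveReasoningEffort("low", "medium")) // "low"
```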
15 changes: 15 additions & 0 deletions src/api/providers/openai.ts
@@ -82,6 +82,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {
const urlHost = this._getUrlHost(modelUrl)
const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format
const ark = modelUrl.includes(".volces.com")

if (modelId.startsWith("o3-mini")) {
yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages)
return
@@ -94,6 +95,7 @@ }
}

let convertedMessages

if (deepseekReasoner) {
convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
} else if (ark || enabledLegacyFormat) {
@@ -112,16 +114,20 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {
],
}
}

convertedMessages = [systemMessage, ...convertToOpenAiMessages(messages)]

if (modelInfo.supportsPromptCache) {
// Note: the following logic is copied from the OpenRouter handler:
// add cache_control to the last two user messages.
// (This works because we only ever add one user message at a time; if we added multiple, we'd need to mark the user message before the last assistant message.)
const lastTwoUserMessages = convertedMessages.filter((msg) => msg.role === "user").slice(-2)

lastTwoUserMessages.forEach((msg) => {
if (typeof msg.content === "string") {
msg.content = [{ type: "text", text: msg.content }]
}

if (Array.isArray(msg.content)) {
// NOTE: this is fine since env details will always be added at the end. But if they weren't, and the user added an image_url-type message, this would pop a text part from before it and move it to the end.
let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
@@ -130,6 +136,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {
lastTextPart = { type: "text", text: "..." }
msg.content.push(lastTextPart)
}

// @ts-ignore-next-line
lastTextPart["cache_control"] = { type: "ephemeral" }
}
@@ -145,7 +152,9 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {
messages: convertedMessages,
stream: true as const,
...(isGrokXAI ? {} : { stream_options: { include_usage: true } }),
reasoning_effort: this.getModel().info.reasoningEffort,
}

if (this.options.includeMaxTokens) {
requestOptions.max_tokens = modelInfo.maxTokens
}
@@ -185,6 +194,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {
lastUsage = chunk.usage
}
}

for (const chunk of matcher.final()) {
yield chunk
}
@@ -217,6 +227,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {
type: "text",
text: response.choices[0]?.message.content || "",
}

yield this.processUsageMetrics(response.usage, modelInfo)
}
}
@@ -241,6 +252,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {
async completePrompt(prompt: string): Promise<string> {
try {
const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl)

const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
model: this.getModel().id,
messages: [{ role: "user", content: prompt }],
@@ -250,11 +262,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {
requestOptions,
isAzureAiInference ? { path: AZURE_AI_INFERENCE_PATH } : {},
)

return response.choices[0]?.message.content || ""
} catch (error) {
if (error instanceof Error) {
throw new Error(`OpenAI completion error: ${error.message}`)
}

throw error
}
}
@@ -333,6 +347,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {
}
}
}

private _getUrlHost(baseUrl?: string): string {
try {
return new URL(baseUrl ?? "").host
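On the wire this change is just one extra top-level field in the chat-completions body. A standalone sketch of the same call shape (assuming a recent `openai` npm package where `reasoning_effort` is part of the chat-completion params; the model id is a placeholder):

```typescript
import OpenAI from "openai"

const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })

async function main() {
	// reasoning_effort rides along as a top-level request field, exactly as
	// in the handler's requestOptions above; when it is undefined the SDK
	// drops it from the serialized body.
	const stream = await client.chat.completions.create({
		model: "o3-mini", // placeholder model id
		messages: [{ role: "user", content: "Summarize the plan." }],
		stream: true,
		stream_options: { include_usage: true },
		reasoning_effort: "medium",
	})

	for await (const chunk of stream) {
		process.stdout.write(chunk.choices[0]?.delta?.content ?? "")
	}
}

main()
```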
15 changes: 12 additions & 3 deletions src/api/providers/openrouter.ts
@@ -1,8 +1,7 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta"
import axios, { AxiosRequestConfig } from "axios"
import axios from "axios"
import OpenAI from "openai"
import delay from "delay"

import { ApiHandlerOptions, ModelInfo, openRouterDefaultModelId, openRouterDefaultModelInfo } from "../../shared/api"
import { parseApiPrice } from "../../utils/cost"
@@ -22,6 +21,12 @@ type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {
transforms?: string[]
include_reasoning?: boolean
thinking?: BetaThinkingConfigParam
// https://openrouter.ai/docs/use-cases/reasoning-tokens
reasoning?: {
effort?: "high" | "medium" | "low"
max_tokens?: number
exclude?: boolean
}
}

export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler {
@@ -42,7 +47,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler {
systemPrompt: string,
messages: Anthropic.Messages.MessageParam[],
): AsyncGenerator<ApiStreamChunk> {
let { id: modelId, maxTokens, thinking, temperature, topP } = this.getModel()
let { id: modelId, maxTokens, thinking, temperature, topP, reasoningEffort } = this.getModel()

// Convert Anthropic messages to OpenAI format.
let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
@@ -70,13 +75,16 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler {
},
],
}

// Add cache_control to the last two user messages
// (This works because we only ever add one user message at a time; if we added multiple, we'd need to mark the user message before the last assistant message.)
const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)

lastTwoUserMessages.forEach((msg) => {
if (typeof msg.content === "string") {
msg.content = [{ type: "text", text: msg.content }]
}

if (Array.isArray(msg.content)) {
// NOTE: this is fine since env details will always be added at the end. But if they weren't, and the user added an image_url-type message, this would pop a text part from before it and move it to the end.
let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
@@ -113,6 +121,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler {
}),
// This way, the transforms field is only included in the parameters when openRouterUseMiddleOutTransform is true (the default when unset).
...((this.options.openRouterUseMiddleOutTransform ?? true) && { transforms: ["middle-out"] }),
...(reasoningEffort && { reasoning: { effort: reasoningEffort } }),
}

const stream = await this.client.chat.completions.create(completionParams)
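Per the OpenRouter reasoning-tokens docs linked in the type above, `reasoning` is a plain JSON field on the completion body; the handler gets it there by spreading it into the SDK params. A minimal sketch of the raw request (API key from the environment; non-streaming to keep it short):

```typescript
async function main() {
	const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
		method: "POST",
		headers: {
			Authorization: `Bearer ${process.env.OPENROUTER_API_KEY}`,
			"Content-Type": "application/json",
		},
		body: JSON.stringify({
			model: "x-ai/grok-3-mini-beta",
			messages: [{ role: "user", content: "Hello" }],
			// Same shape as OpenRouterChatCompletionParams above; could
			// instead set max_tokens or exclude.
			reasoning: { effort: "high" },
		}),
	})

	const data = await response.json()
	console.log(data.choices?.[0]?.message?.content)
}

main()
```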
12 changes: 11 additions & 1 deletion webview-ui/src/components/settings/ApiOptions.tsx
@@ -46,7 +46,7 @@ import {
OPENROUTER_DEFAULT_PROVIDER_NAME,
} from "@/components/ui/hooks/useOpenRouterModelProviders"
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue, SelectSeparator, Button } from "@/components/ui"
import { MODELS_BY_PROVIDER, PROVIDERS, VERTEX_REGIONS } from "./constants"
import { MODELS_BY_PROVIDER, PROVIDERS, VERTEX_REGIONS, REASONING_MODELS } from "./constants"
import { AWS_REGIONS } from "../../../../src/shared/aws_regions"
import { VSCodeButtonLink } from "../common/VSCodeButtonLink"
import { ModelInfoView } from "./ModelInfoView"
@@ -58,6 +58,7 @@ import { ThinkingBudget } from "./ThinkingBudget"
import { R1FormatSetting } from "./R1FormatSetting"
import { OpenRouterBalanceDisplay } from "./OpenRouterBalanceDisplay"
import { RequestyBalanceDisplay } from "./RequestyBalanceDisplay"
import { ReasoningEffort } from "./ReasoningEffort"

interface ApiOptionsProps {
uriScheme: string | undefined
@@ -1537,6 +1538,13 @@ const ApiOptions = ({
</div>
)}

{selectedProvider === "openrouter" && REASONING_MODELS.has(selectedModelId) && (
<ReasoningEffort
apiConfiguration={apiConfiguration}
setApiConfigurationField={setApiConfigurationField}
/>
)}

{selectedProvider === "glama" && (
<ModelPicker
apiConfiguration={apiConfiguration}
@@ -1664,12 +1672,14 @@ const ApiOptions = ({
})()}
</>
)}

<ModelInfoView
selectedModelId={selectedModelId}
modelInfo={selectedModelInfo}
isDescriptionExpanded={isDescriptionExpanded}
setIsDescriptionExpanded={setIsDescriptionExpanded}
/>

<ThinkingBudget
key={`${selectedProvider}-${selectedModelId}`}
apiConfiguration={apiConfiguration}
37 changes: 37 additions & 0 deletions webview-ui/src/components/settings/ReasoningEffort.tsx
@@ -0,0 +1,37 @@
import { useAppTranslation } from "@/i18n/TranslationContext"

import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui"

import { ApiConfiguration } from "../../../../src/shared/api"
import { reasoningEfforts, ReasoningEffort as ReasoningEffortType } from "../../../../src/schemas"

interface ReasoningEffortProps {
apiConfiguration: ApiConfiguration
setApiConfigurationField: <K extends keyof ApiConfiguration>(field: K, value: ApiConfiguration[K]) => void
}

export const ReasoningEffort = ({ apiConfiguration, setApiConfigurationField }: ReasoningEffortProps) => {
const { t } = useAppTranslation()

return (
<div className="flex flex-col gap-1">
<div className="flex justify-between items-center">
<label className="block font-medium mb-1">{t("settings:providers.reasoningEffort.label")}</label>
</div>
<Select
value={apiConfiguration.reasoningEffort}
onValueChange={(value) => setApiConfigurationField("reasoningEffort", value as ReasoningEffortType)}>
<SelectTrigger className="w-full">
<SelectValue placeholder={t("settings:common.select")} />
</SelectTrigger>
<SelectContent>
{reasoningEfforts.map((value) => (
<SelectItem key={value} value={value}>
{t(`settings:providers.reasoningEffort.${value}`)}
</SelectItem>
))}
</SelectContent>
</Select>
</div>
)
}
2 changes: 2 additions & 0 deletions webview-ui/src/components/settings/constants.ts
@@ -46,3 +46,5 @@ export const VERTEX_REGIONS = [
{ value: "europe-west4", label: "europe-west4" },
{ value: "asia-southeast1", label: "asia-southeast1" },
]

export const REASONING_MODELS = new Set(["x-ai/grok-3-mini-beta"])
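`REASONING_MODELS` is the UI gate consulted in `ApiOptions` above. A small sketch of the check, with a hypothetical second entry showing how support for another model would be added:

```typescript
const REASONING_MODELS = new Set(["x-ai/grok-3-mini-beta"])

// Hypothetical: enabling the dropdown for another model is a one-line
// addition here (the provider still has to forward the value).
// REASONING_MODELS.add("some-vendor/some-reasoning-model")

function shouldShowReasoningEffort(provider: string, modelId: string): boolean {
	return provider === "openrouter" && REASONING_MODELS.has(modelId)
}

console.log(shouldShowReasoningEffort("openrouter", "x-ai/grok-3-mini-beta")) // true
console.log(shouldShowReasoningEffort("openrouter", "anthropic/claude-3.5-sonnet")) // false
```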
6 changes: 6 additions & 0 deletions webview-ui/src/i18n/locales/ca/settings.json
@@ -225,6 +225,12 @@
"rateLimitSeconds": {
"label": "Límit de freqüència",
"description": "Temps mínim entre sol·licituds d'API."
},
"reasoningEffort": {
"label": "Esforç de raonament del model",
"high": "Alt",
"medium": "Mitjà",
"low": "Baix"
}
},
"browser": {
6 changes: 6 additions & 0 deletions webview-ui/src/i18n/locales/de/settings.json
@@ -225,6 +225,12 @@
"rateLimitSeconds": {
"label": "Ratenbegrenzung",
"description": "Minimale Zeit zwischen API-Anfragen."
},
"reasoningEffort": {
"label": "Modell-Denkaufwand",
"high": "Hoch",
"medium": "Mittel",
"low": "Niedrig"
}
},
"browser": {
6 changes: 6 additions & 0 deletions webview-ui/src/i18n/locales/en/settings.json
@@ -225,6 +225,12 @@
"rateLimitSeconds": {
"label": "Rate limit",
"description": "Minimum time between API requests."
},
"reasoningEffort": {
"label": "Model Reasoning Effort",
"high": "High",
"medium": "Medium",
"low": "Low"
}
},
"browser": {
6 changes: 6 additions & 0 deletions webview-ui/src/i18n/locales/es/settings.json
@@ -225,6 +225,12 @@
"rateLimitSeconds": {
"label": "Límite de tasa",
"description": "Tiempo mínimo entre solicitudes de API."
},
"reasoningEffort": {
"label": "Esfuerzo de razonamiento del modelo",
"high": "Alto",
"medium": "Medio",
"low": "Bajo"
}
},
"browser": {
6 changes: 6 additions & 0 deletions webview-ui/src/i18n/locales/fr/settings.json
@@ -225,6 +225,12 @@
"rateLimitSeconds": {
"label": "Limite de débit",
"description": "Temps minimum entre les requêtes API."
},
"reasoningEffort": {
"label": "Effort de raisonnement du modèle",
"high": "Élevé",
"medium": "Moyen",
"low": "Faible"
}
},
"browser": {
6 changes: 6 additions & 0 deletions webview-ui/src/i18n/locales/hi/settings.json
@@ -225,6 +225,12 @@
"rateLimitSeconds": {
"label": "दर सीमा",
"description": "API अनुरोधों के बीच न्यूनतम समय।"
},
"reasoningEffort": {
"label": "मॉडल तर्क प्रयास",
"high": "उच्च",
"medium": "मध्यम",
"low": "निम्न"
}
},
"browser": {
6 changes: 6 additions & 0 deletions webview-ui/src/i18n/locales/it/settings.json
@@ -225,6 +225,12 @@
"rateLimitSeconds": {
"label": "Limite di frequenza",
"description": "Tempo minimo tra le richieste API."
},
"reasoningEffort": {
"label": "Sforzo di ragionamento del modello",
"high": "Alto",
"medium": "Medio",
"low": "Basso"
}
},
"browser": {
6 changes: 6 additions & 0 deletions webview-ui/src/i18n/locales/ja/settings.json
@@ -225,6 +225,12 @@
"rateLimitSeconds": {
"label": "レート制限",
"description": "APIリクエスト間の最小時間。"
},
"reasoningEffort": {
"label": "モデル推論の労力",
"high": "高",
"medium": "中",
"low": "低"
}
},
"browser": {
6 changes: 6 additions & 0 deletions webview-ui/src/i18n/locales/ko/settings.json
@@ -225,6 +225,12 @@
"rateLimitSeconds": {
"label": "속도 제한",
"description": "API 요청 간 최소 시간."
},
"reasoningEffort": {
"label": "모델 추론 노력",
"high": "높음",
"medium": "중간",
"low": "낮음"
}
},
"browser": {