From dacbd6cb78ce2097e5080dbc26502bbb4419286a Mon Sep 17 00:00:00 2001 From: cte Date: Thu, 10 Apr 2025 20:10:30 -0700 Subject: [PATCH 1/8] Specify reasoning effort for OpenRouter reasoning models --- src/api/index.ts | 6 ++- src/api/providers/openrouter.ts | 15 +++++-- src/exports/roo-code.d.ts | 3 +- src/exports/types.ts | 3 +- src/schemas/index.ts | 6 ++- .../src/components/settings/ApiOptions.tsx | 9 +++- .../components/settings/ReasoningEffort.tsx | 45 +++++++++++++++++++ .../src/components/settings/constants.ts | 2 + 8 files changed, 80 insertions(+), 9 deletions(-) create mode 100644 webview-ui/src/components/settings/ReasoningEffort.tsx diff --git a/src/api/index.ts b/src/api/index.ts index 0880f42218..c6d2b07cd2 100644 --- a/src/api/index.ts +++ b/src/api/index.ts @@ -88,21 +88,25 @@ export function getModelParams({ model, defaultMaxTokens, defaultTemperature = 0, + defaultReasoningEffort, }: { options: ApiHandlerOptions model: ModelInfo defaultMaxTokens?: number defaultTemperature?: number + defaultReasoningEffort?: "low" | "medium" | "high" }) { const { modelMaxTokens: customMaxTokens, modelMaxThinkingTokens: customMaxThinkingTokens, modelTemperature: customTemperature, + reasoningEffort: customReasoningEffort, } = options let maxTokens = model.maxTokens ?? defaultMaxTokens let thinking: BetaThinkingConfigParam | undefined = undefined let temperature = customTemperature ?? defaultTemperature + const reasoningEffort = customReasoningEffort ?? defaultReasoningEffort if (model.thinking) { // Only honor `customMaxTokens` for thinking models. @@ -118,5 +122,5 @@ export function getModelParams({ temperature = 1.0 } - return { maxTokens, thinking, temperature } + return { maxTokens, thinking, temperature, reasoningEffort } } diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index 72e4fe576a..2a279d09a1 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -1,8 +1,7 @@ import { Anthropic } from "@anthropic-ai/sdk" import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta" -import axios, { AxiosRequestConfig } from "axios" +import axios from "axios" import OpenAI from "openai" -import delay from "delay" import { ApiHandlerOptions, ModelInfo, openRouterDefaultModelId, openRouterDefaultModelInfo } from "../../shared/api" import { parseApiPrice } from "../../utils/cost" @@ -22,6 +21,12 @@ type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & { transforms?: string[] include_reasoning?: boolean thinking?: BetaThinkingConfigParam + // https://openrouter.ai/docs/use-cases/reasoning-tokens + reasoning?: { + effort?: "high" | "medium" | "low" + max_tokens?: number + exclude?: boolean + } } export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler { @@ -42,7 +47,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH systemPrompt: string, messages: Anthropic.Messages.MessageParam[], ): AsyncGenerator { - let { id: modelId, maxTokens, thinking, temperature, topP } = this.getModel() + let { id: modelId, maxTokens, thinking, temperature, topP, reasoningEffort } = this.getModel() // Convert Anthropic messages to OpenAI format. let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [ @@ -70,13 +75,16 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH }, ], } + // Add cache_control to the last two user messages // (note: this works because we only ever add one user message at a time, but if we added multiple we'd need to mark the user message before the last assistant message) const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2) + lastTwoUserMessages.forEach((msg) => { if (typeof msg.content === "string") { msg.content = [{ type: "text", text: msg.content }] } + if (Array.isArray(msg.content)) { // NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end. let lastTextPart = msg.content.filter((part) => part.type === "text").pop() @@ -113,6 +121,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH }), // This way, the transforms field will only be included in the parameters when openRouterUseMiddleOutTransform is true. ...((this.options.openRouterUseMiddleOutTransform ?? true) && { transforms: ["middle-out"] }), + ...(reasoningEffort && { reasoning: { effort: reasoningEffort } }), } const stream = await this.client.chat.completions.create(completionParams) diff --git a/src/exports/roo-code.d.ts b/src/exports/roo-code.d.ts index 40939e4e32..95117083d8 100644 --- a/src/exports/roo-code.d.ts +++ b/src/exports/roo-code.d.ts @@ -175,10 +175,11 @@ type ProviderSettings = { cachableFields?: string[] | undefined } | null) | undefined - modelTemperature?: (number | null) | undefined modelMaxTokens?: number | undefined modelMaxThinkingTokens?: number | undefined includeMaxTokens?: boolean | undefined + modelTemperature?: (number | null) | undefined + reasoningEffort?: ("low" | "medium" | "high") | undefined rateLimitSeconds?: number | undefined fakeAi?: unknown | undefined } diff --git a/src/exports/types.ts b/src/exports/types.ts index 64a955554e..413172141c 100644 --- a/src/exports/types.ts +++ b/src/exports/types.ts @@ -176,10 +176,11 @@ type ProviderSettings = { cachableFields?: string[] | undefined } | null) | undefined - modelTemperature?: (number | null) | undefined modelMaxTokens?: number | undefined modelMaxThinkingTokens?: number | undefined includeMaxTokens?: boolean | undefined + modelTemperature?: (number | null) | undefined + reasoningEffort?: ("low" | "medium" | "high") | undefined rateLimitSeconds?: number | undefined fakeAi?: unknown | undefined } diff --git a/src/schemas/index.ts b/src/schemas/index.ts index d2471882ec..47174c90b7 100644 --- a/src/schemas/index.ts +++ b/src/schemas/index.ts @@ -383,11 +383,12 @@ export const providerSettingsSchema = z.object({ requestyModelId: z.string().optional(), requestyModelInfo: modelInfoSchema.nullish(), // Claude 3.7 Sonnet Thinking - modelTemperature: z.number().nullish(), modelMaxTokens: z.number().optional(), modelMaxThinkingTokens: z.number().optional(), // Generic includeMaxTokens: z.boolean().optional(), + modelTemperature: z.number().nullish(), + reasoningEffort: z.enum(["low", "medium", "high"]).optional(), rateLimitSeconds: z.number().optional(), // Fake AI fakeAi: z.unknown().optional(), @@ -470,11 +471,12 @@ const providerSettingsRecord: ProviderSettingsRecord = { requestyModelId: undefined, requestyModelInfo: undefined, // Claude 3.7 Sonnet Thinking - modelTemperature: undefined, modelMaxTokens: undefined, modelMaxThinkingTokens: undefined, // Generic includeMaxTokens: undefined, + modelTemperature: undefined, + reasoningEffort: undefined, rateLimitSeconds: undefined, // Fake AI fakeAi: undefined, diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx index 55690d4806..1d76148c10 100644 --- a/webview-ui/src/components/settings/ApiOptions.tsx +++ b/webview-ui/src/components/settings/ApiOptions.tsx @@ -46,7 +46,7 @@ import { OPENROUTER_DEFAULT_PROVIDER_NAME, } from "@/components/ui/hooks/useOpenRouterModelProviders" import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue, SelectSeparator, Button } from "@/components/ui" -import { MODELS_BY_PROVIDER, PROVIDERS, VERTEX_REGIONS } from "./constants" +import { MODELS_BY_PROVIDER, PROVIDERS, VERTEX_REGIONS, REASONING_MODELS } from "./constants" import { AWS_REGIONS } from "../../../../src/shared/aws_regions" import { VSCodeButtonLink } from "../common/VSCodeButtonLink" import { ModelInfoView } from "./ModelInfoView" @@ -58,6 +58,7 @@ import { ThinkingBudget } from "./ThinkingBudget" import { R1FormatSetting } from "./R1FormatSetting" import { OpenRouterBalanceDisplay } from "./OpenRouterBalanceDisplay" import { RequestyBalanceDisplay } from "./RequestyBalanceDisplay" +import { ReasoningEffort } from "./ReasoningEffort" interface ApiOptionsProps { uriScheme: string | undefined @@ -1519,6 +1520,10 @@ const ApiOptions = ({ )} + {selectedProvider === "openrouter" && REASONING_MODELS.has(selectedModelId) && ( + + )} + {selectedProvider === "glama" && ( )} + + (field: K, value: ApiConfiguration[K]) => void + modelInfo: ModelInfo +} + +export const ReasoningEffort = ({ setApiConfigurationField, modelInfo }: ReasoningEffortProps) => { + const { t } = useAppTranslation() + + return ( +
+
+ +
+ +
+ ) +} diff --git a/webview-ui/src/components/settings/constants.ts b/webview-ui/src/components/settings/constants.ts index 01f24a2ed5..772fe04b5f 100644 --- a/webview-ui/src/components/settings/constants.ts +++ b/webview-ui/src/components/settings/constants.ts @@ -46,3 +46,5 @@ export const VERTEX_REGIONS = [ { value: "europe-west4", label: "europe-west4" }, { value: "asia-southeast1", label: "asia-southeast1" }, ] + +export const REASONING_MODELS = new Set(["x-ai/grok-3-mini-beta"]) From e588b3f059ba460ffe3766d3b484afea2047c0a8 Mon Sep 17 00:00:00 2001 From: cte Date: Thu, 10 Apr 2025 20:13:49 -0700 Subject: [PATCH 2/8] Add ReasoningEffort type --- src/schemas/index.ts | 14 ++++++++++++-- .../src/components/settings/ReasoningEffort.tsx | 10 +++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/schemas/index.ts b/src/schemas/index.ts index 47174c90b7..013f09ac07 100644 --- a/src/schemas/index.ts +++ b/src/schemas/index.ts @@ -95,6 +95,16 @@ export const telemetrySettingsSchema = z.enum(telemetrySettings) export type TelemetrySetting = z.infer +/** + * ReasoningEffort + */ + +export const reasoningEfforts = ["low", "medium", "high"] as const + +export const reasoningEffortsSchema = z.enum(reasoningEfforts) + +export type ReasoningEffort = z.infer + /** * ModelInfo */ @@ -110,7 +120,7 @@ export const modelInfoSchema = z.object({ cacheWritesPrice: z.number().optional(), cacheReadsPrice: z.number().optional(), description: z.string().optional(), - reasoningEffort: z.enum(["low", "medium", "high"]).optional(), + reasoningEffort: reasoningEffortsSchema.optional(), thinking: z.boolean().optional(), minTokensPerCachePoint: z.number().optional(), maxCachePoints: z.number().optional(), @@ -388,7 +398,7 @@ export const providerSettingsSchema = z.object({ // Generic includeMaxTokens: z.boolean().optional(), modelTemperature: z.number().nullish(), - reasoningEffort: z.enum(["low", "medium", "high"]).optional(), + reasoningEffort: reasoningEffortsSchema.optional(), rateLimitSeconds: z.number().optional(), // Fake AI fakeAi: z.unknown().optional(), diff --git a/webview-ui/src/components/settings/ReasoningEffort.tsx b/webview-ui/src/components/settings/ReasoningEffort.tsx index efe8c427b6..5b5b877c3a 100644 --- a/webview-ui/src/components/settings/ReasoningEffort.tsx +++ b/webview-ui/src/components/settings/ReasoningEffort.tsx @@ -3,8 +3,7 @@ import { useAppTranslation } from "@/i18n/TranslationContext" import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue, SelectSeparator } from "@/components/ui" import { ApiConfiguration, ModelInfo } from "../../../../src/shared/api" - -export const EFFORTS = ["high", "medium", "low"] as const +import { reasoningEfforts } from "../../../../src/schemas" interface ReasoningEffortProps { setApiConfigurationField: (field: K, value: ApiConfiguration[K]) => void @@ -22,10 +21,7 @@ export const ReasoningEffort = ({ setApiConfigurationField, modelInfo }: Reasoni setApiConfigurationField("reasoningEffort", value as "high" | "medium" | "low") }> From 6a69d48119414a4d8814065fd91511065df01605 Mon Sep 17 00:00:00 2001 From: cte Date: Thu, 10 Apr 2025 20:20:59 -0700 Subject: [PATCH 4/8] Remove copypasta --- webview-ui/src/components/settings/ReasoningEffort.tsx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/webview-ui/src/components/settings/ReasoningEffort.tsx b/webview-ui/src/components/settings/ReasoningEffort.tsx index 5ffb6682be..00ec57cc89 100644 --- a/webview-ui/src/components/settings/ReasoningEffort.tsx +++ b/webview-ui/src/components/settings/ReasoningEffort.tsx @@ -1,6 +1,6 @@ import { useAppTranslation } from "@/i18n/TranslationContext" -import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue, SelectSeparator } from "@/components/ui" +import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui" import { ApiConfiguration } from "../../../../src/shared/api" import { reasoningEfforts } from "../../../../src/schemas" @@ -27,8 +27,6 @@ export const ReasoningEffort = ({ apiConfiguration, setApiConfigurationField }: - OpenRouter - {reasoningEfforts.map((value) => ( {value} From c091ba11b486fcda8e1497978318939d04ce8ccf Mon Sep 17 00:00:00 2001 From: cte Date: Thu, 10 Apr 2025 21:58:58 -0700 Subject: [PATCH 5/8] Set reasoning effort for Grok 3 Mini --- evals/packages/types/src/roo-code-defaults.ts | 13 ++++++++----- evals/packages/types/src/roo-code.ts | 13 +++++++++---- src/api/providers/openai.ts | 17 ++++++++++++++++- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/evals/packages/types/src/roo-code-defaults.ts b/evals/packages/types/src/roo-code-defaults.ts index 8def51f085..f126f33ff0 100644 --- a/evals/packages/types/src/roo-code-defaults.ts +++ b/evals/packages/types/src/roo-code-defaults.ts @@ -20,18 +20,21 @@ export const rooCodeDefaults: RooCodeSettings = { // thinking: false, // }, + modelTemperature: null, + // reasoningEffort: "high", + pinnedApiConfigs: {}, - lastShownAnnouncementId: "mar-20-2025-3-10", + lastShownAnnouncementId: "apr-04-2025-boomerang", autoApprovalEnabled: true, alwaysAllowReadOnly: true, alwaysAllowReadOnlyOutsideWorkspace: false, alwaysAllowWrite: true, alwaysAllowWriteOutsideWorkspace: false, - writeDelayMs: 200, + writeDelayMs: 1000, alwaysAllowBrowser: true, alwaysApproveResubmit: true, - requestDelaySeconds: 5, + requestDelaySeconds: 10, alwaysAllowMcp: true, alwaysAllowModeSwitch: true, alwaysAllowSubtasks: true, @@ -40,8 +43,8 @@ export const rooCodeDefaults: RooCodeSettings = { browserToolEnabled: false, browserViewportSize: "900x600", - screenshotQuality: 38, - remoteBrowserEnabled: true, + screenshotQuality: 75, + remoteBrowserEnabled: false, enableCheckpoints: false, checkpointStorage: "task", diff --git a/evals/packages/types/src/roo-code.ts b/evals/packages/types/src/roo-code.ts index 22bff70d16..32e7fd9750 100644 --- a/evals/packages/types/src/roo-code.ts +++ b/evals/packages/types/src/roo-code.ts @@ -96,7 +96,7 @@ export type TelemetrySetting = z.infer */ export const modelInfoSchema = z.object({ - maxTokens: z.number().optional(), + maxTokens: z.number().nullish(), contextWindow: z.number(), supportsImages: z.boolean().optional(), supportsComputerUse: z.boolean().optional(), @@ -373,11 +373,14 @@ export const providerSettingsSchema = z.object({ requestyApiKey: z.string().optional(), requestyModelId: z.string().optional(), requestyModelInfo: modelInfoSchema.optional(), - // Generic + // Claude 3.7 Sonnet Thinking modelMaxTokens: z.number().optional(), // Currently only used by Anthropic hybrid thinking models. modelMaxThinkingTokens: z.number().optional(), // Currently only used by Anthropic hybrid thinking models. - modelTemperature: z.number().nullish(), + // Generic includeMaxTokens: z.boolean().optional(), + modelTemperature: z.number().nullish(), + reasoningEffort: z.enum(["low", "medium", "high"]).optional(), + rateLimitSeconds: z.number().optional(), // Fake AI fakeAi: z.unknown().optional(), }) @@ -457,11 +460,13 @@ const providerSettingsRecord: ProviderSettingsRecord = { requestyModelId: undefined, requestyModelInfo: undefined, // Claude 3.7 Sonnet Thinking - modelTemperature: undefined, modelMaxTokens: undefined, modelMaxThinkingTokens: undefined, // Generic includeMaxTokens: undefined, + modelTemperature: undefined, + reasoningEffort: undefined, + rateLimitSeconds: undefined, // Fake AI fakeAi: undefined, } diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index fc739b3110..1e8e7ef30b 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -82,6 +82,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const urlHost = this._getUrlHost(modelUrl) const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format const ark = modelUrl.includes(".volces.com") + if (modelId.startsWith("o3-mini")) { yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages) return @@ -94,6 +95,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } let convertedMessages + if (deepseekReasoner) { convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]) } else if (ark || enabledLegacyFormat) { @@ -112,16 +114,20 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ], } } + convertedMessages = [systemMessage, ...convertToOpenAiMessages(messages)] + if (modelInfo.supportsPromptCache) { // Note: the following logic is copied from openrouter: // Add cache_control to the last two user messages // (note: this works because we only ever add one user message at a time, but if we added multiple we'd need to mark the user message before the last assistant message) const lastTwoUserMessages = convertedMessages.filter((msg) => msg.role === "user").slice(-2) + lastTwoUserMessages.forEach((msg) => { if (typeof msg.content === "string") { msg.content = [{ type: "text", text: msg.content }] } + if (Array.isArray(msg.content)) { // NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end. let lastTextPart = msg.content.filter((part) => part.type === "text").pop() @@ -130,6 +136,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl lastTextPart = { type: "text", text: "..." } msg.content.push(lastTextPart) } + // @ts-ignore-next-line lastTextPart["cache_control"] = { type: "ephemeral" } } @@ -145,7 +152,9 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl messages: convertedMessages, stream: true as const, ...(isGrokXAI ? {} : { stream_options: { include_usage: true } }), + reasoning_effort: modelId === "grok-3-mini-beta" ? "high" : this.getModel().info.reasoningEffort, } + if (this.options.includeMaxTokens) { requestOptions.max_tokens = modelInfo.maxTokens } @@ -185,6 +194,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl lastUsage = chunk.usage } } + for (const chunk of matcher.final()) { yield chunk } @@ -217,6 +227,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl type: "text", text: response.choices[0]?.message.content || "", } + yield this.processUsageMetrics(response.usage, modelInfo) } } @@ -241,6 +252,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl async completePrompt(prompt: string): Promise { try { const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl) + const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { model: this.getModel().id, messages: [{ role: "user", content: prompt }], @@ -250,11 +262,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl requestOptions, isAzureAiInference ? { path: AZURE_AI_INFERENCE_PATH } : {}, ) + return response.choices[0]?.message.content || "" } catch (error) { if (error instanceof Error) { throw new Error(`OpenAI completion error: ${error.message}`) } + throw error } } @@ -281,7 +295,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ], stream: true, ...(isGrokXAI ? {} : { stream_options: { include_usage: true } }), - reasoning_effort: this.getModel().info.reasoningEffort, + reasoning_effort: modelId === "grok-3-mini-beta" ? "high" : this.getModel().info.reasoningEffort, }, methodIsAzureAiInference ? { path: AZURE_AI_INFERENCE_PATH } : {}, ) @@ -333,6 +347,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } } } + private _getUrlHost(baseUrl?: string): string { try { return new URL(baseUrl ?? "").host From 26bb8c9ecb4c8fae163eb732b301362c542ec188 Mon Sep 17 00:00:00 2001 From: cte Date: Tue, 15 Apr 2025 11:47:30 -0700 Subject: [PATCH 6/8] Use translations --- webview-ui/src/components/settings/ReasoningEffort.tsx | 10 ++++------ webview-ui/src/i18n/locales/en/settings.json | 6 ++++++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/webview-ui/src/components/settings/ReasoningEffort.tsx b/webview-ui/src/components/settings/ReasoningEffort.tsx index 00ec57cc89..7c8ee3a385 100644 --- a/webview-ui/src/components/settings/ReasoningEffort.tsx +++ b/webview-ui/src/components/settings/ReasoningEffort.tsx @@ -3,7 +3,7 @@ import { useAppTranslation } from "@/i18n/TranslationContext" import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui" import { ApiConfiguration } from "../../../../src/shared/api" -import { reasoningEfforts } from "../../../../src/schemas" +import { reasoningEfforts, ReasoningEffort as ReasoningEffortType } from "../../../../src/schemas" interface ReasoningEffortProps { apiConfiguration: ApiConfiguration @@ -16,20 +16,18 @@ export const ReasoningEffort = ({ apiConfiguration, setApiConfigurationField }: return (
- +