diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts index e979f654dfb..803a6d4664a 100644 --- a/packages/types/src/model.ts +++ b/packages/types/src/model.ts @@ -14,7 +14,11 @@ export type ReasoningEffort = z.infer * ReasoningEffortWithMinimal */ -export const reasoningEffortWithMinimalSchema = z.union([reasoningEffortsSchema, z.literal("minimal")]) +export const reasoningEffortWithMinimalSchema = z.union([ + reasoningEffortsSchema, + z.literal("minimal"), + z.literal("off"), +]) export type ReasoningEffortWithMinimal = z.infer diff --git a/src/api/providers/requesty.ts b/src/api/providers/requesty.ts index 16aefae5286..dc56e272b57 100644 --- a/src/api/providers/requesty.ts +++ b/src/api/providers/requesty.ts @@ -123,7 +123,9 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan model, max_tokens, temperature, - ...(reasoning_effort && reasoning_effort !== "minimal" && { reasoning_effort }), + ...(reasoning_effort && + reasoning_effort !== "minimal" && + reasoning_effort !== "off" && { reasoning_effort }), ...(thinking && { thinking }), stream: true, stream_options: { include_usage: true }, diff --git a/src/api/transform/reasoning.ts b/src/api/transform/reasoning.ts index 100b1c26846..b0349deaa8c 100644 --- a/src/api/transform/reasoning.ts +++ b/src/api/transform/reasoning.ts @@ -34,7 +34,7 @@ export const getOpenRouterReasoning = ({ shouldUseReasoningBudget({ model, settings }) ? { max_tokens: reasoningBudget } : shouldUseReasoningEffort({ model, settings }) - ? reasoningEffort + ? reasoningEffort && reasoningEffort !== "off" ? { effort: reasoningEffort } : undefined : undefined @@ -57,7 +57,7 @@ export const getOpenAiReasoning = ({ // If model has reasoning effort capability, return object even if effort is undefined // This preserves the reasoning_effort field in the API call - if (reasoningEffort === "minimal") { + if (reasoningEffort === "minimal" || reasoningEffort === "off") { return undefined } diff --git a/src/shared/api.ts b/src/shared/api.ts index 79001cb0ad0..6b6fb1a6dac 100644 --- a/src/shared/api.ts +++ b/src/shared/api.ts @@ -68,6 +68,11 @@ export const shouldUseReasoningEffort = ({ return false } + // If reasoningEffort is explicitly set to "off", reasoning should be disabled + if (settings?.reasoningEffort === "off") { + return false + } + // Otherwise, use reasoning if: // 1. Model supports reasoning effort AND settings provide reasoning effort, OR // 2. Model itself has a reasoningEffort property diff --git a/webview-ui/src/components/settings/ThinkingBudget.tsx b/webview-ui/src/components/settings/ThinkingBudget.tsx index 7a85e61e7a5..2ab5fb98d36 100644 --- a/webview-ui/src/components/settings/ThinkingBudget.tsx +++ b/webview-ui/src/components/settings/ThinkingBudget.tsx @@ -62,8 +62,8 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod // Build available reasoning efforts list const baseEfforts = [...reasoningEfforts] as ReasoningEffortWithMinimal[] const availableReasoningEfforts: ReadonlyArray = showMinimalOption - ? (["minimal", ...baseEfforts] as ReasoningEffortWithMinimal[]) - : baseEfforts + ? (["off", "minimal", ...baseEfforts] as ReasoningEffortWithMinimal[]) + : (["off", ...baseEfforts] as ReasoningEffortWithMinimal[]) // Default reasoning effort - use model's default if available // GPT-5 models have "medium" as their default in the model configuration diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json index dfccc49cc4c..6bd6f9c2be6 100644 --- a/webview-ui/src/i18n/locales/en/settings.json +++ b/webview-ui/src/i18n/locales/en/settings.json @@ -465,6 +465,7 @@ }, "reasoningEffort": { "label": "Model Reasoning Effort", + "off": "Off", "minimal": "Minimal (Fastest)", "low": "Low", "medium": "Medium",