Skip to content

Commit 01cb12f

Browse files
Add GPT-5.1 models and clean up reasoning effort logic (#9252)
* Reasoning effort: capability-driven; add disable/none/minimal; remove GPT-5 minimal special-casing; document UI semantics; remove temporary logs
* Remove unused supportsReasoningNone
* Roo reasoning: omit field on 'disable'; UI: do not flip enableReasoningEffort when selecting 'disable'
* Update packages/types/src/model.ts
  Co-authored-by: roomote[bot] <219738659+roomote[bot]@users.noreply.github.com>
* Update webview-ui/src/components/settings/SimpleThinkingBudget.tsx
  Co-authored-by: roomote[bot] <219738659+roomote[bot]@users.noreply.github.com>
---------
Co-authored-by: roomote[bot] <219738659+roomote[bot]@users.noreply.github.com>
1 parent c49f9ab commit 01cb12f

File tree

16 files changed

+509
-233
lines changed

16 files changed

+509
-233
lines changed

packages/types/src/model.ts

Lines changed: 21 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -18,6 +18,22 @@ export const reasoningEffortWithMinimalSchema = z.union([reasoningEffortsSchema,
1818

1919
export type ReasoningEffortWithMinimal = z.infer<typeof reasoningEffortWithMinimalSchema>
2020

21+
/**
22+
* Extended Reasoning Effort (includes "none" and "minimal")
23+
* Note: "disable" is a UI/control value, not a value sent as effort
24+
*/
25+
export const reasoningEffortsExtended = ["none", "minimal", "low", "medium", "high"] as const
26+
27+
export const reasoningEffortExtendedSchema = z.enum(reasoningEffortsExtended)
28+
29+
export type ReasoningEffortExtended = z.infer<typeof reasoningEffortExtendedSchema>
30+
31+
/**
32+
* Reasoning Effort user setting (includes "disable")
33+
*/
34+
export const reasoningEffortSettingValues = ["disable", "none", "minimal", "low", "medium", "high"] as const
35+
export const reasoningEffortSettingSchema = z.enum(reasoningEffortSettingValues)
36+
2137
/**
2238
* Verbosity
2339
*/
@@ -67,7 +83,9 @@ export const modelInfoSchema = z.object({
6783
supportsTemperature: z.boolean().optional(),
6884
defaultTemperature: z.number().optional(),
6985
requiredReasoningBudget: z.boolean().optional(),
70-
supportsReasoningEffort: z.boolean().optional(),
86+
supportsReasoningEffort: z
87+
.union([z.boolean(), z.array(z.enum(["disable", "none", "minimal", "low", "medium", "high"]))])
88+
.optional(),
7189
requiredReasoningEffort: z.boolean().optional(),
7290
preserveReasoning: z.boolean().optional(),
7391
supportedParameters: z.array(modelParametersSchema).optional(),
@@ -76,7 +94,8 @@ export const modelInfoSchema = z.object({
7694
cacheWritesPrice: z.number().optional(),
7795
cacheReadsPrice: z.number().optional(),
7896
description: z.string().optional(),
79-
reasoningEffort: reasoningEffortsSchema.optional(),
97+
// Default effort value for models that support reasoning effort
98+
reasoningEffort: reasoningEffortExtendedSchema.optional(),
8099
minTokensPerCachePoint: z.number().optional(),
81100
maxCachePoints: z.number().optional(),
82101
cachableFields: z.array(z.string()).optional(),

packages/types/src/provider-settings.ts

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -1,6 +1,6 @@
11
import { z } from "zod"
22

3-
import { modelInfoSchema, reasoningEffortWithMinimalSchema, verbosityLevelsSchema, serviceTierSchema } from "./model.js"
3+
import { modelInfoSchema, reasoningEffortSettingSchema, verbosityLevelsSchema, serviceTierSchema } from "./model.js"
44
import { codebaseIndexProviderSchema } from "./codebase-index.js"
55
import {
66
anthropicModels,
@@ -176,7 +176,7 @@ const baseProviderSettingsSchema = z.object({
176176

177177
// Model reasoning.
178178
enableReasoningEffort: z.boolean().optional(),
179-
reasoningEffort: reasoningEffortWithMinimalSchema.optional(),
179+
reasoningEffort: reasoningEffortSettingSchema.optional(),
180180
modelMaxTokens: z.number().optional(),
181181
modelMaxThinkingTokens: z.number().optional(),
182182

packages/types/src/providers/openai.ts

Lines changed: 124 additions & 30 deletions
Original file line number · Diff line number · Diff line change
@@ -3,85 +3,128 @@ import type { ModelInfo } from "../model.js"
33
// https://openai.com/api/pricing/
44
export type OpenAiNativeModelId = keyof typeof openAiNativeModels
55

6-
export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-5-2025-08-07"
6+
export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-5.1"
77

88
export const openAiNativeModels = {
9-
"gpt-5-chat-latest": {
9+
"gpt-5.1": {
1010
maxTokens: 128000,
1111
contextWindow: 400000,
1212
supportsImages: true,
1313
supportsPromptCache: true,
14-
supportsReasoningEffort: false,
14+
supportsReasoningEffort: ["none", "low", "medium", "high"],
15+
reasoningEffort: "medium",
1516
inputPrice: 1.25,
1617
outputPrice: 10.0,
17-
cacheReadsPrice: 0.13,
18-
description: "GPT-5 Chat Latest: Optimized for conversational AI and non-reasoning tasks",
18+
cacheReadsPrice: 0.125,
19+
supportsVerbosity: true,
20+
supportsTemperature: false,
21+
tiers: [
22+
{ name: "flex", contextWindow: 400000, inputPrice: 0.625, outputPrice: 5.0, cacheReadsPrice: 0.0625 },
23+
{ name: "priority", contextWindow: 400000, inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 },
24+
],
25+
description: "GPT-5.1: The best model for coding and agentic tasks across domains",
1926
},
20-
"gpt-5-2025-08-07": {
27+
"gpt-5.1-codex": {
2128
maxTokens: 128000,
2229
contextWindow: 400000,
2330
supportsImages: true,
2431
supportsPromptCache: true,
25-
supportsReasoningEffort: true,
32+
supportsReasoningEffort: ["low", "medium", "high"],
2633
reasoningEffort: "medium",
2734
inputPrice: 1.25,
2835
outputPrice: 10.0,
29-
cacheReadsPrice: 0.13,
30-
description: "GPT-5: The best model for coding and agentic tasks across domains",
31-
// supportsVerbosity is a new capability; ensure ModelInfo includes it
36+
cacheReadsPrice: 0.125,
37+
supportsTemperature: false,
38+
tiers: [{ name: "priority", contextWindow: 400000, inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 }],
39+
description: "GPT-5.1 Codex: A version of GPT-5.1 optimized for agentic coding in Codex",
40+
},
41+
"gpt-5.1-codex-mini": {
42+
maxTokens: 128000,
43+
contextWindow: 400000,
44+
supportsImages: true,
45+
supportsPromptCache: true,
46+
supportsReasoningEffort: ["low", "medium", "high"],
47+
reasoningEffort: "medium",
48+
inputPrice: 0.25,
49+
outputPrice: 2.0,
50+
cacheReadsPrice: 0.025,
51+
supportsTemperature: false,
52+
description: "GPT-5.1 Codex mini: A version of GPT-5.1 optimized for agentic coding in Codex",
53+
},
54+
"gpt-5": {
55+
maxTokens: 128000,
56+
contextWindow: 400000,
57+
supportsImages: true,
58+
supportsPromptCache: true,
59+
supportsReasoningEffort: ["minimal", "low", "medium", "high"],
60+
reasoningEffort: "medium",
61+
inputPrice: 1.25,
62+
outputPrice: 10.0,
63+
cacheReadsPrice: 0.125,
3264
supportsVerbosity: true,
3365
supportsTemperature: false,
3466
tiers: [
3567
{ name: "flex", contextWindow: 400000, inputPrice: 0.625, outputPrice: 5.0, cacheReadsPrice: 0.0625 },
3668
{ name: "priority", contextWindow: 400000, inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 },
3769
],
70+
description: "GPT-5: The best model for coding and agentic tasks across domains",
3871
},
39-
"gpt-5-mini-2025-08-07": {
72+
"gpt-5-mini": {
4073
maxTokens: 128000,
4174
contextWindow: 400000,
4275
supportsImages: true,
4376
supportsPromptCache: true,
44-
supportsReasoningEffort: true,
77+
supportsReasoningEffort: ["minimal", "low", "medium", "high"],
4578
reasoningEffort: "medium",
4679
inputPrice: 0.25,
4780
outputPrice: 2.0,
48-
cacheReadsPrice: 0.03,
49-
description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
81+
cacheReadsPrice: 0.025,
5082
supportsVerbosity: true,
5183
supportsTemperature: false,
5284
tiers: [
5385
{ name: "flex", contextWindow: 400000, inputPrice: 0.125, outputPrice: 1.0, cacheReadsPrice: 0.0125 },
5486
{ name: "priority", contextWindow: 400000, inputPrice: 0.45, outputPrice: 3.6, cacheReadsPrice: 0.045 },
5587
],
88+
description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
5689
},
57-
"gpt-5-nano-2025-08-07": {
90+
"gpt-5-codex": {
5891
maxTokens: 128000,
5992
contextWindow: 400000,
6093
supportsImages: true,
6194
supportsPromptCache: true,
62-
supportsReasoningEffort: true,
95+
supportsReasoningEffort: ["low", "medium", "high"],
96+
reasoningEffort: "medium",
97+
inputPrice: 1.25,
98+
outputPrice: 10.0,
99+
cacheReadsPrice: 0.125,
100+
supportsTemperature: false,
101+
tiers: [{ name: "priority", contextWindow: 400000, inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 }],
102+
description: "GPT-5-Codex: A version of GPT-5 optimized for agentic coding in Codex",
103+
},
104+
"gpt-5-nano": {
105+
maxTokens: 128000,
106+
contextWindow: 400000,
107+
supportsImages: true,
108+
supportsPromptCache: true,
109+
supportsReasoningEffort: ["minimal", "low", "medium", "high"],
63110
reasoningEffort: "medium",
64111
inputPrice: 0.05,
65112
outputPrice: 0.4,
66-
cacheReadsPrice: 0.01,
67-
description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
113+
cacheReadsPrice: 0.005,
68114
supportsVerbosity: true,
69115
supportsTemperature: false,
70116
tiers: [{ name: "flex", contextWindow: 400000, inputPrice: 0.025, outputPrice: 0.2, cacheReadsPrice: 0.0025 }],
117+
description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
71118
},
72-
"gpt-5-codex": {
119+
"gpt-5-chat-latest": {
73120
maxTokens: 128000,
74121
contextWindow: 400000,
75122
supportsImages: true,
76123
supportsPromptCache: true,
77-
supportsReasoningEffort: true,
78-
reasoningEffort: "medium",
79124
inputPrice: 1.25,
80125
outputPrice: 10.0,
81-
cacheReadsPrice: 0.13,
82-
description: "GPT-5-Codex: A version of GPT-5 optimized for agentic coding in Codex",
83-
supportsVerbosity: true,
84-
supportsTemperature: false,
126+
cacheReadsPrice: 0.125,
127+
description: "GPT-5 Chat: Optimized for conversational AI and non-reasoning tasks",
85128
},
86129
"gpt-4.1": {
87130
maxTokens: 32_768,
@@ -130,7 +173,7 @@ export const openAiNativeModels = {
130173
inputPrice: 2.0,
131174
outputPrice: 8.0,
132175
cacheReadsPrice: 0.5,
133-
supportsReasoningEffort: true,
176+
supportsReasoningEffort: ["low", "medium", "high"],
134177
reasoningEffort: "medium",
135178
supportsTemperature: false,
136179
tiers: [
@@ -168,7 +211,7 @@ export const openAiNativeModels = {
168211
inputPrice: 1.1,
169212
outputPrice: 4.4,
170213
cacheReadsPrice: 0.275,
171-
supportsReasoningEffort: true,
214+
supportsReasoningEffort: ["low", "medium", "high"],
172215
reasoningEffort: "medium",
173216
supportsTemperature: false,
174217
tiers: [
@@ -206,7 +249,7 @@ export const openAiNativeModels = {
206249
inputPrice: 1.1,
207250
outputPrice: 4.4,
208251
cacheReadsPrice: 0.55,
209-
supportsReasoningEffort: true,
252+
supportsReasoningEffort: ["low", "medium", "high"],
210253
reasoningEffort: "medium",
211254
supportsTemperature: false,
212255
},
@@ -295,11 +338,63 @@ export const openAiNativeModels = {
295338
supportsPromptCache: false,
296339
inputPrice: 1.5,
297340
outputPrice: 6,
298-
cacheReadsPrice: 0,
341+
cacheReadsPrice: 0.375,
299342
supportsTemperature: false,
300343
description:
301344
"Codex Mini: Cloud-based software engineering agent powered by codex-1, a version of o3 optimized for coding tasks. Trained with reinforcement learning to generate human-style code, adhere to instructions, and iteratively run tests.",
302345
},
346+
// Dated clones (snapshots) preserved for backward compatibility
347+
"gpt-5-2025-08-07": {
348+
maxTokens: 128000,
349+
contextWindow: 400000,
350+
supportsImages: true,
351+
supportsPromptCache: true,
352+
supportsReasoningEffort: ["minimal", "low", "medium", "high"],
353+
reasoningEffort: "medium",
354+
inputPrice: 1.25,
355+
outputPrice: 10.0,
356+
cacheReadsPrice: 0.125,
357+
supportsVerbosity: true,
358+
supportsTemperature: false,
359+
tiers: [
360+
{ name: "flex", contextWindow: 400000, inputPrice: 0.625, outputPrice: 5.0, cacheReadsPrice: 0.0625 },
361+
{ name: "priority", contextWindow: 400000, inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 },
362+
],
363+
description: "GPT-5: The best model for coding and agentic tasks across domains",
364+
},
365+
"gpt-5-mini-2025-08-07": {
366+
maxTokens: 128000,
367+
contextWindow: 400000,
368+
supportsImages: true,
369+
supportsPromptCache: true,
370+
supportsReasoningEffort: ["minimal", "low", "medium", "high"],
371+
reasoningEffort: "medium",
372+
inputPrice: 0.25,
373+
outputPrice: 2.0,
374+
cacheReadsPrice: 0.025,
375+
supportsVerbosity: true,
376+
supportsTemperature: false,
377+
tiers: [
378+
{ name: "flex", contextWindow: 400000, inputPrice: 0.125, outputPrice: 1.0, cacheReadsPrice: 0.0125 },
379+
{ name: "priority", contextWindow: 400000, inputPrice: 0.45, outputPrice: 3.6, cacheReadsPrice: 0.045 },
380+
],
381+
description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
382+
},
383+
"gpt-5-nano-2025-08-07": {
384+
maxTokens: 128000,
385+
contextWindow: 400000,
386+
supportsImages: true,
387+
supportsPromptCache: true,
388+
supportsReasoningEffort: ["minimal", "low", "medium", "high"],
389+
reasoningEffort: "medium",
390+
inputPrice: 0.05,
391+
outputPrice: 0.4,
392+
cacheReadsPrice: 0.005,
393+
supportsVerbosity: true,
394+
supportsTemperature: false,
395+
tiers: [{ name: "flex", contextWindow: 400000, inputPrice: 0.025, outputPrice: 0.2, cacheReadsPrice: 0.0025 }],
396+
description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
397+
},
303398
} as const satisfies Record<string, ModelInfo>
304399

305400
export const openAiModelInfoSaneDefaults: ModelInfo = {
@@ -316,6 +411,5 @@ export const openAiModelInfoSaneDefaults: ModelInfo = {
316411
export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"
317412

318413
export const OPENAI_NATIVE_DEFAULT_TEMPERATURE = 0
319-
export const GPT5_DEFAULT_TEMPERATURE = 1.0
320414

321415
export const OPENAI_AZURE_AI_INFERENCE_PATH = "/models/chat/completions"

0 commit comments

Comments (0)