
Commit d73dfed (merge of 2 parents: dcbb7a6 + f546851)

feat: add support for flexible pricing tiers in OpenAI models and update related components

File tree: 12 files changed, +249 −10 lines

packages/types/src/model.ts — 9 additions, 0 deletions

@@ -54,6 +54,15 @@ export const modelInfoSchema = z.object({
 	outputPrice: z.number().optional(),
 	cacheWritesPrice: z.number().optional(),
 	cacheReadsPrice: z.number().optional(),
+	// Optional discounted pricing for flex service tier
+	flexPrice: z
+		.object({
+			inputPrice: z.number().optional(),
+			outputPrice: z.number().optional(),
+			cacheWritesPrice: z.number().optional(),
+			cacheReadsPrice: z.number().optional(),
+		})
+		.optional(),
 	description: z.string().optional(),
 	reasoningEffort: reasoningEffortsSchema.optional(),
 	minTokensPerCachePoint: z.number().optional(),

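Because the new flexPrice object and every field inside it are optional, existing model definitions without a flex tier validate unchanged. A minimal sketch of that behavior, using zod's .shape accessor to exercise just the new field (the import path is an assumption for illustration):

	import { modelInfoSchema } from "@roo-code/types" // import path assumed

	const flexPriceSchema = modelInfoSchema.shape.flexPrice

	// A partial tier passes, because every nested field is optional...
	console.log(flexPriceSchema.safeParse({ inputPrice: 0.625 }).success) // true
	// ...as does omitting the tier entirely, since flexPrice itself is .optional().
	console.log(flexPriceSchema.safeParse(undefined).success) // true
	// Wrong types are still rejected.
	console.log(flexPriceSchema.safeParse({ inputPrice: "0.625" }).success) // false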
packages/types/src/provider-settings.ts — 3 additions, 0 deletions

@@ -91,6 +91,9 @@ const baseProviderSettingsSchema = z.object({
 
 	// Model verbosity.
 	verbosity: verbosityLevelsSchema.optional(),
+
+	// Service tier selection for providers that support tiered pricing (e.g. OpenAI flex tier)
+	serviceTier: z.enum(["auto", "default", "flex"]).optional(),
 })
 
 // Several of the providers share common model config properties.

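An unset or "auto" tier leaves requests untouched; only an explicit "default" or "flex" is ever forwarded to the API (see the handler changes below). The validation behavior in isolation, as a standalone copy of the enum for illustration only:

	import { z } from "zod"

	const serviceTierSchema = z.enum(["auto", "default", "flex"]).optional()

	console.log(serviceTierSchema.safeParse("flex").success) // true
	console.log(serviceTierSchema.safeParse(undefined).success) // true — the field is optional
	console.log(serviceTierSchema.safeParse("priority").success) // false — not in the enum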
packages/types/src/providers/openai.ts — 45 additions, 0 deletions

@@ -16,6 +16,11 @@ export const openAiNativeModels = {
 		inputPrice: 1.25,
 		outputPrice: 10.0,
 		cacheReadsPrice: 0.13,
+		flexPrice: {
+			inputPrice: 0.625,
+			outputPrice: 5.0,
+			cacheReadsPrice: 0.063,
+		},
 		description: "GPT-5: The best model for coding and agentic tasks across domains",
 		// supportsVerbosity is a new capability; ensure ModelInfo includes it
 		supportsVerbosity: true,
@@ -30,6 +35,11 @@ export const openAiNativeModels = {
 		inputPrice: 0.25,
 		outputPrice: 2.0,
 		cacheReadsPrice: 0.03,
+		flexPrice: {
+			inputPrice: 0.125,
+			outputPrice: 1.0,
+			cacheReadsPrice: 0.013,
+		},
 		description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
 		supportsVerbosity: true,
 	},
@@ -43,6 +53,11 @@ export const openAiNativeModels = {
 		inputPrice: 0.05,
 		outputPrice: 0.4,
 		cacheReadsPrice: 0.01,
+		flexPrice: {
+			inputPrice: 0.025,
+			outputPrice: 0.2,
+			cacheReadsPrice: 0.003,
+		},
 		description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
 		supportsVerbosity: true,
 	},
@@ -81,6 +96,11 @@ export const openAiNativeModels = {
 		inputPrice: 2.0,
 		outputPrice: 8.0,
 		cacheReadsPrice: 0.5,
+		flexPrice: {
+			inputPrice: 1.0,
+			outputPrice: 4.0,
+			cacheReadsPrice: 0.25,
+		},
 		supportsReasoningEffort: true,
 		reasoningEffort: "medium",
 	},
@@ -92,6 +112,11 @@ export const openAiNativeModels = {
 		inputPrice: 2.0,
 		outputPrice: 8.0,
 		cacheReadsPrice: 0.5,
+		flexPrice: {
+			inputPrice: 1.0,
+			outputPrice: 4.0,
+			cacheReadsPrice: 0.25,
+		},
 		reasoningEffort: "high",
 	},
 	"o3-low": {
@@ -102,6 +127,11 @@ export const openAiNativeModels = {
 		inputPrice: 2.0,
 		outputPrice: 8.0,
 		cacheReadsPrice: 0.5,
+		flexPrice: {
+			inputPrice: 1.0,
+			outputPrice: 4.0,
+			cacheReadsPrice: 0.25,
+		},
 		reasoningEffort: "low",
 	},
 	"o4-mini": {
@@ -112,6 +142,11 @@ export const openAiNativeModels = {
 		inputPrice: 1.1,
 		outputPrice: 4.4,
 		cacheReadsPrice: 0.275,
+		flexPrice: {
+			inputPrice: 0.55,
+			outputPrice: 2.2,
+			cacheReadsPrice: 0.138,
+		},
 		supportsReasoningEffort: true,
 		reasoningEffort: "medium",
 	},
@@ -123,6 +158,11 @@ export const openAiNativeModels = {
 		inputPrice: 1.1,
 		outputPrice: 4.4,
 		cacheReadsPrice: 0.275,
+		flexPrice: {
+			inputPrice: 0.55,
+			outputPrice: 2.2,
+			cacheReadsPrice: 0.138,
+		},
 		reasoningEffort: "high",
 	},
 	"o4-mini-low": {
@@ -133,6 +173,11 @@ export const openAiNativeModels = {
 		inputPrice: 1.1,
 		outputPrice: 4.4,
 		cacheReadsPrice: 0.275,
+		flexPrice: {
+			inputPrice: 0.55,
+			outputPrice: 2.2,
+			cacheReadsPrice: 0.138,
+		},
 		reasoningEffort: "low",
 	},
 	"o3-mini": {

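Every flexPrice block above is half the standard rate. Where halving leaves an extra decimal, the committed value is rounded to three places (0.275 / 2 = 0.1375 → 0.138); the gpt-5 family cache-read figures appear to have been halved from OpenAI's unrounded list prices rather than from the rounded cacheReadsPrice fields (0.063 is half of 0.125, not of 0.13). A hypothetical helper showing the 50% relationship — not part of the commit, which hard-codes the values as literals:

	// Hypothetical helper: derive a 50%-discounted flex tier from a model's
	// standard per-million-token rates, rounding to three decimals.
	interface TierPricing {
		inputPrice?: number
		outputPrice?: number
		cacheWritesPrice?: number
		cacheReadsPrice?: number
	}

	function halfPrice(standard: TierPricing): TierPricing {
		const halve = (p?: number) => (p === undefined ? undefined : Math.round((p / 2) * 1000) / 1000)
		return {
			inputPrice: halve(standard.inputPrice),
			outputPrice: halve(standard.outputPrice),
			cacheWritesPrice: halve(standard.cacheWritesPrice),
			cacheReadsPrice: halve(standard.cacheReadsPrice),
		}
	}

	console.log(halfPrice({ inputPrice: 1.1, outputPrice: 4.4, cacheReadsPrice: 0.275 }))
	// → { inputPrice: 0.55, outputPrice: 2.2, cacheWritesPrice: undefined, cacheReadsPrice: 0.138 }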
src/api/providers/openai-native.ts — 10 additions, 0 deletions

@@ -74,6 +74,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler {
 			totalOutputTokens,
 			cacheWriteTokens || 0,
 			cacheReadTokens || 0,
+			this.options.serviceTier,
 		)
 
 		return {
@@ -147,6 +148,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler {
 			stream: true,
 			stream_options: { include_usage: true },
 			...(reasoning && reasoning),
+			// Add service_tier parameter if configured and not "auto"
+			...(this.options.serviceTier &&
+				this.options.serviceTier !== "auto" && { service_tier: this.options.serviceTier }),
 		})
 
 		yield* this.handleStreamResponse(response, model)
@@ -276,6 +280,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler {
 			temperature?: number
 			max_output_tokens?: number
 			previous_response_id?: string
+			service_tier?: string
 		}
 
 		const requestBody: Gpt5RequestBody = {
@@ -296,6 +301,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler {
 			...(requestPreviousResponseId && { previous_response_id: requestPreviousResponseId }),
 		}
 
+		// Add service_tier parameter if configured and not "auto"
+		if (this.options.serviceTier && this.options.serviceTier !== "auto") {
+			requestBody.service_tier = this.options.serviceTier
+		}
+
 		try {
 			// Use the official SDK
 			const stream = (await (this.client as any).responses.create(requestBody)) as AsyncIterable<any>

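Both guards in this file check the same condition with different idioms: the streaming call folds the field in with a conditional spread, while the GPT-5 request body uses a plain if. (Note also that service_tier is typed as a bare string on Gpt5RequestBody, looser than the "auto" | "default" | "flex" enum in provider-settings.) The spread idiom works because spreading false or undefined into an object literal is a no-op, so the key only appears when both conditions hold. In isolation:

	const serviceTier: string | undefined = "flex" // stand-in for this.options.serviceTier

	const body = {
		stream: true,
		// Spreads { service_tier } only when the tier is set and not "auto";
		// otherwise the right-hand side is falsy and spreads as nothing.
		...(serviceTier && serviceTier !== "auto" && { service_tier: serviceTier }),
	}
	console.log(body) // { stream: true, service_tier: "flex" }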
src/api/providers/openai.ts — 40 additions, 0 deletions

@@ -164,6 +164,14 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {
 			...(reasoning && reasoning),
 		}
 
+		if (this.options.serviceTier && this.options.serviceTier !== "auto") {
+			;(requestOptions as any).service_tier = this.options.serviceTier
+			console.log("[DEBUG] Setting service_tier parameter:", this.options.serviceTier)
+			console.log("[DEBUG] Full request options:", JSON.stringify(requestOptions, null, 2))
+		} else {
+			console.log("[DEBUG] Service tier not set or is 'auto'. Current value:", this.options.serviceTier)
+		}
+
 		// Add max_tokens if needed
 		this.addMaxTokensIfNeeded(requestOptions, modelInfo)
 
@@ -226,6 +234,14 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {
 				: [systemMessage, ...convertToOpenAiMessages(messages)],
 		}
 
+		if (this.options.serviceTier && this.options.serviceTier !== "auto") {
+			;(requestOptions as any).service_tier = this.options.serviceTier
+			console.log("[DEBUG] Setting service_tier parameter:", this.options.serviceTier)
+			console.log("[DEBUG] Full request options:", JSON.stringify(requestOptions, null, 2))
+		} else {
+			console.log("[DEBUG] Service tier not set or is 'auto'. Current value:", this.options.serviceTier)
+		}
+
 		// Add max_tokens if needed
 		this.addMaxTokensIfNeeded(requestOptions, modelInfo)
 
@@ -271,6 +287,14 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {
 			messages: [{ role: "user", content: prompt }],
 		}
 
+		if (this.options.serviceTier && this.options.serviceTier !== "auto") {
+			;(requestOptions as any).service_tier = this.options.serviceTier
+			console.log("[DEBUG] Setting service_tier parameter:", this.options.serviceTier)
+			console.log("[DEBUG] Full request options:", JSON.stringify(requestOptions, null, 2))
+		} else {
+			console.log("[DEBUG] Service tier not set or is 'auto'. Current value:", this.options.serviceTier)
+		}
+
 		// Add max_tokens if needed
 		this.addMaxTokensIfNeeded(requestOptions, modelInfo)
 
@@ -315,6 +339,14 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {
 			temperature: undefined,
 		}
 
+		if (this.options.serviceTier && this.options.serviceTier !== "auto") {
+			;(requestOptions as any).service_tier = this.options.serviceTier
+			console.log("[DEBUG] Setting service_tier parameter:", this.options.serviceTier)
+			console.log("[DEBUG] Full request options:", JSON.stringify(requestOptions, null, 2))
+		} else {
+			console.log("[DEBUG] Service tier not set or is 'auto'. Current value:", this.options.serviceTier)
+		}
+
 		// O3 family models do not support the deprecated max_tokens parameter
 		// but they do support max_completion_tokens (the modern OpenAI parameter)
 		// This allows O3 models to limit response length when includeMaxTokens is enabled
@@ -340,6 +372,14 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {
 			temperature: undefined,
 		}
 
+		if (this.options.serviceTier && this.options.serviceTier !== "auto") {
+			;(requestOptions as any).service_tier = this.options.serviceTier
+			console.log("[DEBUG] Setting service_tier parameter:", this.options.serviceTier)
+			console.log("[DEBUG] Full request options:", JSON.stringify(requestOptions, null, 2))
+		} else {
+			console.log("[DEBUG] Service tier not set or is 'auto'. Current value:", this.options.serviceTier)
+		}
+
 		// O3 family models do not support the deprecated max_tokens parameter
 		// but they do support max_completion_tokens (the modern OpenAI parameter)
 		// This allows O3 models to limit response length when includeMaxTokens is enabled

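The same eight-line guard is pasted into all five request paths here, and the [DEBUG] console.log calls read like temporary instrumentation rather than something to ship. A possible consolidation — a sketch only, not part of the commit:

	// Hypothetical helper: centralize the tier guard so each call site is one line.
	function applyServiceTier(requestOptions: Record<string, unknown>, serviceTier?: string): void {
		// Only forward an explicit tier; unset and "auto" defer to the API default.
		if (serviceTier && serviceTier !== "auto") {
			requestOptions.service_tier = serviceTier
		}
	}

	// At each call site:
	// applyServiceTier(requestOptions as any, this.options.serviceTier)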
src/shared/cost.ts — 6 additions, 1 deletion

@@ -40,13 +40,18 @@ export function calculateApiCostOpenAI(
 	outputTokens: number,
 	cacheCreationInputTokens?: number,
 	cacheReadInputTokens?: number,
+	serviceTier?: "auto" | "default" | "flex",
 ): number {
 	const cacheCreationInputTokensNum = cacheCreationInputTokens || 0
 	const cacheReadInputTokensNum = cacheReadInputTokens || 0
 	const nonCachedInputTokens = Math.max(0, inputTokens - cacheCreationInputTokensNum - cacheReadInputTokensNum)
 
+	// If flex tier selected and model exposes flexPrice, override pricing fields.
+	const pricingInfo =
+		serviceTier === "flex" && modelInfo.flexPrice ? { ...modelInfo, ...modelInfo.flexPrice } : modelInfo
+
 	return calculateApiCostInternal(
-		modelInfo,
+		pricingInfo,
 		nonCachedInputTokens,
 		outputTokens,
 		cacheCreationInputTokensNum,

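Worked example using the gpt-5 rates above: a request with 10,000 uncached input tokens and 2,000 output tokens costs 10,000 × 1.25/1e6 + 2,000 × 10.0/1e6 = $0.0325 at standard pricing, and 10,000 × 0.625/1e6 + 2,000 × 5.0/1e6 = $0.01625 on the flex tier. Because the override is a shallow spread merge, any field absent from flexPrice (the gpt-5 entries omit cacheWritesPrice, for instance) falls back to the model's standard rate for that field, so partially specified flex tiers stay well-defined.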
src/utils/__tests__/cost.spec.ts — 22 additions, 0 deletions

@@ -107,6 +107,12 @@ describe("Cost Utility", () => {
 			outputPrice: 15.0, // $15 per million tokens
 			cacheWritesPrice: 3.75, // $3.75 per million tokens
 			cacheReadsPrice: 0.3, // $0.30 per million tokens
+			flexPrice: {
+				inputPrice: 1.5,
+				outputPrice: 7.5,
+				cacheWritesPrice: 1.875,
+				cacheReadsPrice: 0.15,
+			},
 		}
 
 		it("should calculate basic input/output costs correctly", () => {
@@ -189,5 +195,21 @@
 			// Total: 0.003 + 0.0075 = 0.0105
 			expect(cost).toBe(0.0105)
 		})
+
+		it("should apply flex pricing when serviceTier=flex and flexPrice present", () => {
+			const costDefault = calculateApiCostOpenAI(mockModelInfo, 1000, 500, undefined, undefined, "default")
+			const costFlex = calculateApiCostOpenAI(mockModelInfo, 1000, 500, undefined, undefined, "flex")
+
+			// Default pricing: input (3 / 1e6 * 1000) + output (15 / 1e6 * 500) = 0.0105
+			// Flex pricing: input (1.5 / 1e6 * 1000) + output (7.5 / 1e6 * 500) = 0.00525
+			expect(costDefault).toBeCloseTo(0.0105, 6)
+			expect(costFlex).toBeCloseTo(0.00525, 6)
+		})
+
+		it("should fall back to standard pricing if flex selected but no flexPrice", () => {
+			const noFlexModel: ModelInfo = { ...mockModelInfo, flexPrice: undefined }
+			const cost = calculateApiCostOpenAI(noFlexModel, 1000, 500, undefined, undefined, "flex")
+			expect(cost).toBeCloseTo(0.0105, 6)
+		})
 	})
 })

webview-ui/src/components/settings/ApiOptions.tsx — 9 additions, 0 deletions

@@ -94,6 +94,7 @@ import { ModelInfoView } from "./ModelInfoView"
 import { ApiErrorMessage } from "./ApiErrorMessage"
 import { ThinkingBudget } from "./ThinkingBudget"
 import { Verbosity } from "./Verbosity"
+import { ServiceTier } from "./ServiceTier"
 import { DiffSettingsControl } from "./DiffSettingsControl"
 import { TodoListSettingsControl } from "./TodoListSettingsControl"
 import { TemperatureControl } from "./TemperatureControl"
@@ -624,10 +625,18 @@ const ApiOptions = ({
 						modelInfo={selectedModelInfo}
 						isDescriptionExpanded={isDescriptionExpanded}
 						setIsDescriptionExpanded={setIsDescriptionExpanded}
+						serviceTier={apiConfiguration.serviceTier}
 					/>
 				</>
 			)}
 
+			{/* Service Tier - conditional on model supporting flex pricing */}
+			<ServiceTier
+				apiConfiguration={apiConfiguration}
+				setApiConfigurationField={setApiConfigurationField}
+				modelId={selectedModelId}
+			/>
+
 			<ThinkingBudget
 				key={`${selectedProvider}-${selectedModelId}`}
 				apiConfiguration={apiConfiguration}

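The ServiceTier component itself is among the other files in this commit and is not shown in this capture. A rough sketch of the shape such a control could take — everything beyond the three props visible at the call site above is assumed:

	import React from "react"
	import { openAiNativeModels } from "@roo-code/types" // import path assumed

	// Hypothetical sketch only; the real component ships elsewhere in the commit.
	const ServiceTier = ({ apiConfiguration, setApiConfigurationField, modelId }: any) => {
		// Presumably hidden unless the selected model defines a flexPrice block.
		if (!(openAiNativeModels as Record<string, any>)[modelId]?.flexPrice) return null

		return (
			<select
				value={apiConfiguration.serviceTier ?? "auto"}
				onChange={(e) => setApiConfigurationField("serviceTier", e.target.value)}>
				<option value="auto">Auto</option>
				<option value="default">Default</option>
				<option value="flex">Flex (discounted)</option>
			</select>
		)
	}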