Skip to content

Commit 5338922

Browse files
committed
Flex tier
1 parent 5f3c67f commit 5338922

File tree

11 files changed

+190
-9
lines changed

11 files changed

+190
-9
lines changed

packages/types/src/model.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,15 @@ export const modelInfoSchema = z.object({
5454
outputPrice: z.number().optional(),
5555
cacheWritesPrice: z.number().optional(),
5656
cacheReadsPrice: z.number().optional(),
57+
// Optional discounted pricing for flex service tier
58+
flexPrice: z
59+
.object({
60+
inputPrice: z.number().optional(),
61+
outputPrice: z.number().optional(),
62+
cacheWritesPrice: z.number().optional(),
63+
cacheReadsPrice: z.number().optional(),
64+
})
65+
.optional(),
5766
description: z.string().optional(),
5867
reasoningEffort: reasoningEffortsSchema.optional(),
5968
minTokensPerCachePoint: z.number().optional(),

packages/types/src/provider-settings.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,9 @@ const baseProviderSettingsSchema = z.object({
8888

8989
// Model verbosity.
9090
verbosity: verbosityLevelsSchema.optional(),
91+
92+
// Service tier selection for providers that support tiered pricing (e.g. OpenAI flex tier)
93+
serviceTier: z.enum(["auto", "default", "flex"]).optional(),
9194
})
9295

9396
// Several of the providers share common model config properties.

packages/types/src/providers/openai.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ export const openAiNativeModels = {
1616
inputPrice: 1.25,
1717
outputPrice: 10.0,
1818
cacheReadsPrice: 0.13,
19+
flexPrice: {
20+
inputPrice: 0.625,
21+
outputPrice: 5.0,
22+
cacheReadsPrice: 0.063,
23+
},
1924
description: "GPT-5: The best model for coding and agentic tasks across domains",
2025
// supportsVerbosity is a new capability; ensure ModelInfo includes it
2126
supportsVerbosity: true,
@@ -30,6 +35,11 @@ export const openAiNativeModels = {
3035
inputPrice: 0.25,
3136
outputPrice: 2.0,
3237
cacheReadsPrice: 0.03,
38+
flexPrice: {
39+
inputPrice: 0.125,
40+
outputPrice: 1.0,
41+
cacheReadsPrice: 0.013,
42+
},
3343
description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
3444
supportsVerbosity: true,
3545
},
@@ -43,6 +53,11 @@ export const openAiNativeModels = {
4353
inputPrice: 0.05,
4454
outputPrice: 0.4,
4555
cacheReadsPrice: 0.01,
56+
flexPrice: {
57+
inputPrice: 0.025,
58+
outputPrice: 0.2,
59+
cacheReadsPrice: 0.003,
60+
},
4661
description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
4762
supportsVerbosity: true,
4863
},
@@ -81,6 +96,11 @@ export const openAiNativeModels = {
8196
inputPrice: 2.0,
8297
outputPrice: 8.0,
8398
cacheReadsPrice: 0.5,
99+
flexPrice: {
100+
inputPrice: 1.0,
101+
outputPrice: 4.0,
102+
cacheReadsPrice: 0.25,
103+
},
84104
supportsReasoningEffort: true,
85105
reasoningEffort: "medium",
86106
},
@@ -112,6 +132,11 @@ export const openAiNativeModels = {
112132
inputPrice: 1.1,
113133
outputPrice: 4.4,
114134
cacheReadsPrice: 0.275,
135+
flexPrice: {
136+
inputPrice: 0.55,
137+
outputPrice: 2.2,
138+
cacheReadsPrice: 0.138,
139+
},
115140
supportsReasoningEffort: true,
116141
reasoningEffort: "medium",
117142
},

src/api/providers/openai-native.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
7474
totalOutputTokens,
7575
cacheWriteTokens || 0,
7676
cacheReadTokens || 0,
77+
this.options.serviceTier,
7778
)
7879

7980
return {
@@ -1180,6 +1181,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
11801181
outputTokens,
11811182
cacheWriteTokens || 0,
11821183
cacheReadTokens || 0,
1184+
this.options.serviceTier,
11831185
)
11841186

11851187
yield {

src/api/providers/openai.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
158158
...(reasoning && reasoning),
159159
}
160160

161+
if (this.options.serviceTier && this.options.serviceTier !== "auto") {
162+
;(requestOptions as any).service_tier = this.options.serviceTier
163+
}
164+
161165
// Add max_tokens if needed
162166
this.addMaxTokensIfNeeded(requestOptions, modelInfo)
163167

@@ -220,6 +224,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
220224
: [systemMessage, ...convertToOpenAiMessages(messages)],
221225
}
222226

227+
if (this.options.serviceTier && this.options.serviceTier !== "auto") {
228+
;(requestOptions as any).service_tier = this.options.serviceTier
229+
}
230+
223231
// Add max_tokens if needed
224232
this.addMaxTokensIfNeeded(requestOptions, modelInfo)
225233

@@ -265,6 +273,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
265273
messages: [{ role: "user", content: prompt }],
266274
}
267275

276+
if (this.options.serviceTier && this.options.serviceTier !== "auto") {
277+
;(requestOptions as any).service_tier = this.options.serviceTier
278+
}
279+
268280
// Add max_tokens if needed
269281
this.addMaxTokensIfNeeded(requestOptions, modelInfo)
270282

@@ -309,6 +321,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
309321
temperature: undefined,
310322
}
311323

324+
if (this.options.serviceTier && this.options.serviceTier !== "auto") {
325+
;(requestOptions as any).service_tier = this.options.serviceTier
326+
}
327+
312328
// O3 family models do not support the deprecated max_tokens parameter
313329
// but they do support max_completion_tokens (the modern OpenAI parameter)
314330
// This allows O3 models to limit response length when includeMaxTokens is enabled
@@ -334,6 +350,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
334350
temperature: undefined,
335351
}
336352

353+
if (this.options.serviceTier && this.options.serviceTier !== "auto") {
354+
;(requestOptions as any).service_tier = this.options.serviceTier
355+
}
356+
337357
// O3 family models do not support the deprecated max_tokens parameter
338358
// but they do support max_completion_tokens (the modern OpenAI parameter)
339359
// This allows O3 models to limit response length when includeMaxTokens is enabled

src/shared/cost.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,20 @@ export function calculateApiCostOpenAI(
4040
outputTokens: number,
4141
cacheCreationInputTokens?: number,
4242
cacheReadInputTokens?: number,
43+
serviceTier?: "auto" | "default" | "flex",
4344
): number {
4445
const cacheCreationInputTokensNum = cacheCreationInputTokens || 0
4546
const cacheReadInputTokensNum = cacheReadInputTokens || 0
4647
const nonCachedInputTokens = Math.max(0, inputTokens - cacheCreationInputTokensNum - cacheReadInputTokensNum)
4748

49+
// If flex tier selected and model exposes flexPrice, override pricing fields.
50+
const pricingInfo =
51+
serviceTier === "flex" && (modelInfo as any).flexPrice
52+
? { ...modelInfo, ...(modelInfo as any).flexPrice }
53+
: modelInfo
54+
4855
return calculateApiCostInternal(
49-
modelInfo,
56+
pricingInfo,
5057
nonCachedInputTokens,
5158
outputTokens,
5259
cacheCreationInputTokensNum,

src/utils/__tests__/cost.spec.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,12 @@ describe("Cost Utility", () => {
107107
outputPrice: 15.0, // $15 per million tokens
108108
cacheWritesPrice: 3.75, // $3.75 per million tokens
109109
cacheReadsPrice: 0.3, // $0.30 per million tokens
110+
flexPrice: {
111+
inputPrice: 1.5,
112+
outputPrice: 7.5,
113+
cacheWritesPrice: 1.875,
114+
cacheReadsPrice: 0.15,
115+
},
110116
}
111117

112118
it("should calculate basic input/output costs correctly", () => {
@@ -189,5 +195,21 @@ describe("Cost Utility", () => {
189195
// Total: 0.003 + 0.0075 = 0.0105
190196
expect(cost).toBe(0.0105)
191197
})
198+
199+
it("should apply flex pricing when serviceTier=flex and flexPrice present", () => {
200+
const costDefault = calculateApiCostOpenAI(mockModelInfo, 1000, 500, undefined, undefined, "default")
201+
const costFlex = calculateApiCostOpenAI(mockModelInfo, 1000, 500, undefined, undefined, "flex")
202+
203+
// Default pricing: input (3 / 1e6 * 1000) + output (15 /1e6 * 500) = 0.0105
204+
// Flex pricing: input (1.5 /1e6 * 1000) + output (7.5 /1e6 * 500) = 0.00525
205+
expect(costDefault).toBeCloseTo(0.0105, 6)
206+
expect(costFlex).toBeCloseTo(0.00525, 6)
207+
})
208+
209+
it("should fall back to standard pricing if flex selected but no flexPrice", () => {
210+
const noFlexModel: ModelInfo = { ...mockModelInfo, flexPrice: undefined }
211+
const cost = calculateApiCostOpenAI(noFlexModel, 1000, 500, undefined, undefined, "flex")
212+
expect(cost).toBeCloseTo(0.0105, 6)
213+
})
192214
})
193215
})

webview-ui/src/components/settings/ApiOptions.tsx

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ import { ModelInfoView } from "./ModelInfoView"
9494
import { ApiErrorMessage } from "./ApiErrorMessage"
9595
import { ThinkingBudget } from "./ThinkingBudget"
9696
import { Verbosity } from "./Verbosity"
97+
import { ServiceTier } from "./ServiceTier"
9798
import { DiffSettingsControl } from "./DiffSettingsControl"
9899
import { TodoListSettingsControl } from "./TodoListSettingsControl"
99100
import { TemperatureControl } from "./TemperatureControl"
@@ -628,6 +629,13 @@ const ApiOptions = ({
628629
</>
629630
)}
630631

632+
{/* Service Tier - conditional on model supporting flex pricing */}
633+
<ServiceTier
634+
apiConfiguration={apiConfiguration}
635+
setApiConfigurationField={setApiConfigurationField}
636+
modelId={selectedModelId}
637+
/>
638+
631639
<ThinkingBudget
632640
key={`${selectedProvider}-${selectedModelId}`}
633641
apiConfiguration={apiConfiguration}

webview-ui/src/components/settings/ModelInfoView.tsx

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ type ModelInfoViewProps = {
1414
modelInfo?: ModelInfo
1515
isDescriptionExpanded: boolean
1616
setIsDescriptionExpanded: (isExpanded: boolean) => void
17+
serviceTier?: "auto" | "default" | "flex"
1718
}
1819

1920
export const ModelInfoView = ({
@@ -22,9 +23,27 @@ export const ModelInfoView = ({
2223
modelInfo,
2324
isDescriptionExpanded,
2425
setIsDescriptionExpanded,
26+
serviceTier,
2527
}: ModelInfoViewProps) => {
2628
const { t } = useAppTranslation()
2729

30+
/**
 * Resolve the prices to display for the currently selected service tier.
 *
 * When `serviceTier` is "flex" and the model carries a `flexPrice` entry,
 * every price defined on that entry replaces the matching standard price;
 * any price the flex entry leaves undefined keeps its standard value.
 * For any other tier, or for a model without `flexPrice`, the model info
 * is returned unchanged (same object reference).
 *
 * @param modelInfo - Model metadata whose pricing fields should be resolved.
 * @returns Model metadata with the effective pricing for the selected tier.
 */
const getEffectivePricing = (modelInfo: ModelInfo): ModelInfo => {
	// `flexPrice` is an optional field of the model-info schema, so it can be
	// read directly; no `any` cast is needed to reach it.
	const flexPrice = serviceTier === "flex" ? modelInfo.flexPrice : undefined

	if (!flexPrice) {
		return modelInfo
	}

	return {
		...modelInfo,
		// `??` (not `||`) so an explicit flex price of 0 is still honoured.
		inputPrice: flexPrice.inputPrice ?? modelInfo.inputPrice,
		outputPrice: flexPrice.outputPrice ?? modelInfo.outputPrice,
		cacheReadsPrice: flexPrice.cacheReadsPrice ?? modelInfo.cacheReadsPrice,
		cacheWritesPrice: flexPrice.cacheWritesPrice ?? modelInfo.cacheWritesPrice,
	}
}
44+
45+
const effectiveModelInfo = modelInfo ? getEffectivePricing(modelInfo) : modelInfo
46+
2847
const infoItems = [
2948
<ModelInfoSupportsItem
3049
isSupported={modelInfo?.supportsImages ?? false}
@@ -47,28 +66,28 @@ export const ModelInfoView = ({
4766
{modelInfo.maxTokens?.toLocaleString()} tokens
4867
</>
4968
),
50-
modelInfo?.inputPrice !== undefined && modelInfo.inputPrice > 0 && (
69+
effectiveModelInfo?.inputPrice !== undefined && effectiveModelInfo.inputPrice > 0 && (
5170
<>
5271
<span className="font-medium">{t("settings:modelInfo.inputPrice")}:</span>{" "}
53-
{formatPrice(modelInfo.inputPrice)} / 1M tokens
72+
{formatPrice(effectiveModelInfo.inputPrice)} / 1M tokens
5473
</>
5574
),
56-
modelInfo?.outputPrice !== undefined && modelInfo.outputPrice > 0 && (
75+
effectiveModelInfo?.outputPrice !== undefined && effectiveModelInfo.outputPrice > 0 && (
5776
<>
5877
<span className="font-medium">{t("settings:modelInfo.outputPrice")}:</span>{" "}
59-
{formatPrice(modelInfo.outputPrice)} / 1M tokens
78+
{formatPrice(effectiveModelInfo.outputPrice)} / 1M tokens
6079
</>
6180
),
62-
modelInfo?.supportsPromptCache && modelInfo.cacheReadsPrice && (
81+
modelInfo?.supportsPromptCache && effectiveModelInfo?.cacheReadsPrice && (
6382
<>
6483
<span className="font-medium">{t("settings:modelInfo.cacheReadsPrice")}:</span>{" "}
65-
{formatPrice(modelInfo.cacheReadsPrice || 0)} / 1M tokens
84+
{formatPrice(effectiveModelInfo.cacheReadsPrice || 0)} / 1M tokens
6685
</>
6786
),
68-
modelInfo?.supportsPromptCache && modelInfo.cacheWritesPrice && (
87+
modelInfo?.supportsPromptCache && effectiveModelInfo?.cacheWritesPrice && (
6988
<>
7089
<span className="font-medium">{t("settings:modelInfo.cacheWritesPrice")}:</span>{" "}
71-
{formatPrice(modelInfo.cacheWritesPrice || 0)} / 1M tokens
90+
{formatPrice(effectiveModelInfo.cacheWritesPrice || 0)} / 1M tokens
7291
</>
7392
),
7493
apiProvider === "gemini" && (
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import { useEffect, useMemo } from "react"
2+
import { VSCodeDropdown, VSCodeOption } from "@vscode/webview-ui-toolkit/react"
3+
import { useAppTranslation } from "@src/i18n/TranslationContext"
4+
import type { ProviderSettings, ModelInfo } from "@roo-code/types"
5+
6+
type Props = {
	apiConfiguration: ProviderSettings
	setApiConfigurationField: (field: keyof ProviderSettings, value: any) => void
	modelInfo?: ModelInfo
	modelId?: string
}

// Models that currently have flex pricing.
// NOTE(review): presumably mirrors the model entries that declare `flexPrice`
// in the provider model table — keep the two in sync when either changes.
const FLEX_COMPATIBLE_MODELS = ["gpt-5", "gpt-5-mini", "gpt-5-nano", "o3", "o4-mini"]
const SERVICE_TIERS: Array<"auto" | "default" | "flex"> = ["auto", "default", "flex"]

// Dated snapshot suffix appended to a base model ID, e.g. "-2025-08-07".
const SNAPSHOT_SUFFIX = /-\d{4}-\d{2}-\d{2}$/

/**
 * Report whether a model ID names one of the flex-compatible models.
 *
 * The ID is reduced to its base name first: anything up to the last "/" is
 * dropped (e.g. "openai/gpt-5") and a trailing dated-snapshot suffix is
 * removed (e.g. "gpt-5-2025-08-07"). The base name must then match a list
 * entry exactly. A substring test is deliberately avoided: "o3" is contained
 * in "o3-mini" and "gpt-5" in "gpt-5-chat-latest", which are different models
 * that are not in the flex list.
 */
const isFlexCompatibleModel = (modelId: string): boolean => {
	const baseId = modelId.slice(modelId.lastIndexOf("/") + 1).replace(SNAPSHOT_SUFFIX, "")
	return FLEX_COMPATIBLE_MODELS.includes(baseId)
}

/**
 * Service-tier selector shown only for models that have flex pricing.
 *
 * The model is taken from `modelId`, falling back to
 * `apiConfiguration.openAiModelId`. For a supported model the component
 * renders a dropdown with the "auto", "default" and "flex" tiers; for any
 * other model it renders nothing.
 *
 * Side effect: the stored `serviceTier` setting is kept consistent with the
 * selected model — it is initialised to "auto" when the model is supported
 * and no tier is set, and cleared when the model is not supported.
 */
export const ServiceTier = ({ apiConfiguration, setApiConfigurationField, modelId }: Props) => {
	const { t } = useAppTranslation()
	const effectiveModelId = modelId || apiConfiguration.openAiModelId || ""

	const isSupported = useMemo(() => isFlexCompatibleModel(effectiveModelId), [effectiveModelId])

	// Initialize to auto when supported and unset; clear when unsupported
	useEffect(() => {
		if (isSupported && !apiConfiguration.serviceTier) {
			setApiConfigurationField("serviceTier", "auto")
		} else if (!isSupported && apiConfiguration.serviceTier) {
			setApiConfigurationField("serviceTier", undefined)
		}
	}, [isSupported, apiConfiguration.serviceTier, setApiConfigurationField])

	if (!isSupported) return null

	return (
		<div className="flex flex-col gap-1">
			<label className="block font-medium mb-1">{t("settings:providers.serviceTier.label")}</label>
			<VSCodeDropdown
				value={apiConfiguration.serviceTier || "auto"}
				onChange={(e: any) => setApiConfigurationField("serviceTier", e.target.value)}
				className="w-48">
				{SERVICE_TIERS.map((tier) => (
					<VSCodeOption key={tier} value={tier}>
						{t(`settings:providers.serviceTier.${tier}` as any)}
					</VSCodeOption>
				))}
			</VSCodeDropdown>
			<div className="text-sm text-vscode-descriptionForeground">
				{t("settings:providers.serviceTier.description", {
					defaultValue: "Select pricing tier. Flex uses discounted rates when available.",
				})}
			</div>
		</div>
	)
}
58+
59+
export default ServiceTier

0 commit comments

Comments
 (0)