Flex tier

mechanicmuthu · mechanicmuthu · commit 53389221fed1 · 2025-08-12T13:14:06.000+05:30
diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts
@@ -54,6 +54,15 @@ export const modelInfoSchema = z.object({
 	outputPrice: z.number().optional(),
 	cacheWritesPrice: z.number().optional(),
 	cacheReadsPrice: z.number().optional(),
+	// Optional discounted pricing for flex service tier
+	flexPrice: z
+		.object({
+			inputPrice: z.number().optional(),
+			outputPrice: z.number().optional(),
+			cacheWritesPrice: z.number().optional(),
+			cacheReadsPrice: z.number().optional(),
+		})
+		.optional(),
 	description: z.string().optional(),
 	reasoningEffort: reasoningEffortsSchema.optional(),
 	minTokensPerCachePoint: z.number().optional(),
diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts
@@ -88,6 +88,9 @@ const baseProviderSettingsSchema = z.object({
 
 	// Model verbosity.
 	verbosity: verbosityLevelsSchema.optional(),
+
+	// Service tier selection for providers that support tiered pricing (e.g. OpenAI flex tier)
+	serviceTier: z.enum(["auto", "default", "flex"]).optional(),
 })
 
 // Several of the providers share common model config properties.
diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts
@@ -16,6 +16,11 @@ export const openAiNativeModels = {
 		inputPrice: 1.25,
 		outputPrice: 10.0,
 		cacheReadsPrice: 0.13,
+		flexPrice: {
+			inputPrice: 0.625,
+			outputPrice: 5.0,
+			cacheReadsPrice: 0.063,
+		},
 		description: "GPT-5: The best model for coding and agentic tasks across domains",
 		// supportsVerbosity is a new capability; ensure ModelInfo includes it
 		supportsVerbosity: true,
@@ -30,6 +35,11 @@ export const openAiNativeModels = {
 		inputPrice: 0.25,
 		outputPrice: 2.0,
 		cacheReadsPrice: 0.03,
+		flexPrice: {
+			inputPrice: 0.125,
+			outputPrice: 1.0,
+			cacheReadsPrice: 0.013,
+		},
 		description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
 		supportsVerbosity: true,
 	},
@@ -43,6 +53,11 @@ export const openAiNativeModels = {
 		inputPrice: 0.05,
 		outputPrice: 0.4,
 		cacheReadsPrice: 0.01,
+		flexPrice: {
+			inputPrice: 0.025,
+			outputPrice: 0.2,
+			cacheReadsPrice: 0.003,
+		},
 		description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
 		supportsVerbosity: true,
 	},
@@ -81,6 +96,11 @@ export const openAiNativeModels = {
 		inputPrice: 2.0,
 		outputPrice: 8.0,
 		cacheReadsPrice: 0.5,
+		flexPrice: {
+			inputPrice: 1.0,
+			outputPrice: 4.0,
+			cacheReadsPrice: 0.25,
+		},
 		supportsReasoningEffort: true,
 		reasoningEffort: "medium",
 	},
@@ -112,6 +132,11 @@ export const openAiNativeModels = {
 		inputPrice: 1.1,
 		outputPrice: 4.4,
 		cacheReadsPrice: 0.275,
+		flexPrice: {
+			inputPrice: 0.55,
+			outputPrice: 2.2,
+			cacheReadsPrice: 0.138,
+		},
 		supportsReasoningEffort: true,
 		reasoningEffort: "medium",
 	},
diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts
@@ -74,6 +74,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			totalOutputTokens,
 			cacheWriteTokens || 0,
 			cacheReadTokens || 0,
+			this.options.serviceTier,
 		)
 
 		return {
@@ -1180,6 +1181,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			outputTokens,
 			cacheWriteTokens || 0,
 			cacheReadTokens || 0,
+			this.options.serviceTier,
 		)
 
 		yield {
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
@@ -158,6 +158,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				...(reasoning && reasoning),
 			}
 
+			if (this.options.serviceTier && this.options.serviceTier !== "auto") {
+				;(requestOptions as any).service_tier = this.options.serviceTier
+			}
+
 			// Add max_tokens if needed
 			this.addMaxTokensIfNeeded(requestOptions, modelInfo)
 
@@ -220,6 +224,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 						: [systemMessage, ...convertToOpenAiMessages(messages)],
 			}
 
+			if (this.options.serviceTier && this.options.serviceTier !== "auto") {
+				;(requestOptions as any).service_tier = this.options.serviceTier
+			}
+
 			// Add max_tokens if needed
 			this.addMaxTokensIfNeeded(requestOptions, modelInfo)
 
@@ -265,6 +273,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				messages: [{ role: "user", content: prompt }],
 			}
 
+			if (this.options.serviceTier && this.options.serviceTier !== "auto") {
+				;(requestOptions as any).service_tier = this.options.serviceTier
+			}
+
 			// Add max_tokens if needed
 			this.addMaxTokensIfNeeded(requestOptions, modelInfo)
 
@@ -309,6 +321,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				temperature: undefined,
 			}
 
+			if (this.options.serviceTier && this.options.serviceTier !== "auto") {
+				;(requestOptions as any).service_tier = this.options.serviceTier
+			}
+
 			// O3 family models do not support the deprecated max_tokens parameter
 			// but they do support max_completion_tokens (the modern OpenAI parameter)
 			// This allows O3 models to limit response length when includeMaxTokens is enabled
@@ -334,6 +350,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				temperature: undefined,
 			}
 
+			if (this.options.serviceTier && this.options.serviceTier !== "auto") {
+				;(requestOptions as any).service_tier = this.options.serviceTier
+			}
+
 			// O3 family models do not support the deprecated max_tokens parameter
 			// but they do support max_completion_tokens (the modern OpenAI parameter)
 			// This allows O3 models to limit response length when includeMaxTokens is enabled
diff --git a/src/shared/cost.ts b/src/shared/cost.ts
@@ -40,13 +40,20 @@ export function calculateApiCostOpenAI(
 	outputTokens: number,
 	cacheCreationInputTokens?: number,
 	cacheReadInputTokens?: number,
+	serviceTier?: "auto" | "default" | "flex",
 ): number {
 	const cacheCreationInputTokensNum = cacheCreationInputTokens || 0
 	const cacheReadInputTokensNum = cacheReadInputTokens || 0
 	const nonCachedInputTokens = Math.max(0, inputTokens - cacheCreationInputTokensNum - cacheReadInputTokensNum)
 
+	// If flex tier selected and model exposes flexPrice, override pricing fields.
+	const pricingInfo =
+		serviceTier === "flex" && (modelInfo as any).flexPrice
+			? { ...modelInfo, ...(modelInfo as any).flexPrice }
+			: modelInfo
+
 	return calculateApiCostInternal(
-		modelInfo,
+		pricingInfo,
 		nonCachedInputTokens,
 		outputTokens,
 		cacheCreationInputTokensNum,
diff --git a/src/utils/__tests__/cost.spec.ts b/src/utils/__tests__/cost.spec.ts
@@ -107,6 +107,12 @@ describe("Cost Utility", () => {
 			outputPrice: 15.0, // $15 per million tokens
 			cacheWritesPrice: 3.75, // $3.75 per million tokens
 			cacheReadsPrice: 0.3, // $0.30 per million tokens
+			flexPrice: {
+				inputPrice: 1.5,
+				outputPrice: 7.5,
+				cacheWritesPrice: 1.875,
+				cacheReadsPrice: 0.15,
+			},
 		}
 
 		it("should calculate basic input/output costs correctly", () => {
@@ -189,5 +195,21 @@ describe("Cost Utility", () => {
 			// Total: 0.003 + 0.0075 = 0.0105
 			expect(cost).toBe(0.0105)
 		})
+
+		it("should apply flex pricing when serviceTier=flex and flexPrice present", () => {
+			// Price the same usage (1000 input / 500 output tokens, no cache traffic) under a given tier.
+			const priceFor = (tier: "default" | "flex") =>
+				calculateApiCostOpenAI(mockModelInfo, 1000, 500, undefined, undefined, tier)
+			// Default pricing: input (3 / 1e6 * 1000) + output (15 / 1e6 * 500) = 0.0105
+			expect(priceFor("default")).toBeCloseTo(0.0105, 6)
+			// Flex pricing: input (1.5 / 1e6 * 1000) + output (7.5 / 1e6 * 500) = 0.00525
+			expect(priceFor("flex")).toBeCloseTo(0.00525, 6)
+		})
+
+		it("should fall back to standard pricing if flex selected but no flexPrice", () => {
+			// Same model with its flex price table removed, so only standard prices can apply.
+			const withoutFlex: ModelInfo = { ...mockModelInfo, flexPrice: undefined }
+			expect(calculateApiCostOpenAI(withoutFlex, 1000, 500, undefined, undefined, "flex")).toBeCloseTo(0.0105, 6)
+		})
 	})
 })
diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx
@@ -94,6 +94,7 @@ import { ModelInfoView } from "./ModelInfoView"
 import { ApiErrorMessage } from "./ApiErrorMessage"
 import { ThinkingBudget } from "./ThinkingBudget"
 import { Verbosity } from "./Verbosity"
+import { ServiceTier } from "./ServiceTier"
 import { DiffSettingsControl } from "./DiffSettingsControl"
 import { TodoListSettingsControl } from "./TodoListSettingsControl"
 import { TemperatureControl } from "./TemperatureControl"
@@ -628,6 +629,13 @@ const ApiOptions = ({
 				</>
 			)}
 
+			{/* Service Tier - conditional on model supporting flex pricing */}
+			<ServiceTier
+				apiConfiguration={apiConfiguration}
+				setApiConfigurationField={setApiConfigurationField}
+				modelId={selectedModelId}
+			/>
+
 			<ThinkingBudget
 				key={`${selectedProvider}-${selectedModelId}`}
 				apiConfiguration={apiConfiguration}
diff --git a/webview-ui/src/components/settings/ModelInfoView.tsx b/webview-ui/src/components/settings/ModelInfoView.tsx
@@ -14,6 +14,7 @@ type ModelInfoViewProps = {
 	modelInfo?: ModelInfo
 	isDescriptionExpanded: boolean
 	setIsDescriptionExpanded: (isExpanded: boolean) => void
+	serviceTier?: "auto" | "default" | "flex"
 }
 
 export const ModelInfoView = ({
@@ -22,9 +23,27 @@ export const ModelInfoView = ({
 	modelInfo,
 	isDescriptionExpanded,
 	setIsDescriptionExpanded,
+	serviceTier,
 }: ModelInfoViewProps) => {
 	const { t } = useAppTranslation()
 
+	// Returns the prices to display: flex-tier prices overlaid on the standard ones when flex is selected.
+	const getEffectivePricing = (modelInfo: ModelInfo) => {
+		// Any other tier, or a model with no flex price table, is shown with its standard prices.
+		const flexPrice = serviceTier === "flex" ? modelInfo.flexPrice : undefined
+		if (!flexPrice) return modelInfo
+		return {
+			...modelInfo,
+			// A price the flex table leaves out keeps its standard value.
+			inputPrice: flexPrice.inputPrice ?? modelInfo.inputPrice,
+			outputPrice: flexPrice.outputPrice ?? modelInfo.outputPrice,
+			cacheReadsPrice: flexPrice.cacheReadsPrice ?? modelInfo.cacheReadsPrice,
+			cacheWritesPrice: flexPrice.cacheWritesPrice ?? modelInfo.cacheWritesPrice,
+		}
+	}
+
+	const effectiveModelInfo = modelInfo ? getEffectivePricing(modelInfo) : modelInfo
+
 	const infoItems = [
 		<ModelInfoSupportsItem
 			isSupported={modelInfo?.supportsImages ?? false}
@@ -47,28 +66,28 @@ export const ModelInfoView = ({
 				{modelInfo.maxTokens?.toLocaleString()} tokens
 			</>
 		),
-		modelInfo?.inputPrice !== undefined && modelInfo.inputPrice > 0 && (
+		effectiveModelInfo?.inputPrice !== undefined && effectiveModelInfo.inputPrice > 0 && (
 			<>
 				<span className="font-medium">{t("settings:modelInfo.inputPrice")}:</span>{" "}
-				{formatPrice(modelInfo.inputPrice)} / 1M tokens
+				{formatPrice(effectiveModelInfo.inputPrice)} / 1M tokens
 			</>
 		),
-		modelInfo?.outputPrice !== undefined && modelInfo.outputPrice > 0 && (
+		effectiveModelInfo?.outputPrice !== undefined && effectiveModelInfo.outputPrice > 0 && (
 			<>
 				<span className="font-medium">{t("settings:modelInfo.outputPrice")}:</span>{" "}
-				{formatPrice(modelInfo.outputPrice)} / 1M tokens
+				{formatPrice(effectiveModelInfo.outputPrice)} / 1M tokens
 			</>
 		),
-		modelInfo?.supportsPromptCache && modelInfo.cacheReadsPrice && (
+		modelInfo?.supportsPromptCache && effectiveModelInfo?.cacheReadsPrice && (
 			<>
 				<span className="font-medium">{t("settings:modelInfo.cacheReadsPrice")}:</span>{" "}
-				{formatPrice(modelInfo.cacheReadsPrice || 0)} / 1M tokens
+				{formatPrice(effectiveModelInfo.cacheReadsPrice || 0)} / 1M tokens
 			</>
 		),
-		modelInfo?.supportsPromptCache && modelInfo.cacheWritesPrice && (
+		modelInfo?.supportsPromptCache && effectiveModelInfo?.cacheWritesPrice && (
 			<>
 				<span className="font-medium">{t("settings:modelInfo.cacheWritesPrice")}:</span>{" "}
-				{formatPrice(modelInfo.cacheWritesPrice || 0)} / 1M tokens
+				{formatPrice(effectiveModelInfo.cacheWritesPrice || 0)} / 1M tokens
 			</>
 		),
 		apiProvider === "gemini" && (
diff --git a/webview-ui/src/components/settings/ServiceTier.tsx b/webview-ui/src/components/settings/ServiceTier.tsx
@@ -0,0 +1,85 @@
+import { useEffect, useMemo } from "react"
+import { VSCodeDropdown, VSCodeOption } from "@vscode/webview-ui-toolkit/react"
+import { useAppTranslation } from "@src/i18n/TranslationContext"
+import type { ProviderSettings, ModelInfo } from "@roo-code/types"
+
+type Props = {
+	apiConfiguration: ProviderSettings
+	setApiConfigurationField: (field: keyof ProviderSettings, value: any) => void
+	modelInfo?: ModelInfo
+	modelId?: string
+}
+
+// Models that currently have flex pricing
+const FLEX_COMPATIBLE_MODELS = ["gpt-5", "gpt-5-mini", "gpt-5-nano", "o3", "o4-mini"]
+const SERVICE_TIERS: Array<"auto" | "default" | "flex"> = ["auto", "default", "flex"]
+
+// Trailing "-YYYY-MM-DD" snapshot date on a model ID, e.g. "gpt-5-2025-08-07"
+const SNAPSHOT_DATE_SUFFIX = /-\d{4}-\d{2}-\d{2}$/
+
+/**
+ * Reports whether a model ID names one of FLEX_COMPATIBLE_MODELS.
+ *
+ * A routing prefix ("openai/gpt-5") and a snapshot date suffix are ignored, but the rest of
+ * the name must match exactly. A substring test would also accept unrelated models such as
+ * "o3-mini" (contains "o3") or "gpt-5-chat-latest" (contains "gpt-5").
+ */
+const isFlexCompatibleModelId = (id: string): boolean => {
+	const baseName = id.slice(id.lastIndexOf("/") + 1).replace(SNAPSHOT_DATE_SUFFIX, "")
+	return FLEX_COMPATIBLE_MODELS.includes(baseName)
+}
+
+/**
+ * Dropdown for picking the provider service tier ("auto", "default" or "flex").
+ *
+ * Renders nothing unless the selected model is flex-capable: either `modelInfo` carries a
+ * `flexPrice` table, or the model ID (`modelId`, falling back to
+ * `apiConfiguration.openAiModelId`) names a model in FLEX_COMPATIBLE_MODELS.
+ *
+ * Side effect: keeps `apiConfiguration.serviceTier` in step with support. It is set to
+ * "auto" when the model is supported and no tier is stored, and cleared when it is not.
+ */
+export const ServiceTier = ({ apiConfiguration, setApiConfigurationField, modelInfo, modelId }: Props) => {
+	const { t } = useAppTranslation()
+	const effectiveModelId = modelId || apiConfiguration.openAiModelId || ""
+	const hasFlexPrice = Boolean(modelInfo?.flexPrice)
+
+	const isSupported = useMemo(
+		() => hasFlexPrice || isFlexCompatibleModelId(effectiveModelId),
+		[hasFlexPrice, effectiveModelId],
+	)
+
+	// Initialize to auto when supported and unset; clear when unsupported
+	useEffect(() => {
+		if (isSupported && !apiConfiguration.serviceTier) {
+			setApiConfigurationField("serviceTier", "auto")
+		} else if (!isSupported && apiConfiguration.serviceTier) {
+			setApiConfigurationField("serviceTier", undefined)
+		}
+	}, [isSupported, apiConfiguration.serviceTier, setApiConfigurationField])
+
+	if (!isSupported) return null
+
+	return (
+		<div className="flex flex-col gap-1">
+			<label className="block font-medium mb-1">{t("settings:providers.serviceTier.label")}</label>
+			<VSCodeDropdown
+				value={apiConfiguration.serviceTier || "auto"}
+				onChange={(e: any) => setApiConfigurationField("serviceTier", e.target.value)}
+				className="w-48">
+				{SERVICE_TIERS.map((tier) => (
+					<VSCodeOption key={tier} value={tier}>
+						{t(`settings:providers.serviceTier.${tier}` as any)}
+					</VSCodeOption>
+				))}
+			</VSCodeDropdown>
+			<div className="text-sm text-vscode-descriptionForeground">
+				{t("settings:providers.serviceTier.description", {
+					defaultValue: "Select pricing tier. Flex uses discounted rates when available.",
+				})}
+			</div>
+		</div>
+	)
+}
+
+export default ServiceTier
diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json
@@ -377,6 +377,13 @@
 				"learnMore": "Learn more about provider routing"
 			}
 		},
+		"serviceTier": {
+			"label": "Service Tier",
+			"auto": "Auto",
+			"default": "Default",
+			"flex": "Flex",
+			"description": "Select pricing tier. Flex uses discounted rates when available."
+		},
 		"customModel": {
 			"capabilities": "Configure the capabilities and pricing for your custom OpenAI-compatible model. Be careful when specifying the model capabilities, as they can affect how Roo Code performs.",
 			"maxTokens": {

Original file line number	Diff line number	Diff line change
`@@ -74,6 +74,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio`
`74`	`74`	`totalOutputTokens,`
`75`	`75`	`cacheWriteTokens \|\| 0,`
`76`	`76`	`cacheReadTokens \|\| 0,`
	`77`	`+ this.options.serviceTier,`
`77`	`78`	`)`
`78`	`79`
`79`	`80`	`return {`
`@@ -1180,6 +1181,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio`
`1180`	`1181`	`outputTokens,`
`1181`	`1182`	`cacheWriteTokens \|\| 0,`
`1182`	`1183`	`cacheReadTokens \|\| 0,`
	`1184`	`+ this.options.serviceTier,`
`1183`	`1185`	`)`
`1184`	`1186`
`1185`	`1187`	`yield {`