
Commit 5a39c3a

feat: OpenAI Responses API service tiers (flex/priority) — pricing metadata, handler support, and UI selection
1 parent db4ddb1 commit 5a39c3a

4 files changed, +122 -2 lines changed

packages/types/src/model.ts

Lines changed: 13 additions & 0 deletions
@@ -28,6 +28,13 @@ export const verbosityLevelsSchema = z.enum(verbosityLevels)
 
 export type VerbosityLevel = z.infer<typeof verbosityLevelsSchema>
 
+/**
+ * Service tiers (OpenAI Responses API)
+ */
+export const serviceTiers = ["default", "flex", "priority"] as const
+export const serviceTierSchema = z.enum(serviceTiers)
+export type ServiceTier = z.infer<typeof serviceTierSchema>
+
 /**
  * ModelParameter
  */
@@ -69,9 +76,15 @@ export const modelInfoSchema = z.object({
 	minTokensPerCachePoint: z.number().optional(),
 	maxCachePoints: z.number().optional(),
 	cachableFields: z.array(z.string()).optional(),
+	/**
+	 * Service tiers with pricing information.
+	 * Each tier can have a name (for OpenAI service tiers) and pricing overrides.
+	 * The top-level input/output/cache* fields represent the default/standard tier.
+	 */
 	tiers: z
 		.array(
 			z.object({
+				name: serviceTierSchema.optional(), // Service tier name (flex, priority, etc.)
 				contextWindow: z.number(),
 				inputPrice: z.number().optional(),
 				outputPrice: z.number().optional(),
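For reference, a minimal sketch of how these pieces compose. The schema shapes come from the diff above; the sample tier values are hypothetical, not from this commit:

import { z } from "zod"

const serviceTiers = ["default", "flex", "priority"] as const
const serviceTierSchema = z.enum(serviceTiers)

// A tier entry as modelInfoSchema now allows: the optional `name` marks an
// OpenAI service tier, and its prices override the top-level defaults.
const tierSchema = z.object({
	name: serviceTierSchema.optional(),
	contextWindow: z.number(),
	inputPrice: z.number().optional(),
	outputPrice: z.number().optional(),
})

// Hypothetical flex tier entry with discounted pricing.
tierSchema.parse({ name: "flex", contextWindow: 400_000, inputPrice: 0.625, outputPrice: 5.0 })

console.log(serviceTierSchema.safeParse("priority").success) // true
console.log(serviceTierSchema.safeParse("batch").success) // false: not a known tier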

packages/types/src/provider-settings.ts

Lines changed: 4 additions & 1 deletion
@@ -1,6 +1,6 @@
 import { z } from "zod"
 
-import { modelInfoSchema, reasoningEffortWithMinimalSchema, verbosityLevelsSchema } from "./model.js"
+import { modelInfoSchema, reasoningEffortWithMinimalSchema, verbosityLevelsSchema, serviceTierSchema } from "./model.js"
 import { codebaseIndexProviderSchema } from "./codebase-index.js"
 import {
 	anthropicModels,
@@ -224,6 +224,9 @@ const geminiCliSchema = apiModelIdProviderModelSchema.extend({
 const openAiNativeSchema = apiModelIdProviderModelSchema.extend({
 	openAiNativeApiKey: z.string().optional(),
 	openAiNativeBaseUrl: z.string().optional(),
+	// OpenAI Responses API service tier for openai-native provider only.
+	// UI should only expose this when the selected model supports flex/priority.
+	openAiNativeServiceTier: serviceTierSchema.optional(),
 })
 
 const mistralSchema = apiModelIdProviderModelSchema.extend({
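Because the new field is optional, previously saved provider settings keep validating. A pared-down sketch, assuming only the fields touched here (the real openAiNativeSchema extends apiModelIdProviderModelSchema with many more fields):

import { z } from "zod"

const serviceTierSchema = z.enum(["default", "flex", "priority"])

// Reduced stand-in for openAiNativeSchema, limited to the fields in this diff.
const openAiNativeSchema = z.object({
	openAiNativeApiKey: z.string().optional(),
	openAiNativeBaseUrl: z.string().optional(),
	openAiNativeServiceTier: serviceTierSchema.optional(),
})

// Settings without a tier still parse, so existing configs are unaffected.
openAiNativeSchema.parse({ openAiNativeApiKey: "sk-example" }) // hypothetical key

// An unrecognized tier is rejected at the settings boundary.
console.log(openAiNativeSchema.safeParse({ openAiNativeServiceTier: "turbo" }).success) // false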

packages/types/src/providers/openai.ts

Lines changed: 44 additions & 0 deletions
@@ -32,6 +32,11 @@ export const openAiNativeModels = {
 		// supportsVerbosity is a new capability; ensure ModelInfo includes it
 		supportsVerbosity: true,
 		supportsTemperature: false,
+		allowedServiceTiers: ["flex", "priority"],
+		serviceTierPricing: {
+			flex: { inputPrice: 0.625, outputPrice: 5.0, cacheReadsPrice: 0.0625 },
+			priority: { inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 },
+		},
 	},
 	"gpt-5-mini-2025-08-07": {
 		maxTokens: 128000,
@@ -46,6 +51,11 @@ export const openAiNativeModels = {
 		description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
 		supportsVerbosity: true,
 		supportsTemperature: false,
+		allowedServiceTiers: ["flex", "priority"],
+		serviceTierPricing: {
+			flex: { inputPrice: 0.125, outputPrice: 1.0, cacheReadsPrice: 0.0125 },
+			priority: { inputPrice: 0.45, outputPrice: 3.6, cacheReadsPrice: 0.045 },
+		},
 	},
 	"gpt-5-nano-2025-08-07": {
 		maxTokens: 128000,
@@ -60,6 +70,10 @@ export const openAiNativeModels = {
 		description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
 		supportsVerbosity: true,
 		supportsTemperature: false,
+		allowedServiceTiers: ["flex"],
+		serviceTierPricing: {
+			flex: { inputPrice: 0.025, outputPrice: 0.2, cacheReadsPrice: 0.0025 },
+		},
 	},
 	"gpt-4.1": {
 		maxTokens: 32_768,
@@ -70,6 +84,10 @@ export const openAiNativeModels = {
 		outputPrice: 8,
 		cacheReadsPrice: 0.5,
 		supportsTemperature: true,
+		allowedServiceTiers: ["priority"],
+		serviceTierPricing: {
+			priority: { inputPrice: 3.5, outputPrice: 14.0, cacheReadsPrice: 0.875 },
+		},
 	},
 	"gpt-4.1-mini": {
 		maxTokens: 32_768,
@@ -80,6 +98,10 @@ export const openAiNativeModels = {
 		outputPrice: 1.6,
 		cacheReadsPrice: 0.1,
 		supportsTemperature: true,
+		allowedServiceTiers: ["priority"],
+		serviceTierPricing: {
+			priority: { inputPrice: 0.7, outputPrice: 2.8, cacheReadsPrice: 0.175 },
+		},
 	},
 	"gpt-4.1-nano": {
 		maxTokens: 32_768,
@@ -90,6 +112,10 @@ export const openAiNativeModels = {
 		outputPrice: 0.4,
 		cacheReadsPrice: 0.025,
 		supportsTemperature: true,
+		allowedServiceTiers: ["priority"],
+		serviceTierPricing: {
+			priority: { inputPrice: 0.2, outputPrice: 0.8, cacheReadsPrice: 0.05 },
+		},
 	},
 	o3: {
 		maxTokens: 100_000,
@@ -102,6 +128,11 @@ export const openAiNativeModels = {
 		supportsReasoningEffort: true,
 		reasoningEffort: "medium",
 		supportsTemperature: false,
+		allowedServiceTiers: ["flex", "priority"],
+		serviceTierPricing: {
+			flex: { inputPrice: 1.0, outputPrice: 4.0, cacheReadsPrice: 0.25 },
+			priority: { inputPrice: 3.5, outputPrice: 14.0, cacheReadsPrice: 0.875 },
+		},
 	},
 	"o3-high": {
 		maxTokens: 100_000,
@@ -136,6 +167,11 @@ export const openAiNativeModels = {
 		supportsReasoningEffort: true,
 		reasoningEffort: "medium",
 		supportsTemperature: false,
+		allowedServiceTiers: ["flex", "priority"],
+		serviceTierPricing: {
+			flex: { inputPrice: 0.55, outputPrice: 2.2, cacheReadsPrice: 0.138 },
+			priority: { inputPrice: 2.0, outputPrice: 8.0, cacheReadsPrice: 0.5 },
+		},
 	},
 	"o4-mini-high": {
 		maxTokens: 100_000,
@@ -232,6 +268,10 @@ export const openAiNativeModels = {
 		outputPrice: 10,
 		cacheReadsPrice: 1.25,
 		supportsTemperature: true,
+		allowedServiceTiers: ["priority"],
+		serviceTierPricing: {
+			priority: { inputPrice: 4.25, outputPrice: 17.0, cacheReadsPrice: 2.125 },
+		},
 	},
 	"gpt-4o-mini": {
 		maxTokens: 16_384,
@@ -242,6 +282,10 @@ export const openAiNativeModels = {
 		outputPrice: 0.6,
 		cacheReadsPrice: 0.075,
 		supportsTemperature: true,
+		allowedServiceTiers: ["priority"],
+		serviceTierPricing: {
+			priority: { inputPrice: 0.25, outputPrice: 1.0, cacheReadsPrice: 0.125 },
+		},
 	},
 	"codex-mini-latest": {
 		maxTokens: 16_384,
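These tier tables read as USD per million tokens, matching the top-level inputPrice/outputPrice convention. A hedged sketch of what the metadata buys, using the gpt-5 numbers from above and a hypothetical estimateCost helper (the repo's actual math lives in calculateApiCostOpenAI):

type TierPricing = { inputPrice: number; outputPrice: number; cacheReadsPrice: number }

// Copied from the gpt-5 entry above (assumed USD per 1M tokens).
const gpt5Tiers: Record<"flex" | "priority", TierPricing> = {
	flex: { inputPrice: 0.625, outputPrice: 5.0, cacheReadsPrice: 0.0625 },
	priority: { inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 },
}

// Hypothetical helper: cost of one request under a given tier.
function estimateCost(p: TierPricing, inputTokens: number, outputTokens: number, cachedTokens = 0): number {
	const uncachedInput = inputTokens - cachedTokens
	return (uncachedInput * p.inputPrice + cachedTokens * p.cacheReadsPrice + outputTokens * p.outputPrice) / 1_000_000
}

// Same request, two tiers: flex trades latency for roughly a 4x lower bill.
console.log(estimateCost(gpt5Tiers.flex, 10_000, 2_000)) // 0.01625
console.log(estimateCost(gpt5Tiers.priority, 10_000, 2_000)) // 0.065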

src/api/providers/openai-native.ts

Lines changed: 61 additions & 1 deletion
@@ -11,6 +11,7 @@ import {
 	type ReasoningEffort,
 	type VerbosityLevel,
 	type ReasoningEffortWithMinimal,
+	type ServiceTier,
 } from "@roo-code/types"
 
 import type { ApiHandlerOptions } from "../../shared/api"
@@ -36,6 +37,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler {
 	private lastResponseId: string | undefined
 	private responseIdPromise: Promise<string | undefined> | undefined
 	private responseIdResolver: ((value: string | undefined) => void) | undefined
+	// Resolved service tier from Responses API (actual tier used by OpenAI)
+	private lastServiceTier: ServiceTier | undefined
 
 	// Event types handled by the shared event processor to avoid duplication
 	private readonly coreHandledEventTypes = new Set<string>([
@@ -90,10 +93,15 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler {
 		const cacheReadTokens =
 			usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? cachedFromDetails ?? 0
 
+		// Resolve effective tier: prefer actual tier from response; otherwise requested tier
+		const effectiveTier =
+			this.lastServiceTier || (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
+		const effectiveInfo = this.applyServiceTierPricing(model.info, effectiveTier)
+
 		// Pass total input tokens directly to calculateApiCostOpenAI
 		// The function handles subtracting both cache reads and writes internally (see shared/cost.ts:46)
 		const totalCost = calculateApiCostOpenAI(
-			model.info,
+			effectiveInfo,
 			totalInputTokens,
 			totalOutputTokens,
 			cacheWriteTokens,
@@ -146,6 +154,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler {
 		messages: Anthropic.Messages.MessageParam[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
+		// Reset resolved tier for this request; will be set from response if present
+		this.lastServiceTier = undefined
+
 		// Use Responses API for ALL models
 		const { verbosity, reasoning } = this.getModel()
 
@@ -233,8 +244,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler {
 			previous_response_id?: string
 			store?: boolean
 			instructions?: string
+			service_tier?: ServiceTier
 		}
 
+		// Validate requested tier against model support; if not supported, omit.
+		const requestedTier = (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
+		const allowedTierNames = new Set(model.info.tiers?.map((t) => t.name).filter(Boolean) || [])
+
 		const body: Gpt5RequestBody = {
 			model: model.id,
 			input: formattedInput,
@@ -262,6 +278,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler {
 			// Use the per-request reserved output computed by Roo (params.maxTokens from getModelParams).
 			...(model.maxTokens ? { max_output_tokens: model.maxTokens } : {}),
 			...(requestPreviousResponseId && { previous_response_id: requestPreviousResponseId }),
+			// Include tier when selected and supported by the model, or when explicitly "default"
+			...(requestedTier &&
+				(requestedTier === "default" || allowedTierNames.has(requestedTier)) && {
+					service_tier: requestedTier,
+				}),
 		}
 
 		// Include text.verbosity only when the model explicitly supports it
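Note that the gating above never errors on an unsupported tier; it simply leaves service_tier out of the request body. A standalone sketch of the same check, with the tier metadata reduced to the one field it uses (names here are illustrative, not from the handler):

type ServiceTier = "default" | "flex" | "priority"

interface TierEntry {
	name?: ServiceTier
	contextWindow: number
}

// Mirrors the gating above: "default" is always accepted; any other tier
// must appear as a named entry in the model's tiers metadata.
function resolveRequestedTier(
	requested: ServiceTier | undefined,
	tiers?: TierEntry[],
): ServiceTier | undefined {
	if (!requested || requested === "default") return requested
	const allowed = new Set(tiers?.map((t) => t.name).filter(Boolean) ?? [])
	return allowed.has(requested) ? requested : undefined
}

// Hypothetical model that only supports flex: "priority" is dropped
// silently instead of failing the request.
const tiers: TierEntry[] = [{ name: "flex", contextWindow: 400_000 }]
console.log(resolveRequestedTier("flex", tiers)) // "flex"
console.log(resolveRequestedTier("priority", tiers)) // undefined, so service_tier is omitted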
@@ -636,6 +657,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler {
 			if (parsed.response?.id) {
 				this.resolveResponseId(parsed.response.id)
 			}
+			// Capture resolved service tier if present
+			if (parsed.response?.service_tier) {
+				this.lastServiceTier = parsed.response.service_tier as ServiceTier
+			}
 
 			// Delegate standard event types to the shared processor to avoid duplication
 			if (parsed?.type && this.coreHandledEventTypes.has(parsed.type)) {
@@ -927,6 +952,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler {
 			if (parsed.response?.id) {
 				this.resolveResponseId(parsed.response.id)
 			}
+			// Capture resolved service tier if present
+			if (parsed.response?.service_tier) {
+				this.lastServiceTier = parsed.response.service_tier as ServiceTier
+			}
 
 			// Check if the done event contains the complete output (as a fallback)
 			if (
@@ -1051,6 +1080,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler {
 		if (event?.response?.id) {
 			this.resolveResponseId(event.response.id)
 		}
+		// Capture resolved service tier when available
+		if (event?.response?.service_tier) {
+			this.lastServiceTier = event.response.service_tier as ServiceTier
+		}
 
 		// Handle known streaming text deltas
 		if (event?.type === "response.text.delta" || event?.type === "response.output_text.delta") {
@@ -1141,6 +1174,26 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler {
 		return info.reasoningEffort as ReasoningEffortWithMinimal | undefined
 	}
 
+	/**
+	 * Returns a shallow-cloned ModelInfo with pricing overridden for the given tier, if available.
+	 * If no tier or no overrides exist, the original ModelInfo is returned.
+	 */
+	private applyServiceTierPricing(info: ModelInfo, tier?: ServiceTier): ModelInfo {
+		if (!tier || tier === "default") return info
+
+		// Find the tier with matching name in the tiers array
+		const tierInfo = info.tiers?.find((t) => t.name === tier)
+		if (!tierInfo) return info
+
+		return {
+			...info,
+			inputPrice: tierInfo.inputPrice ?? info.inputPrice,
+			outputPrice: tierInfo.outputPrice ?? info.outputPrice,
+			cacheReadsPrice: tierInfo.cacheReadsPrice ?? info.cacheReadsPrice,
+			cacheWritesPrice: tierInfo.cacheWritesPrice ?? info.cacheWritesPrice,
+		}
+	}
+
 	// Removed isResponsesApiModel method as ALL models now use the Responses API
 
 	override getModel() {
@@ -1214,6 +1267,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler {
 			store: false, // Don't store prompt completions
 		}
 
+		// Include service tier if selected and supported
+		const requestedTier = (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
+		const allowedTierNames = new Set(model.info.tiers?.map((t) => t.name).filter(Boolean) || [])
+		if (requestedTier && (requestedTier === "default" || allowedTierNames.has(requestedTier))) {
+			requestBody.service_tier = requestedTier
+		}
+
 		// Add reasoning if supported
 		if (reasoningEffort) {
 			requestBody.reasoning = {
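Cost attribution prefers the tier echoed back by the Responses API (response.service_tier) over the requested one, then applies the override rule from applyServiceTierPricing. A compressed sketch of that rule, with ModelInfo reduced to its pricing fields and hypothetical numbers:

type ServiceTier = "default" | "flex" | "priority"

interface PricingInfo {
	inputPrice?: number
	outputPrice?: number
	cacheReadsPrice?: number
	cacheWritesPrice?: number
	tiers?: Array<{ name?: ServiceTier } & Omit<PricingInfo, "tiers">>
}

// Same override rule as applyServiceTierPricing above: tier prices win,
// top-level prices fill any gaps, and unknown tiers leave info untouched.
function withTierPricing(info: PricingInfo, tier?: ServiceTier): PricingInfo {
	if (!tier || tier === "default") return info
	const t = info.tiers?.find((x) => x.name === tier)
	if (!t) return info
	return {
		...info,
		inputPrice: t.inputPrice ?? info.inputPrice,
		outputPrice: t.outputPrice ?? info.outputPrice,
		cacheReadsPrice: t.cacheReadsPrice ?? info.cacheReadsPrice,
		cacheWritesPrice: t.cacheWritesPrice ?? info.cacheWritesPrice,
	}
}

// Hypothetical model: flex overrides input/output but inherits cache pricing.
const info: PricingInfo = {
	inputPrice: 1.25,
	outputPrice: 10,
	cacheReadsPrice: 0.125,
	tiers: [{ name: "flex", inputPrice: 0.625, outputPrice: 5.0 }],
}

console.log(withTierPricing(info, "flex").inputPrice) // 0.625
console.log(withTierPricing(info, "flex").cacheReadsPrice) // 0.125, inherited from the top level
console.log(withTierPricing(info) === info) // true: no tier, original object returned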
