@@ -11,6 +11,7 @@ import {
 	type ReasoningEffort,
 	type VerbosityLevel,
 	type ReasoningEffortWithMinimal,
+	type ServiceTier,
 } from "@roo-code/types"
 
 import type { ApiHandlerOptions } from "../../shared/api"
@@ -36,6 +37,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	private lastResponseId: string | undefined
 	private responseIdPromise: Promise<string | undefined> | undefined
 	private responseIdResolver: ((value: string | undefined) => void) | undefined
+	// Resolved service tier from Responses API (actual tier used by OpenAI)
+	private lastServiceTier: ServiceTier | undefined
 
 	// Event types handled by the shared event processor to avoid duplication
 	private readonly coreHandledEventTypes = new Set<string>([
@@ -90,10 +93,15 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		const cacheReadTokens =
 			usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? cachedFromDetails ?? 0
 
+		// Resolve effective tier: prefer actual tier from response; otherwise requested tier
+		const effectiveTier =
+			this.lastServiceTier || (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
+		const effectiveInfo = this.applyServiceTierPricing(model.info, effectiveTier)
+
 		// Pass total input tokens directly to calculateApiCostOpenAI
 		// The function handles subtracting both cache reads and writes internally (see shared/cost.ts:46)
 		const totalCost = calculateApiCostOpenAI(
-			model.info,
+			effectiveInfo,
 			totalInputTokens,
 			totalOutputTokens,
 			cacheWriteTokens,
@@ -146,6 +154,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		messages: Anthropic.Messages.MessageParam[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
+		// Reset resolved tier for this request; will be set from response if present
+		this.lastServiceTier = undefined
+
 		// Use Responses API for ALL models
 		const { verbosity, reasoning } = this.getModel()
 
@@ -233,8 +244,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			previous_response_id?: string
 			store?: boolean
 			instructions?: string
+			service_tier?: ServiceTier
 		}
 
+		// Validate requested tier against model support; if not supported, omit.
+		const requestedTier = (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
+		const allowedTierNames = new Set(model.info.tiers?.map((t) => t.name).filter(Boolean) || [])
+
 		const body: Gpt5RequestBody = {
 			model: model.id,
 			input: formattedInput,
@@ -262,6 +278,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			// Use the per-request reserved output computed by Roo (params.maxTokens from getModelParams).
 			...(model.maxTokens ? { max_output_tokens: model.maxTokens } : {}),
 			...(requestPreviousResponseId && { previous_response_id: requestPreviousResponseId }),
+			// Include tier when selected and supported by the model, or when explicitly "default"
+			...(requestedTier &&
+				(requestedTier === "default" || allowedTierNames.has(requestedTier)) && {
+					service_tier: requestedTier,
+				}),
 		}
 
 		// Include text.verbosity only when the model explicitly supports it
@@ -636,6 +657,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 					if (parsed.response?.id) {
 						this.resolveResponseId(parsed.response.id)
 					}
+					// Capture resolved service tier if present
+					if (parsed.response?.service_tier) {
+						this.lastServiceTier = parsed.response.service_tier as ServiceTier
+					}
 
 					// Delegate standard event types to the shared processor to avoid duplication
 					if (parsed?.type && this.coreHandledEventTypes.has(parsed.type)) {
@@ -927,6 +952,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 					if (parsed.response?.id) {
 						this.resolveResponseId(parsed.response.id)
 					}
+					// Capture resolved service tier if present
+					if (parsed.response?.service_tier) {
+						this.lastServiceTier = parsed.response.service_tier as ServiceTier
+					}
 
 					// Check if the done event contains the complete output (as a fallback)
 					if (
@@ -1051,6 +1080,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		if (event?.response?.id) {
 			this.resolveResponseId(event.response.id)
 		}
+		// Capture resolved service tier when available
+		if (event?.response?.service_tier) {
+			this.lastServiceTier = event.response.service_tier as ServiceTier
+		}
 
 		// Handle known streaming text deltas
 		if (event?.type === "response.text.delta" || event?.type === "response.output_text.delta") {
@@ -1141,6 +1174,26 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		return info.reasoningEffort as ReasoningEffortWithMinimal | undefined
 	}
 
+	/**
+	 * Returns a shallow-cloned ModelInfo with pricing overridden for the given tier, if available.
+	 * If no tier or no overrides exist, the original ModelInfo is returned.
+	 */
+	private applyServiceTierPricing(info: ModelInfo, tier?: ServiceTier): ModelInfo {
+		if (!tier || tier === "default") return info
+
+		// Find the tier with matching name in the tiers array
+		const tierInfo = info.tiers?.find((t) => t.name === tier)
+		if (!tierInfo) return info
+
+		return {
+			...info,
+			inputPrice: tierInfo.inputPrice ?? info.inputPrice,
+			outputPrice: tierInfo.outputPrice ?? info.outputPrice,
+			cacheReadsPrice: tierInfo.cacheReadsPrice ?? info.cacheReadsPrice,
+			cacheWritesPrice: tierInfo.cacheWritesPrice ?? info.cacheWritesPrice,
+		}
+	}
+
 	// Removed isResponsesApiModel method as ALL models now use the Responses API
 
 	override getModel() {
@@ -1214,6 +1267,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			store: false, // Don't store prompt completions
 		}
 
+		// Include service tier if selected and supported
+		const requestedTier = (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
+		const allowedTierNames = new Set(model.info.tiers?.map((t) => t.name).filter(Boolean) || [])
+		if (requestedTier && (requestedTier === "default" || allowedTierNames.has(requestedTier))) {
+			requestBody.service_tier = requestedTier
+		}
+
 		// Add reasoning if supported
 		if (reasoningEffort) {
 			requestBody.reasoning = {
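
For context on the cost change above, here is a minimal, self-contained sketch of how the tier-aware pricing is meant to resolve. The `ServiceTier`, `TierPricing`, and `ModelInfo` shapes below are simplified stand-ins for the real definitions in `@roo-code/types`, and the prices are made-up example numbers; the point is that a tier echoed back by the Responses API takes precedence over the requested tier, and tier pricing overrides the base rates field by field.

```ts
// Illustrative only: simplified stand-ins for the ServiceTier and ModelInfo
// types from @roo-code/types, with made-up example prices (per 1M tokens).
type ServiceTier = "default" | "flex" | "priority"

interface TierPricing {
	name: ServiceTier
	inputPrice?: number
	outputPrice?: number
	cacheReadsPrice?: number
	cacheWritesPrice?: number
}

interface ModelInfo {
	inputPrice?: number
	outputPrice?: number
	cacheReadsPrice?: number
	cacheWritesPrice?: number
	tiers?: TierPricing[]
}

// Mirrors applyServiceTierPricing in the diff: clone the model info and
// override only the price fields the matching tier actually defines.
function applyServiceTierPricing(info: ModelInfo, tier?: ServiceTier): ModelInfo {
	if (!tier || tier === "default") return info
	const tierInfo = info.tiers?.find((t) => t.name === tier)
	if (!tierInfo) return info
	return {
		...info,
		inputPrice: tierInfo.inputPrice ?? info.inputPrice,
		outputPrice: tierInfo.outputPrice ?? info.outputPrice,
		cacheReadsPrice: tierInfo.cacheReadsPrice ?? info.cacheReadsPrice,
		cacheWritesPrice: tierInfo.cacheWritesPrice ?? info.cacheWritesPrice,
	}
}

const model: ModelInfo = {
	inputPrice: 1.25,
	outputPrice: 10,
	tiers: [{ name: "flex", inputPrice: 0.625, outputPrice: 5 }],
}

// The tier echoed back by the Responses API (lastServiceTier) wins over the
// tier requested in settings; here the response did not echo one.
const requestedTier: ServiceTier | undefined = "flex"
const resolvedTier: ServiceTier | undefined = undefined
const effectiveTier = resolvedTier || requestedTier || undefined

console.log(applyServiceTierPricing(model, effectiveTier))
// -> { inputPrice: 0.625, outputPrice: 5, tiers: [...] } (flex rates replace the base rates)
```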