@@ -11,7 +11,6 @@ import {
1111 type ReasoningEffort ,
1212 type VerbosityLevel ,
1313 type ReasoningEffortWithMinimal ,
14- type ServiceTier ,
1514} from "@roo-code/types"
1615
1716import type { ApiHandlerOptions } from "../../shared/api"
@@ -37,8 +36,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
3736 private lastResponseId : string | undefined
3837 private responseIdPromise : Promise < string | undefined > | undefined
3938 private responseIdResolver : ( ( value : string | undefined ) => void ) | undefined
40- // Resolved service tier from Responses API (actual tier used by OpenAI)
41- private lastServiceTier : ServiceTier | undefined
4239
4340 // Event types handled by the shared event processor to avoid duplication
4441 private readonly coreHandledEventTypes = new Set < string > ( [
@@ -93,15 +90,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
9390 const cacheReadTokens =
9491 usage . cache_read_input_tokens ?? usage . cache_read_tokens ?? usage . cached_tokens ?? cachedFromDetails ?? 0
9592
96- // Resolve effective tier: prefer actual tier from response; otherwise requested tier
97- const effectiveTier =
98- this . lastServiceTier || ( this . options . openAiNativeServiceTier as ServiceTier | undefined ) || undefined
99- const effectiveInfo = this . applyServiceTierPricing ( model . info , effectiveTier )
100-
10193 // Pass total input tokens directly to calculateApiCostOpenAI
10294 // The function handles subtracting both cache reads and writes internally (see shared/cost.ts:46)
10395 const totalCost = calculateApiCostOpenAI (
104- effectiveInfo ,
96+ model . info ,
10597 totalInputTokens ,
10698 totalOutputTokens ,
10799 cacheWriteTokens ,
@@ -154,9 +146,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
154146 messages : Anthropic . Messages . MessageParam [ ] ,
155147 metadata ?: ApiHandlerCreateMessageMetadata ,
156148 ) : ApiStream {
157- // Reset resolved tier for this request; will be set from response if present
158- this . lastServiceTier = undefined
159-
160149 // Use Responses API for ALL models
161150 const { verbosity, reasoning } = this . getModel ( )
162151
@@ -217,8 +206,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
217206 metadata ,
218207 )
219208
220- // Make the request (pass systemPrompt and messages for potential retry)
221- yield * this . executeRequest ( requestBody , model , metadata , systemPrompt , messages )
209+ // Make the request
210+ yield * this . executeRequest ( requestBody , model , metadata )
222211 }
223212
224213 private buildRequestBody (
@@ -244,13 +233,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
244233 previous_response_id ?: string
245234 store ?: boolean
246235 instructions ?: string
247- service_tier ?: ServiceTier
248236 }
249237
250- // Validate requested tier against model support; if not supported, omit.
251- const requestedTier = ( this . options . openAiNativeServiceTier as ServiceTier | undefined ) || undefined
252- const allowedTierNames = new Set ( model . info . tiers ?. map ( ( t ) => t . name ) . filter ( Boolean ) || [ ] )
253-
254238 const body : Gpt5RequestBody = {
255239 model : model . id ,
256240 input : formattedInput ,
@@ -278,11 +262,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
278262 // Use the per-request reserved output computed by Roo (params.maxTokens from getModelParams).
279263 ...( model . maxTokens ? { max_output_tokens : model . maxTokens } : { } ) ,
280264 ...( requestPreviousResponseId && { previous_response_id : requestPreviousResponseId } ) ,
281- // Include tier when selected and supported by the model, or when explicitly "default"
282- ...( requestedTier &&
283- ( requestedTier === "default" || allowedTierNames . has ( requestedTier ) ) && {
284- service_tier : requestedTier ,
285- } ) ,
286265 }
287266
288267 // Include text.verbosity only when the model explicitly supports it
@@ -297,8 +276,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
297276 requestBody : any ,
298277 model : OpenAiNativeModel ,
299278 metadata ?: ApiHandlerCreateMessageMetadata ,
300- systemPrompt ?: string ,
301- messages ?: Anthropic . Messages . MessageParam [ ] ,
302279 ) : ApiStream {
303280 try {
304281 // Use the official SDK
@@ -325,18 +302,12 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
325302 if ( is400Error && requestBody . previous_response_id && isPreviousResponseError ) {
326303 // Log the error and retry without the previous_response_id
327304
328- // Clear the stored lastResponseId to prevent using it again
329- this . lastResponseId = undefined
330-
331- // Re-prepare the full conversation without previous_response_id
332- let retryRequestBody = { ...requestBody }
305+ // Remove the problematic previous_response_id and retry
306+ const retryRequestBody = { ...requestBody }
333307 delete retryRequestBody . previous_response_id
334308
335- // If we have the original messages, re-prepare the full conversation
336- if ( systemPrompt && messages ) {
337- const { formattedInput } = this . prepareStructuredInput ( systemPrompt , messages , undefined )
338- retryRequestBody . input = formattedInput
339- }
309+ // Clear the stored lastResponseId to prevent using it again
310+ this . lastResponseId = undefined
340311
341312 try {
342313 // Retry with the SDK
@@ -346,13 +317,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
346317
347318 if ( typeof ( retryStream as any ) [ Symbol . asyncIterator ] !== "function" ) {
348319 // If SDK fails, fall back to SSE
349- yield * this . makeGpt5ResponsesAPIRequest (
350- retryRequestBody ,
351- model ,
352- metadata ,
353- systemPrompt ,
354- messages ,
355- )
320+ yield * this . makeGpt5ResponsesAPIRequest ( retryRequestBody , model , metadata )
356321 return
357322 }
358323
@@ -364,13 +329,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
364329 return
365330 } catch ( retryErr ) {
366331 // If retry also fails, fall back to SSE
367- yield * this . makeGpt5ResponsesAPIRequest ( retryRequestBody , model , metadata , systemPrompt , messages )
332+ yield * this . makeGpt5ResponsesAPIRequest ( retryRequestBody , model , metadata )
368333 return
369334 }
370335 }
371336
372337 // For other errors, fallback to manual SSE via fetch
373- yield * this . makeGpt5ResponsesAPIRequest ( requestBody , model , metadata , systemPrompt , messages )
338+ yield * this . makeGpt5ResponsesAPIRequest ( requestBody , model , metadata )
374339 }
375340 }
376341
@@ -459,8 +424,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
459424 requestBody : any ,
460425 model : OpenAiNativeModel ,
461426 metadata ?: ApiHandlerCreateMessageMetadata ,
462- systemPrompt ?: string ,
463- messages ?: Anthropic . Messages . MessageParam [ ] ,
464427 ) : ApiStream {
465428 const apiKey = this . options . openAiNativeApiKey ?? "not-provided"
466429 const baseUrl = this . options . openAiNativeBaseUrl || "https://api.openai.com"
@@ -505,22 +468,16 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
505468 if ( response . status === 400 && requestBody . previous_response_id && isPreviousResponseError ) {
506469 // Log the error and retry without the previous_response_id
507470
471+ // Remove the problematic previous_response_id and retry
472+ const retryRequestBody = { ...requestBody }
473+ delete retryRequestBody . previous_response_id
474+
508475 // Clear the stored lastResponseId to prevent using it again
509476 this . lastResponseId = undefined
510477 // Resolve the promise once to unblock any waiting requests
511478 this . resolveResponseId ( undefined )
512479
513- // Re-prepare the full conversation without previous_response_id
514- let retryRequestBody = { ...requestBody }
515- delete retryRequestBody . previous_response_id
516-
517- // If we have the original messages, re-prepare the full conversation
518- if ( systemPrompt && messages ) {
519- const { formattedInput } = this . prepareStructuredInput ( systemPrompt , messages , undefined )
520- retryRequestBody . input = formattedInput
521- }
522-
523- // Retry the request with full conversation context
480+ // Retry the request without the previous_response_id
524481 const retryResponse = await fetch ( url , {
525482 method : "POST" ,
526483 headers : {
@@ -679,10 +636,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
679636 if ( parsed . response ?. id ) {
680637 this . resolveResponseId ( parsed . response . id )
681638 }
682- // Capture resolved service tier if present
683- if ( parsed . response ?. service_tier ) {
684- this . lastServiceTier = parsed . response . service_tier as ServiceTier
685- }
686639
687640 // Delegate standard event types to the shared processor to avoid duplication
688641 if ( parsed ?. type && this . coreHandledEventTypes . has ( parsed . type ) ) {
@@ -974,10 +927,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
974927 if ( parsed . response ?. id ) {
975928 this . resolveResponseId ( parsed . response . id )
976929 }
977- // Capture resolved service tier if present
978- if ( parsed . response ?. service_tier ) {
979- this . lastServiceTier = parsed . response . service_tier as ServiceTier
980- }
981930
982931 // Check if the done event contains the complete output (as a fallback)
983932 if (
@@ -1102,10 +1051,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
11021051 if ( event ?. response ?. id ) {
11031052 this . resolveResponseId ( event . response . id )
11041053 }
1105- // Capture resolved service tier when available
1106- if ( event ?. response ?. service_tier ) {
1107- this . lastServiceTier = event . response . service_tier as ServiceTier
1108- }
11091054
11101055 // Handle known streaming text deltas
11111056 if ( event ?. type === "response.text.delta" || event ?. type === "response.output_text.delta" ) {
@@ -1196,26 +1141,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
11961141 return info . reasoningEffort as ReasoningEffortWithMinimal | undefined
11971142 }
11981143
1199- /**
1200- * Returns a shallow-cloned ModelInfo with pricing overridden for the given tier, if available.
1201- * If no tier or no overrides exist, the original ModelInfo is returned.
1202- */
1203- private applyServiceTierPricing ( info : ModelInfo , tier ?: ServiceTier ) : ModelInfo {
1204- if ( ! tier || tier === "default" ) return info
1205-
1206- // Find the tier with matching name in the tiers array
1207- const tierInfo = info . tiers ?. find ( ( t ) => t . name === tier )
1208- if ( ! tierInfo ) return info
1209-
1210- return {
1211- ...info ,
1212- inputPrice : tierInfo . inputPrice ?? info . inputPrice ,
1213- outputPrice : tierInfo . outputPrice ?? info . outputPrice ,
1214- cacheReadsPrice : tierInfo . cacheReadsPrice ?? info . cacheReadsPrice ,
1215- cacheWritesPrice : tierInfo . cacheWritesPrice ?? info . cacheWritesPrice ,
1216- }
1217- }
1218-
12191144 // Removed isResponsesApiModel method as ALL models now use the Responses API
12201145
12211146 override getModel ( ) {
@@ -1289,13 +1214,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
12891214 store : false , // Don't store prompt completions
12901215 }
12911216
1292- // Include service tier if selected and supported
1293- const requestedTier = ( this . options . openAiNativeServiceTier as ServiceTier | undefined ) || undefined
1294- const allowedTierNames = new Set ( model . info . tiers ?. map ( ( t ) => t . name ) . filter ( Boolean ) || [ ] )
1295- if ( requestedTier && ( requestedTier === "default" || allowedTierNames . has ( requestedTier ) ) ) {
1296- requestBody . service_tier = requestedTier
1297- }
1298-
12991217 // Add reasoning if supported
13001218 if ( reasoningEffort ) {
13011219 requestBody . reasoning = {
0 commit comments