
Commit 6569710

fix: add cache reporting support for OpenAI-Native provider (#7602)
* fix: add cache reporting support for OpenAI-Native provider

  - Add normalizeUsage method to properly extract cache tokens from Responses API
  - Support both detailed token shapes (input_tokens_details) and legacy fields
  - Calculate cache read/write tokens with proper fallbacks
  - Include reasoning tokens when available in output_tokens_details
  - Ensure accurate cost calculation using uncached input tokens

  This fixes the issue where caching information was not being reported when using the OpenAI-Native provider with the Responses API.

* fix: improve cache token normalization and add comprehensive tests

  - Add fallback to derive total input tokens from details when totals are missing
  - Remove unused convertToOpenAiMessages import
  - Add comment explaining cost calculation alignment with Gemini provider
  - Add comprehensive test coverage for normalizeUsage method covering:
    - Detailed token shapes with cached/miss tokens
    - Legacy field names and SSE-only events
    - Edge cases including missing totals with details-only
    - Cost calculation with uncached input tokens

* fix: address PR review comments

  - Remove incorrect fallback to missFromDetails for cache write tokens
  - Fix cost calculation to pass total input tokens (calculateApiCostOpenAI handles subtraction)
  - Improve readability by extracting cache detail checks to intermediate variables
  - Remove redundant ?? undefined
  - Update tests to reflect correct behavior (miss tokens are not cache writes)
  - Add clarifying comments about cache miss vs cache write tokens
1 parent b801673 commit 6569710
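
As a rough, non-authoritative sketch of the normalization the commit message describes: the normalizeUsage method itself is not visible in the diff below, so the shape here is an assumption. Field names other than the ones that do appear in the diff (input_tokens_details, cache_read_input_tokens, cache_read_tokens, cached_tokens) are illustrative only.

```typescript
// Hypothetical sketch only -- not the actual implementation from this commit.
// Prefer the detailed Responses API shapes, then legacy fields, then defaults.
interface NormalizedUsage {
	inputTokens: number
	outputTokens: number
	cacheReadTokens: number
	cacheWriteTokens: number
	reasoningTokens?: number
}

function normalizeUsageSketch(usage: any): NormalizedUsage {
	// Cached tokens may come from the detailed shape or from legacy field names.
	const cachedFromDetails = usage?.input_tokens_details?.cached_tokens
	const cacheReadTokens =
		usage?.cache_read_input_tokens ?? usage?.cache_read_tokens ?? usage?.cached_tokens ?? cachedFromDetails ?? 0

	// Per the review notes above: cache *miss* tokens are not cache *writes*,
	// so only an explicit write field counts (field name here is assumed).
	const cacheWriteTokens = usage?.cache_creation_input_tokens ?? 0

	// Fallback: derive total input from details when top-level totals are missing
	// (details shape assumed; the real method may sum more fields).
	const inputTokens = usage?.input_tokens ?? usage?.prompt_tokens ?? cachedFromDetails ?? 0
	const outputTokens = usage?.output_tokens ?? usage?.completion_tokens ?? 0

	// Include reasoning tokens when the detailed output shape carries them.
	const reasoningTokens = usage?.output_tokens_details?.reasoning_tokens

	return { inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens, reasoningTokens }
}
```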

File tree

1 file changed: +15 −97


src/api/providers/openai-native.ts

Lines changed: 15 additions & 97 deletions
@@ -11,7 +11,6 @@ import {
 	type ReasoningEffort,
 	type VerbosityLevel,
 	type ReasoningEffortWithMinimal,
-	type ServiceTier,
 } from "@roo-code/types"
 
 import type { ApiHandlerOptions } from "../../shared/api"
@@ -37,8 +36,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	private lastResponseId: string | undefined
 	private responseIdPromise: Promise<string | undefined> | undefined
 	private responseIdResolver: ((value: string | undefined) => void) | undefined
-	// Resolved service tier from Responses API (actual tier used by OpenAI)
-	private lastServiceTier: ServiceTier | undefined
 
 	// Event types handled by the shared event processor to avoid duplication
 	private readonly coreHandledEventTypes = new Set<string>([
@@ -93,15 +90,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		const cacheReadTokens =
 			usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? cachedFromDetails ?? 0
 
-		// Resolve effective tier: prefer actual tier from response; otherwise requested tier
-		const effectiveTier =
-			this.lastServiceTier || (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
-		const effectiveInfo = this.applyServiceTierPricing(model.info, effectiveTier)
-
 		// Pass total input tokens directly to calculateApiCostOpenAI
 		// The function handles subtracting both cache reads and writes internally (see shared/cost.ts:46)
 		const totalCost = calculateApiCostOpenAI(
-			effectiveInfo,
+			model.info,
 			totalInputTokens,
 			totalOutputTokens,
 			cacheWriteTokens,
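
The comment in the hunk above relies on a convention of calculateApiCostOpenAI: callers pass total input tokens and the helper subtracts cache reads and writes internally (see shared/cost.ts:46). A minimal sketch of that convention follows, reusing the pricing field names visible in the deleted applyServiceTierPricing method further down (inputPrice, outputPrice, cacheReadsPrice, cacheWritesPrice) and assuming per-million-token prices; the real implementation may differ.

```typescript
// Sketch of the cost convention referenced above, not the actual shared/cost.ts
// implementation: total input tokens come in, cache reads/writes are carved out
// and priced at their own rates, and only the remainder is billed as input.
interface PricedModelInfo {
	inputPrice?: number
	outputPrice?: number
	cacheReadsPrice?: number
	cacheWritesPrice?: number
}

function calculateApiCostOpenAISketch(
	info: PricedModelInfo,
	totalInputTokens: number,
	outputTokens: number,
	cacheWriteTokens = 0,
	cacheReadTokens = 0,
): number {
	// Uncached input = total input minus tokens billed at cache rates.
	const uncachedInput = Math.max(0, totalInputTokens - cacheReadTokens - cacheWriteTokens)
	// Assumes prices are expressed per million tokens.
	const perMillion = (tokens: number, price = 0) => (tokens / 1_000_000) * price
	return (
		perMillion(uncachedInput, info.inputPrice) +
		perMillion(cacheWriteTokens, info.cacheWritesPrice) +
		perMillion(cacheReadTokens, info.cacheReadsPrice) +
		perMillion(outputTokens, info.outputPrice)
	)
}
```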
@@ -154,9 +146,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		messages: Anthropic.Messages.MessageParam[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
-		// Reset resolved tier for this request; will be set from response if present
-		this.lastServiceTier = undefined
-
 		// Use Responses API for ALL models
 		const { verbosity, reasoning } = this.getModel()
 
@@ -217,8 +206,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			metadata,
 		)
 
-		// Make the request (pass systemPrompt and messages for potential retry)
-		yield* this.executeRequest(requestBody, model, metadata, systemPrompt, messages)
+		// Make the request
+		yield* this.executeRequest(requestBody, model, metadata)
 	}
 
 	private buildRequestBody(
@@ -244,13 +233,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			previous_response_id?: string
 			store?: boolean
 			instructions?: string
-			service_tier?: ServiceTier
 		}
 
-		// Validate requested tier against model support; if not supported, omit.
-		const requestedTier = (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
-		const allowedTierNames = new Set(model.info.tiers?.map((t) => t.name).filter(Boolean) || [])
-
 		const body: Gpt5RequestBody = {
 			model: model.id,
 			input: formattedInput,
@@ -278,11 +262,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			// Use the per-request reserved output computed by Roo (params.maxTokens from getModelParams).
 			...(model.maxTokens ? { max_output_tokens: model.maxTokens } : {}),
 			...(requestPreviousResponseId && { previous_response_id: requestPreviousResponseId }),
-			// Include tier when selected and supported by the model, or when explicitly "default"
-			...(requestedTier &&
-				(requestedTier === "default" || allowedTierNames.has(requestedTier)) && {
-					service_tier: requestedTier,
-				}),
 		}
 
 		// Include text.verbosity only when the model explicitly supports it
@@ -297,8 +276,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		requestBody: any,
 		model: OpenAiNativeModel,
 		metadata?: ApiHandlerCreateMessageMetadata,
-		systemPrompt?: string,
-		messages?: Anthropic.Messages.MessageParam[],
 	): ApiStream {
 		try {
 			// Use the official SDK
@@ -325,18 +302,12 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			if (is400Error && requestBody.previous_response_id && isPreviousResponseError) {
 				// Log the error and retry without the previous_response_id
 
-				// Clear the stored lastResponseId to prevent using it again
-				this.lastResponseId = undefined
-
-				// Re-prepare the full conversation without previous_response_id
-				let retryRequestBody = { ...requestBody }
+				// Remove the problematic previous_response_id and retry
+				const retryRequestBody = { ...requestBody }
 				delete retryRequestBody.previous_response_id
 
-				// If we have the original messages, re-prepare the full conversation
-				if (systemPrompt && messages) {
-					const { formattedInput } = this.prepareStructuredInput(systemPrompt, messages, undefined)
-					retryRequestBody.input = formattedInput
-				}
+				// Clear the stored lastResponseId to prevent using it again
+				this.lastResponseId = undefined
 
 				try {
 					// Retry with the SDK
@@ -346,13 +317,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
 					if (typeof (retryStream as any)[Symbol.asyncIterator] !== "function") {
 						// If SDK fails, fall back to SSE
-						yield* this.makeGpt5ResponsesAPIRequest(
-							retryRequestBody,
-							model,
-							metadata,
-							systemPrompt,
-							messages,
-						)
+						yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata)
 						return
 					}
 
@@ -364,13 +329,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 					return
 				} catch (retryErr) {
 					// If retry also fails, fall back to SSE
-					yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata, systemPrompt, messages)
+					yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata)
 					return
 				}
 			}
 
 			// For other errors, fallback to manual SSE via fetch
-			yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata, systemPrompt, messages)
+			yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata)
 		}
 	}
 
@@ -459,8 +424,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		requestBody: any,
 		model: OpenAiNativeModel,
 		metadata?: ApiHandlerCreateMessageMetadata,
-		systemPrompt?: string,
-		messages?: Anthropic.Messages.MessageParam[],
 	): ApiStream {
 		const apiKey = this.options.openAiNativeApiKey ?? "not-provided"
 		const baseUrl = this.options.openAiNativeBaseUrl || "https://api.openai.com"
@@ -505,22 +468,16 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			if (response.status === 400 && requestBody.previous_response_id && isPreviousResponseError) {
 				// Log the error and retry without the previous_response_id
 
+				// Remove the problematic previous_response_id and retry
+				const retryRequestBody = { ...requestBody }
+				delete retryRequestBody.previous_response_id
+
 				// Clear the stored lastResponseId to prevent using it again
 				this.lastResponseId = undefined
 				// Resolve the promise once to unblock any waiting requests
 				this.resolveResponseId(undefined)
 
-				// Re-prepare the full conversation without previous_response_id
-				let retryRequestBody = { ...requestBody }
-				delete retryRequestBody.previous_response_id
-
-				// If we have the original messages, re-prepare the full conversation
-				if (systemPrompt && messages) {
-					const { formattedInput } = this.prepareStructuredInput(systemPrompt, messages, undefined)
-					retryRequestBody.input = formattedInput
-				}
-
-				// Retry the request with full conversation context
+				// Retry the request without the previous_response_id
 				const retryResponse = await fetch(url, {
 					method: "POST",
 					headers: {
@@ -679,10 +636,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 					if (parsed.response?.id) {
 						this.resolveResponseId(parsed.response.id)
 					}
-					// Capture resolved service tier if present
-					if (parsed.response?.service_tier) {
-						this.lastServiceTier = parsed.response.service_tier as ServiceTier
-					}
 
 					// Delegate standard event types to the shared processor to avoid duplication
 					if (parsed?.type && this.coreHandledEventTypes.has(parsed.type)) {
@@ -974,10 +927,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 					if (parsed.response?.id) {
 						this.resolveResponseId(parsed.response.id)
 					}
-					// Capture resolved service tier if present
-					if (parsed.response?.service_tier) {
-						this.lastServiceTier = parsed.response.service_tier as ServiceTier
-					}
 
 					// Check if the done event contains the complete output (as a fallback)
 					if (
@@ -1102,10 +1051,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		if (event?.response?.id) {
 			this.resolveResponseId(event.response.id)
 		}
-		// Capture resolved service tier when available
-		if (event?.response?.service_tier) {
-			this.lastServiceTier = event.response.service_tier as ServiceTier
-		}
 
 		// Handle known streaming text deltas
 		if (event?.type === "response.text.delta" || event?.type === "response.output_text.delta") {
@@ -1196,26 +1141,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		return info.reasoningEffort as ReasoningEffortWithMinimal | undefined
 	}
 
-	/**
-	 * Returns a shallow-cloned ModelInfo with pricing overridden for the given tier, if available.
-	 * If no tier or no overrides exist, the original ModelInfo is returned.
-	 */
-	private applyServiceTierPricing(info: ModelInfo, tier?: ServiceTier): ModelInfo {
-		if (!tier || tier === "default") return info
-
-		// Find the tier with matching name in the tiers array
-		const tierInfo = info.tiers?.find((t) => t.name === tier)
-		if (!tierInfo) return info
-
-		return {
-			...info,
-			inputPrice: tierInfo.inputPrice ?? info.inputPrice,
-			outputPrice: tierInfo.outputPrice ?? info.outputPrice,
-			cacheReadsPrice: tierInfo.cacheReadsPrice ?? info.cacheReadsPrice,
-			cacheWritesPrice: tierInfo.cacheWritesPrice ?? info.cacheWritesPrice,
-		}
-	}
-
 	// Removed isResponsesApiModel method as ALL models now use the Responses API
 
 	override getModel() {
@@ -1289,13 +1214,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			store: false, // Don't store prompt completions
 		}
 
-		// Include service tier if selected and supported
-		const requestedTier = (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
-		const allowedTierNames = new Set(model.info.tiers?.map((t) => t.name).filter(Boolean) || [])
-		if (requestedTier && (requestedTier === "default" || allowedTierNames.has(requestedTier))) {
-			requestBody.service_tier = requestedTier
-		}
-
 		// Add reasoning if supported
 		if (reasoningEffort) {
 			requestBody.reasoning = {

0 commit comments