
Commit e8d4b5b

fix: add cache reporting support for OpenAI-Native provider
- Add normalizeUsage method to properly extract cache tokens from the Responses API
- Support both detailed token shapes (input_tokens_details) and legacy fields
- Calculate cache read/write tokens with proper fallbacks
- Include reasoning tokens when available in output_tokens_details
- Ensure accurate cost calculation using uncached input tokens

This fixes the issue where caching information was not being reported when using the OpenAI-Native provider with the Responses API.
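To make the last bullet concrete, here is a worked example with invented token counts and placeholder prices (none of these figures come from the commit): when part of the input was served from the prompt cache, only the uncached portion should be billed at the full input rate.

// Invented figures: 1,000 total input tokens, 750 of them served from the cache.
const totalInputTokens = 1000
const cacheReadTokens = 750
const uncachedInputTokens = Math.max(0, totalInputTokens - cacheReadTokens) // 250

// Placeholder per-million-token prices, not real OpenAI pricing.
const inputPrice = 2.5
const cacheReadPrice = 1.25

// Cost if all input were billed at the full rate (no cache accounting): 0.0025
const costWithoutCacheAccounting = (totalInputTokens * inputPrice) / 1_000_000
// Cost with uncached input billed at the full rate and cache reads at the cache rate: 0.0015625
const costWithCacheAccounting =
	(uncachedInputTokens * inputPrice + cacheReadTokens * cacheReadPrice) / 1_000_000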
1 parent 2e59347 commit e8d4b5b

File tree

1 file changed: +25 -4 lines changed

src/api/providers/openai-native.ts

Lines changed: 25 additions & 4 deletions
@@ -68,25 +68,46 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
 		const totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0
 		const totalOutputTokens = usage.output_tokens ?? usage.completion_tokens ?? 0
-		const cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cache_write_tokens ?? 0
-		const cacheReadTokens = usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? 0
+
+		// Prefer detailed shapes when available (Responses API)
+		const inputDetails = (usage.input_tokens_details || usage.prompt_tokens_details) ?? undefined
+		const cachedFromDetails = typeof inputDetails?.cached_tokens === "number" ? inputDetails.cached_tokens : 0
+		const missFromDetails = typeof inputDetails?.cache_miss_tokens === "number" ? inputDetails.cache_miss_tokens : 0
+
+		const cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cache_write_tokens ?? missFromDetails ?? 0
+
+		const cacheReadTokens =
+			usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? cachedFromDetails ?? 0
+
+		// Use uncached input tokens for costing to avoid double-counting with cache reads
+		const uncachedInputTokens =
+			typeof cacheReadTokens === "number" ? Math.max(0, totalInputTokens - cacheReadTokens) : totalInputTokens
 
 		const totalCost = calculateApiCostOpenAI(
 			model.info,
-			totalInputTokens,
+			uncachedInputTokens,
 			totalOutputTokens,
 			cacheWriteTokens || 0,
 			cacheReadTokens || 0,
 		)
 
-		return {
+		const reasoningTokens =
+			typeof usage.output_tokens_details?.reasoning_tokens === "number"
+				? usage.output_tokens_details.reasoning_tokens
+				: undefined
+
+		const out: ApiStreamUsageChunk = {
 			type: "usage",
+			// Keep inputTokens as TOTAL input to preserve correct context length,
+			// cost is computed with uncachedInputTokens above.
 			inputTokens: totalInputTokens,
 			outputTokens: totalOutputTokens,
 			cacheWriteTokens,
 			cacheReadTokens,
+			...(typeof reasoningTokens === "number" ? { reasoningTokens } : {}),
 			totalCost,
 		}
+		return out
 	}
 
 	private resolveResponseId(responseId: string | undefined): void {
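For illustration only, a standalone sketch (not part of the commit) of how the fallback order above would resolve a hypothetical Responses API usage payload; the field values are invented and the cost calculation is omitted:

// Standalone sketch mirroring the fallback order in the diff above.
const usage: any = {
	input_tokens: 1500,
	output_tokens: 200,
	input_tokens_details: { cached_tokens: 900 },
	output_tokens_details: { reasoning_tokens: 50 },
}

const inputDetails = usage.input_tokens_details || usage.prompt_tokens_details
const cacheReadTokens =
	usage.cache_read_input_tokens ??
	usage.cache_read_tokens ??
	usage.cached_tokens ??
	(typeof inputDetails?.cached_tokens === "number" ? inputDetails.cached_tokens : 0)
const cacheWriteTokens =
	usage.cache_creation_input_tokens ??
	usage.cache_write_tokens ??
	(typeof inputDetails?.cache_miss_tokens === "number" ? inputDetails.cache_miss_tokens : 0)
const uncachedInputTokens = Math.max(0, usage.input_tokens - cacheReadTokens)

console.log({ cacheReadTokens, cacheWriteTokens, uncachedInputTokens })
// -> { cacheReadTokens: 900, cacheWriteTokens: 0, uncachedInputTokens: 600 }
// The reported inputTokens stays at 1500 (the total), while cost would be
// computed from the 600 uncached input tokens plus 900 cache-read tokens.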

0 commit comments
