
Commit 3e073f3

fix: address PR review comments
- Remove incorrect fallback to missFromDetails for cache write tokens
- Fix cost calculation to pass total input tokens (calculateApiCostOpenAI handles subtraction)
- Improve readability by extracting cache detail checks to intermediate variables
- Remove redundant ?? undefined
- Update tests to reflect correct behavior (miss tokens are not cache writes)
- Add clarifying comments about cache miss vs cache write tokens
1 parent 89ab175 commit 3e073f3
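
The substantive change is who performs the cache-token subtraction when computing cost. Below is a minimal sketch of that arithmetic, assuming per-million-token pricing; the ModelPricing shape and its field names are illustrative, not the repo's real model.info type, and only the subtraction itself follows the shared/cost.ts behaviour referenced in the diff:

```ts
// Hypothetical pricing shape; the real model.info type is not shown in this diff.
interface ModelPricing {
	inputPrice?: number // USD per 1M uncached input tokens
	outputPrice?: number // USD per 1M output tokens
	cacheWritesPrice?: number // USD per 1M cache-write tokens
	cacheReadsPrice?: number // USD per 1M cache-read tokens
}

// Sketch of the contract the commit relies on: the caller passes TOTAL input
// tokens and the cost function subtracts cache reads/writes internally.
function costSketch(
	info: ModelPricing,
	totalInputTokens: number,
	outputTokens: number,
	cacheWriteTokens = 0,
	cacheReadTokens = 0,
): number {
	// e.g. 100 total - 30 reads - 20 writes = 50 uncached input tokens
	const uncachedInput = Math.max(0, totalInputTokens - cacheReadTokens - cacheWriteTokens)
	const per = (price: number | undefined, tokens: number) => ((price ?? 0) / 1_000_000) * tokens
	return (
		per(info.inputPrice, uncachedInput) +
		per(info.outputPrice, outputTokens) +
		per(info.cacheWritesPrice, cacheWriteTokens) +
		per(info.cacheReadsPrice, cacheReadTokens)
	)
}
```

Under this contract the old provider code, which pre-subtracted cache reads before the call, effectively deducted reads twice; passing the raw total instead fixes that.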

File tree

2 files changed: +49 -25 lines changed

src/api/providers/__tests__/openai-native-usage.spec.ts

Lines changed: 32 additions & 11 deletions
```diff
@@ -33,7 +33,7 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
 				inputTokens: 100,
 				outputTokens: 50,
 				cacheReadTokens: 30,
-				cacheWriteTokens: 70,
+				cacheWriteTokens: 0, // miss tokens are NOT cache writes
 			})
 		})
 
@@ -54,7 +54,7 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
 				inputTokens: 100, // Derived from 30 + 70
 				outputTokens: 50,
 				cacheReadTokens: 30,
-				cacheWriteTokens: 70,
+				cacheWriteTokens: 0, // miss tokens are NOT cache writes
 			})
 		})
 
@@ -75,7 +75,29 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
 				inputTokens: 100,
 				outputTokens: 50,
 				cacheReadTokens: 30,
-				cacheWriteTokens: 70,
+				cacheWriteTokens: 0, // miss tokens are NOT cache writes
+			})
+		})
+
+		it("should handle cache_creation_input_tokens for actual cache writes", () => {
+			const usage = {
+				input_tokens: 100,
+				output_tokens: 50,
+				cache_creation_input_tokens: 20,
+				input_tokens_details: {
+					cached_tokens: 30,
+					cache_miss_tokens: 50, // 50 miss + 30 cached + 20 creation = 100 total
+				},
+			}
+
+			const result = (handler as any).normalizeUsage(usage, mockModel)
+
+			expect(result).toMatchObject({
+				type: "usage",
+				inputTokens: 100,
+				outputTokens: 50,
+				cacheReadTokens: 30,
+				cacheWriteTokens: 20, // Actual cache writes from cache_creation_input_tokens
 			})
 		})
 
@@ -274,7 +296,7 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
 				inputTokens: 100,
 				outputTokens: 50,
 				cacheReadTokens: 20, // From cached_tokens (legacy field comes before details in fallback chain)
-				cacheWriteTokens: 70,
+				cacheWriteTokens: 0, // miss tokens are NOT cache writes
 			})
 		})
 
@@ -296,7 +318,7 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
 				inputTokens: 100,
 				outputTokens: 50,
 				cacheReadTokens: 30, // From details since no legacy field exists
-				cacheWriteTokens: 70,
+				cacheWriteTokens: 0, // miss tokens are NOT cache writes
 			})
 		})
 
@@ -323,21 +345,20 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
 	})
 
 	describe("cost calculation", () => {
-		it("should calculate cost using uncached input tokens", () => {
+		it("should pass total input tokens to calculateApiCostOpenAI", () => {
 			const usage = {
 				input_tokens: 100,
 				output_tokens: 50,
-				input_tokens_details: {
-					cached_tokens: 30,
-					cache_miss_tokens: 70,
-				},
+				cache_read_input_tokens: 30,
+				cache_creation_input_tokens: 20,
 			}
 
 			const result = (handler as any).normalizeUsage(usage, mockModel)
 
 			expect(result).toHaveProperty("totalCost")
 			expect(result.totalCost).toBeGreaterThan(0)
-			// Cost should be calculated with uncachedInputTokens = 100 - 30 = 70
+			// calculateApiCostOpenAI handles subtracting cache tokens internally
+			// It will compute: 100 - 30 - 20 = 50 uncached input tokens
 		})
 
 		it("should handle cost calculation with no cache reads", () => {
```

src/api/providers/openai-native.ts

Lines changed: 17 additions & 14 deletions
```diff
@@ -66,9 +66,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		if (!usage) return undefined
 
 		// Prefer detailed shapes when available (Responses API)
-		const inputDetails = usage.input_tokens_details ?? usage.prompt_tokens_details ?? undefined
-		const cachedFromDetails = inputDetails?.cached_tokens ?? 0
-		const missFromDetails = inputDetails?.cache_miss_tokens ?? 0
+		const inputDetails = usage.input_tokens_details ?? usage.prompt_tokens_details
+
+		// Extract cache information from details with better readability
+		const hasCachedTokens = typeof inputDetails?.cached_tokens === "number"
+		const hasCacheMissTokens = typeof inputDetails?.cache_miss_tokens === "number"
+		const cachedFromDetails = hasCachedTokens ? inputDetails.cached_tokens : 0
+		const missFromDetails = hasCacheMissTokens ? inputDetails.cache_miss_tokens : 0
 
 		// If total input tokens are missing but we have details, derive from them
 		let totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0
@@ -78,22 +82,22 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
 		const totalOutputTokens = usage.output_tokens ?? usage.completion_tokens ?? 0
 
-		const cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cache_write_tokens ?? missFromDetails ?? 0
+		// Note: missFromDetails is NOT used as fallback for cache writes
+		// Cache miss tokens represent tokens that weren't found in cache (part of input)
+		// Cache write tokens represent tokens being written to cache for future use
+		const cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cache_write_tokens ?? 0
 
 		const cacheReadTokens =
 			usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? cachedFromDetails ?? 0
 
-		// Use uncached input tokens for costing to avoid double-counting with cache reads
-		// This aligns with how Gemini calculates costs (see gemini.ts calculateCost method)
-		const uncachedInputTokens =
-			typeof cacheReadTokens === "number" ? Math.max(0, totalInputTokens - cacheReadTokens) : totalInputTokens
-
+		// Pass total input tokens directly to calculateApiCostOpenAI
+		// The function handles subtracting both cache reads and writes internally (see shared/cost.ts:46)
 		const totalCost = calculateApiCostOpenAI(
 			model.info,
-			uncachedInputTokens,
+			totalInputTokens,
 			totalOutputTokens,
-			cacheWriteTokens || 0,
-			cacheReadTokens || 0,
+			cacheWriteTokens,
+			cacheReadTokens,
 		)
 
 		const reasoningTokens =
@@ -103,8 +107,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
 		const out: ApiStreamUsageChunk = {
 			type: "usage",
-			// Keep inputTokens as TOTAL input to preserve correct context length,
-			// cost is computed with uncachedInputTokens above.
+			// Keep inputTokens as TOTAL input to preserve correct context length
 			inputTokens: totalInputTokens,
 			outputTokens: totalOutputTokens,
 			cacheWriteTokens,
```
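
Taken together, the hunks leave normalizeUsage with a simple rule: detail fields only fill gaps, and miss tokens never stand in for writes. A condensed sketch of that flow, assuming a hypothetical UsageLike shape; the guard for deriving the total falls between the hunks, so the condition shown here is a guess:

```ts
// Hypothetical stand-in for the provider's real usage type.
interface UsageLike {
	input_tokens?: number
	prompt_tokens?: number
	cache_creation_input_tokens?: number
	cache_write_tokens?: number
	input_tokens_details?: { cached_tokens?: number; cache_miss_tokens?: number }
}

function normalizeSketch(usage: UsageLike) {
	const details = usage.input_tokens_details
	const cachedFromDetails = typeof details?.cached_tokens === "number" ? details.cached_tokens : 0
	const missFromDetails = typeof details?.cache_miss_tokens === "number" ? details.cache_miss_tokens : 0

	// Derive the total from details only when the top-level fields are absent
	// (assumed guard; the real one sits between the hunks above)
	let totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0
	if (totalInputTokens === 0 && (cachedFromDetails || missFromDetails)) {
		totalInputTokens = cachedFromDetails + missFromDetails // e.g. 30 + 70 = 100
	}

	// Miss tokens are input that bypassed the cache, not a write, so they are
	// deliberately absent from this fallback chain.
	const cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cache_write_tokens ?? 0

	// Simplified: the real chain also checks several legacy read fields first.
	const cacheReadTokens = cachedFromDetails

	return { totalInputTokens, cacheWriteTokens, cacheReadTokens }
}
```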
