
Commit 3e073f3

fix: address PR review comments
- Remove incorrect fallback to missFromDetails for cache write tokens
- Fix cost calculation to pass total input tokens (calculateApiCostOpenAI handles subtraction)
- Improve readability by extracting cache detail checks to intermediate variables
- Remove redundant ?? undefined
- Update tests to reflect correct behavior (miss tokens are not cache writes)
- Add clarifying comments about cache miss vs cache write tokens
1 parent 89ab175 commit 3e073f3
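
The substantive change is who performs the cache-token subtraction when computing cost. Below is a minimal sketch of that arithmetic, assuming per-million-token pricing; the ModelPricing shape and its field names are illustrative, not the repo's real model.info type, and only the subtraction itself follows the shared/cost.ts behaviour referenced in the diff:

```ts
// Hypothetical pricing shape; the real model.info type is not shown in this diff.
interface ModelPricing {
	inputPrice?: number // USD per 1M uncached input tokens
	outputPrice?: number // USD per 1M output tokens
	cacheWritesPrice?: number // USD per 1M cache-write tokens
	cacheReadsPrice?: number // USD per 1M cache-read tokens
}

// Sketch of the contract the commit relies on: the caller passes TOTAL input
// tokens and the cost function subtracts cache reads/writes internally.
function costSketch(
	info: ModelPricing,
	totalInputTokens: number,
	outputTokens: number,
	cacheWriteTokens = 0,
	cacheReadTokens = 0,
): number {
	// e.g. 100 total - 30 reads - 20 writes = 50 uncached input tokens
	const uncachedInput = Math.max(0, totalInputTokens - cacheReadTokens - cacheWriteTokens)
	const per = (price: number | undefined, tokens: number) => ((price ?? 0) / 1_000_000) * tokens
	return (
		per(info.inputPrice, uncachedInput) +
		per(info.outputPrice, outputTokens) +
		per(info.cacheWritesPrice, cacheWriteTokens) +
		per(info.cacheReadsPrice, cacheReadTokens)
	)
}
```

Under this contract the old provider code, which pre-subtracted cache reads before the call, effectively deducted reads twice; passing the raw total instead fixes that.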

File tree

2 files changed: +49 -25 lines changed

src/api/providers/__tests__/openai-native-usage.spec.ts

Lines changed: 32 additions & 11 deletions
```diff
@@ -33,7 +33,7 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
 				inputTokens: 100,
 				outputTokens: 50,
 				cacheReadTokens: 30,
-				cacheWriteTokens: 70,
+				cacheWriteTokens: 0, // miss tokens are NOT cache writes
 			})
 		})
 
@@ -54,7 +54,7 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
 				inputTokens: 100, // Derived from 30 + 70
 				outputTokens: 50,
 				cacheReadTokens: 30,
-				cacheWriteTokens: 70,
+				cacheWriteTokens: 0, // miss tokens are NOT cache writes
 			})
 		})
 
@@ -75,7 +75,29 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
 				inputTokens: 100,
 				outputTokens: 50,
 				cacheReadTokens: 30,
-				cacheWriteTokens: 70,
+				cacheWriteTokens: 0, // miss tokens are NOT cache writes
+			})
+		})
+
+		it("should handle cache_creation_input_tokens for actual cache writes", () => {
+			const usage = {
+				input_tokens: 100,
+				output_tokens: 50,
+				cache_creation_input_tokens: 20,
+				input_tokens_details: {
+					cached_tokens: 30,
+					cache_miss_tokens: 50, // 50 miss + 30 cached + 20 creation = 100 total
+				},
+			}
+
+			const result = (handler as any).normalizeUsage(usage, mockModel)
+
+			expect(result).toMatchObject({
+				type: "usage",
+				inputTokens: 100,
+				outputTokens: 50,
+				cacheReadTokens: 30,
+				cacheWriteTokens: 20, // Actual cache writes from cache_creation_input_tokens
 			})
 		})
 
@@ -274,7 +296,7 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
 				inputTokens: 100,
 				outputTokens: 50,
 				cacheReadTokens: 20, // From cached_tokens (legacy field comes before details in fallback chain)
-				cacheWriteTokens: 70,
+				cacheWriteTokens: 0, // miss tokens are NOT cache writes
 			})
 		})
 
@@ -296,7 +318,7 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
 				inputTokens: 100,
 				outputTokens: 50,
 				cacheReadTokens: 30, // From details since no legacy field exists
-				cacheWriteTokens: 70,
+				cacheWriteTokens: 0, // miss tokens are NOT cache writes
 			})
 		})
 
@@ -323,21 +345,20 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
 	})
 
 	describe("cost calculation", () => {
-		it("should calculate cost using uncached input tokens", () => {
+		it("should pass total input tokens to calculateApiCostOpenAI", () => {
 			const usage = {
 				input_tokens: 100,
 				output_tokens: 50,
-				input_tokens_details: {
-					cached_tokens: 30,
-					cache_miss_tokens: 70,
-				},
+				cache_read_input_tokens: 30,
+				cache_creation_input_tokens: 20,
 			}
 
 			const result = (handler as any).normalizeUsage(usage, mockModel)
 
 			expect(result).toHaveProperty("totalCost")
 			expect(result.totalCost).toBeGreaterThan(0)
-			// Cost should be calculated with uncachedInputTokens = 100 - 30 = 70
+			// calculateApiCostOpenAI handles subtracting cache tokens internally
+			// It will compute: 100 - 30 - 20 = 50 uncached input tokens
 		})
 
 		it("should handle cost calculation with no cache reads", () => {
```

src/api/providers/openai-native.ts

Lines changed: 17 additions & 14 deletions
```diff
@@ -66,9 +66,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		if (!usage) return undefined
 
 		// Prefer detailed shapes when available (Responses API)
-		const inputDetails = usage.input_tokens_details ?? usage.prompt_tokens_details ?? undefined
-		const cachedFromDetails = inputDetails?.cached_tokens ?? 0
-		const missFromDetails = inputDetails?.cache_miss_tokens ?? 0
+		const inputDetails = usage.input_tokens_details ?? usage.prompt_tokens_details
+
+		// Extract cache information from details with better readability
+		const hasCachedTokens = typeof inputDetails?.cached_tokens === "number"
+		const hasCacheMissTokens = typeof inputDetails?.cache_miss_tokens === "number"
+		const cachedFromDetails = hasCachedTokens ? inputDetails.cached_tokens : 0
+		const missFromDetails = hasCacheMissTokens ? inputDetails.cache_miss_tokens : 0
 
 		// If total input tokens are missing but we have details, derive from them
 		let totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0
@@ -78,22 +82,22 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
 		const totalOutputTokens = usage.output_tokens ?? usage.completion_tokens ?? 0
 
-		const cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cache_write_tokens ?? missFromDetails ?? 0
+		// Note: missFromDetails is NOT used as fallback for cache writes
+		// Cache miss tokens represent tokens that weren't found in cache (part of input)
+		// Cache write tokens represent tokens being written to cache for future use
+		const cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cache_write_tokens ?? 0
 
 		const cacheReadTokens =
 			usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? cachedFromDetails ?? 0
 
-		// Use uncached input tokens for costing to avoid double-counting with cache reads
-		// This aligns with how Gemini calculates costs (see gemini.ts calculateCost method)
-		const uncachedInputTokens =
-			typeof cacheReadTokens === "number" ? Math.max(0, totalInputTokens - cacheReadTokens) : totalInputTokens
-
+		// Pass total input tokens directly to calculateApiCostOpenAI
+		// The function handles subtracting both cache reads and writes internally (see shared/cost.ts:46)
 		const totalCost = calculateApiCostOpenAI(
 			model.info,
-			uncachedInputTokens,
+			totalInputTokens,
 			totalOutputTokens,
-			cacheWriteTokens || 0,
-			cacheReadTokens || 0,
+			cacheWriteTokens,
+			cacheReadTokens,
 		)
 
 		const reasoningTokens =
@@ -103,8 +107,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
 		const out: ApiStreamUsageChunk = {
 			type: "usage",
-			// Keep inputTokens as TOTAL input to preserve correct context length,
-			// cost is computed with uncachedInputTokens above.
+			// Keep inputTokens as TOTAL input to preserve correct context length
 			inputTokens: totalInputTokens,
 			outputTokens: totalOutputTokens,
 			cacheWriteTokens,
```
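
Taken together, the hunks leave normalizeUsage with a simple rule: detail fields only fill gaps, and miss tokens never stand in for writes. A condensed sketch of that flow, assuming a hypothetical UsageLike shape; the guard for deriving the total falls between the hunks, so the condition shown here is a guess:

```ts
// Hypothetical stand-in for the provider's real usage type.
interface UsageLike {
	input_tokens?: number
	prompt_tokens?: number
	cache_creation_input_tokens?: number
	cache_write_tokens?: number
	input_tokens_details?: { cached_tokens?: number; cache_miss_tokens?: number }
}

function normalizeSketch(usage: UsageLike) {
	const details = usage.input_tokens_details
	const cachedFromDetails = typeof details?.cached_tokens === "number" ? details.cached_tokens : 0
	const missFromDetails = typeof details?.cache_miss_tokens === "number" ? details.cache_miss_tokens : 0

	// Derive the total from details only when the top-level fields are absent
	// (assumed guard; the real one sits between the hunks above)
	let totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0
	if (totalInputTokens === 0 && (cachedFromDetails || missFromDetails)) {
		totalInputTokens = cachedFromDetails + missFromDetails // e.g. 30 + 70 = 100
	}

	// Miss tokens are input that bypassed the cache, not a write, so they are
	// deliberately absent from this fallback chain.
	const cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cache_write_tokens ?? 0

	// Simplified: the real chain also checks several legacy read fields first.
	const cacheReadTokens = cachedFromDetails

	return { totalInputTokens, cacheWriteTokens, cacheReadTokens }
}
```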
