Commit 89ab175
fix: improve cache token normalization and add comprehensive tests

- Add fallback to derive total input tokens from details when totals are missing
- Remove unused convertToOpenAiMessages import
- Add comment explaining cost calculation alignment with Gemini provider
- Add comprehensive test coverage for normalizeUsage method covering:
  - Detailed token shapes with cached/miss tokens
  - Legacy field names and SSE-only events
  - Edge cases including missing totals with details-only
  - Cost calculation with uncached input tokens

1 parent e8d4b5b commit 89ab175
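The first bullet is the behavioral change. As a minimal sketch, mirroring the normalizeUsage hunk in the openai-native.ts diff further down: when the usual total fields are absent but a detailed breakdown is present, the input total is derived from the breakdown.

// Sketch of the fallback (same logic as the diff below); `usage` is the raw API payload.
const inputDetails = usage.input_tokens_details ?? usage.prompt_tokens_details ?? undefined
const cachedFromDetails = inputDetails?.cached_tokens ?? 0
const missFromDetails = inputDetails?.cache_miss_tokens ?? 0

// Fall back to summing the details when no total was reported.
let totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0
if (totalInputTokens === 0 && inputDetails && (cachedFromDetails > 0 || missFromDetails > 0)) {
	totalInputTokens = cachedFromDetails + missFromDetails
}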

2 files changed: +368 -7 lines changed
Lines changed: 356 additions & 0 deletions
@@ -0,0 +1,356 @@
import { describe, it, expect, beforeEach } from "vitest"
import { OpenAiNativeHandler } from "../openai-native"
import { openAiNativeModels } from "@roo-code/types"

describe("OpenAiNativeHandler - normalizeUsage", () => {
	let handler: OpenAiNativeHandler
	const mockModel = {
		id: "gpt-4o",
		info: openAiNativeModels["gpt-4o"],
	}

	beforeEach(() => {
		handler = new OpenAiNativeHandler({
			openAiNativeApiKey: "test-key",
		})
	})

	describe("detailed token shapes (Responses API)", () => {
		it("should handle detailed shapes with cached and miss tokens", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
				input_tokens_details: {
					cached_tokens: 30,
					cache_miss_tokens: 70,
				},
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 30,
				cacheWriteTokens: 70,
			})
		})

		it("should derive total input tokens from details when totals are missing", () => {
			const usage = {
				// No input_tokens or prompt_tokens
				output_tokens: 50,
				input_tokens_details: {
					cached_tokens: 30,
					cache_miss_tokens: 70,
				},
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100, // Derived from 30 + 70
				outputTokens: 50,
				cacheReadTokens: 30,
				cacheWriteTokens: 70,
			})
		})

		it("should handle prompt_tokens_details variant", () => {
			const usage = {
				prompt_tokens: 100,
				completion_tokens: 50,
				prompt_tokens_details: {
					cached_tokens: 30,
					cache_miss_tokens: 70,
				},
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 30,
				cacheWriteTokens: 70,
			})
		})

		it("should handle reasoning tokens in output details", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 150,
				output_tokens_details: {
					reasoning_tokens: 50,
				},
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 150,
				reasoningTokens: 50,
			})
		})
	})

	describe("legacy field names", () => {
		it("should handle cache_creation_input_tokens and cache_read_input_tokens", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
				cache_creation_input_tokens: 20,
				cache_read_input_tokens: 30,
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 30,
				cacheWriteTokens: 20,
			})
		})

		it("should handle cache_write_tokens and cache_read_tokens", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
				cache_write_tokens: 20,
				cache_read_tokens: 30,
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 30,
				cacheWriteTokens: 20,
			})
		})

		it("should handle cached_tokens field", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
				cached_tokens: 30,
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 30,
			})
		})

		it("should handle prompt_tokens and completion_tokens", () => {
			const usage = {
				prompt_tokens: 100,
				completion_tokens: 50,
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 0,
				cacheWriteTokens: 0,
			})
		})
	})

	describe("SSE-only events", () => {
		it("should handle SSE events with minimal usage data", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 0,
				cacheWriteTokens: 0,
			})
		})

		it("should handle SSE events with no cache information", () => {
			const usage = {
				prompt_tokens: 100,
				completion_tokens: 50,
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 0,
				cacheWriteTokens: 0,
			})
		})
	})

	describe("edge cases", () => {
		it("should handle undefined usage", () => {
			const result = (handler as any).normalizeUsage(undefined, mockModel)
			expect(result).toBeUndefined()
		})

		it("should handle null usage", () => {
			const result = (handler as any).normalizeUsage(null, mockModel)
			expect(result).toBeUndefined()
		})

		it("should handle empty usage object", () => {
			const usage = {}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 0,
				outputTokens: 0,
				cacheReadTokens: 0,
				cacheWriteTokens: 0,
			})
		})

		it("should handle missing details but with cache fields", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
				cache_read_input_tokens: 30,
				// No input_tokens_details
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 30,
				cacheWriteTokens: 0,
			})
		})

		it("should use all available cache information with proper fallbacks", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
				cached_tokens: 20, // Legacy field (will be used as fallback)
				input_tokens_details: {
					cached_tokens: 30, // Detailed shape
					cache_miss_tokens: 70,
				},
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			// The implementation uses nullish coalescing, so it will use the first non-nullish value:
			// cache_read_input_tokens ?? cache_read_tokens ?? cached_tokens ?? cachedFromDetails
			// Since none of the first two exist, it falls back to cached_tokens (20) before cachedFromDetails
			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 20, // From cached_tokens (legacy field comes before details in fallback chain)
				cacheWriteTokens: 70,
			})
		})

		it("should use detailed shapes when legacy fields are not present", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
				// No cached_tokens legacy field
				input_tokens_details: {
					cached_tokens: 30,
					cache_miss_tokens: 70,
				},
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 30, // From details since no legacy field exists
				cacheWriteTokens: 70,
			})
		})

		it("should handle totals missing with only partial details", () => {
			const usage = {
				// No input_tokens or prompt_tokens
				output_tokens: 50,
				input_tokens_details: {
					cached_tokens: 30,
					// No cache_miss_tokens
				},
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 30, // Derived from cached_tokens only
				outputTokens: 50,
				cacheReadTokens: 30,
				cacheWriteTokens: 0,
			})
		})
	})

	describe("cost calculation", () => {
		it("should calculate cost using uncached input tokens", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
				input_tokens_details: {
					cached_tokens: 30,
					cache_miss_tokens: 70,
				},
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toHaveProperty("totalCost")
			expect(result.totalCost).toBeGreaterThan(0)
			// Cost should be calculated with uncachedInputTokens = 100 - 30 = 70
		})

		it("should handle cost calculation with no cache reads", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toHaveProperty("totalCost")
			expect(result.totalCost).toBeGreaterThan(0)
			// Cost should be calculated with full input tokens since no cache reads
		})
	})
})

src/api/providers/openai-native.ts

Lines changed: 12 additions & 7 deletions
@@ -17,7 +17,6 @@ import type { ApiHandlerOptions } from "../../shared/api"
 
 import { calculateApiCostOpenAI } from "../../shared/cost"
 
-import { convertToOpenAiMessages } from "../transform/openai-format"
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { getModelParams } from "../transform/model-params"
 
@@ -66,20 +65,26 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	private normalizeUsage(usage: any, model: OpenAiNativeModel): ApiStreamUsageChunk | undefined {
 		if (!usage) return undefined
 
-		const totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0
-		const totalOutputTokens = usage.output_tokens ?? usage.completion_tokens ?? 0
-
 		// Prefer detailed shapes when available (Responses API)
-		const inputDetails = (usage.input_tokens_details || usage.prompt_tokens_details) ?? undefined
-		const cachedFromDetails = typeof inputDetails?.cached_tokens === "number" ? inputDetails.cached_tokens : 0
-		const missFromDetails = typeof inputDetails?.cache_miss_tokens === "number" ? inputDetails.cache_miss_tokens : 0
+		const inputDetails = usage.input_tokens_details ?? usage.prompt_tokens_details ?? undefined
+		const cachedFromDetails = inputDetails?.cached_tokens ?? 0
+		const missFromDetails = inputDetails?.cache_miss_tokens ?? 0
+
+		// If total input tokens are missing but we have details, derive from them
+		let totalInputTokens = usage.input_tokens ?? usage.prompt_tokens ?? 0
+		if (totalInputTokens === 0 && inputDetails && (cachedFromDetails > 0 || missFromDetails > 0)) {
+			totalInputTokens = cachedFromDetails + missFromDetails
+		}
+
+		const totalOutputTokens = usage.output_tokens ?? usage.completion_tokens ?? 0
 
 		const cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cache_write_tokens ?? missFromDetails ?? 0
 
 		const cacheReadTokens =
 			usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? cachedFromDetails ?? 0
 
 		// Use uncached input tokens for costing to avoid double-counting with cache reads
+		// This aligns with how Gemini calculates costs (see gemini.ts calculateCost method)
 		const uncachedInputTokens =
 			typeof cacheReadTokens === "number" ? Math.max(0, totalInputTokens - cacheReadTokens) : totalInputTokens
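The added comment captures the costing rationale: cache reads are billed at a cache-read rate, so the regular input rate must only apply to the uncached remainder. A worked example with hypothetical per-million-token rates (the real rates come from the model info passed to calculateApiCostOpenAI):

// Hypothetical rates, per million tokens — not the actual gpt-4o pricing.
const inputPrice = 2.5
const cacheReadPrice = 1.25
const outputPrice = 10

const totalInputTokens = 100
const cacheReadTokens = 30
const outputTokens = 50

// Bill only the uncached remainder at the input rate to avoid double-counting.
const uncachedInputTokens = Math.max(0, totalInputTokens - cacheReadTokens) // 70
const totalCost =
	(uncachedInputTokens * inputPrice + cacheReadTokens * cacheReadPrice + outputTokens * outputPrice) / 1_000_000
// 70 * 2.5 + 30 * 1.25 + 50 * 10 = 712.5 → $0.0007125; charging all 100 input
// tokens at the input rate would have double-counted the 30 cached tokens.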
