Skip to content

Commit 8af0d55

Browse files
hannesrudolph and playcations
authored and committed
fix: add cache reporting support for OpenAI-Native provider (#7602)
* fix: add cache reporting support for OpenAI-Native provider - Add normalizeUsage method to properly extract cache tokens from Responses API - Support both detailed token shapes (input_tokens_details) and legacy fields - Calculate cache read/write tokens with proper fallbacks - Include reasoning tokens when available in output_tokens_details - Ensure accurate cost calculation using uncached input tokens This fixes the issue where caching information was not being reported when using the OpenAI-Native provider with the Responses API. * fix: improve cache token normalization and add comprehensive tests - Add fallback to derive total input tokens from details when totals are missing - Remove unused convertToOpenAiMessages import - Add comment explaining cost calculation alignment with Gemini provider - Add comprehensive test coverage for normalizeUsage method covering: - Detailed token shapes with cached/miss tokens - Legacy field names and SSE-only events - Edge cases including missing totals with details-only - Cost calculation with uncached input tokens * fix: address PR review comments - Remove incorrect fallback to missFromDetails for cache write tokens - Fix cost calculation to pass total input tokens (calculateApiCostOpenAI handles subtraction) - Improve readability by extracting cache detail checks to intermediate variables - Remove redundant ?? undefined - Update tests to reflect correct behavior (miss tokens are not cache writes) - Add clarifying comments about cache miss vs cache write tokens
1 parent a0e13fe commit 8af0d55

File tree

2 files changed

+412
-6
lines changed

2 files changed

+412
-6
lines changed
Lines changed: 377 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,377 @@
// Unit tests for OpenAiNativeHandler.normalizeUsage.
//
// normalizeUsage converts raw usage payloads — both the OpenAI Responses API
// detailed shape (input_tokens_details / output_tokens_details) and legacy
// Chat Completions field names — into the internal usage event:
// { type, inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens,
//   reasoningTokens?, totalCost }.
// Key invariant exercised throughout: cache MISS tokens are not cache WRITES;
// only explicit cache-write fields (e.g. cache_creation_input_tokens) count.
import { describe, it, expect, beforeEach } from "vitest"
import { OpenAiNativeHandler } from "../openai-native"
import { openAiNativeModels } from "@roo-code/types"

describe("OpenAiNativeHandler - normalizeUsage", () => {
	let handler: OpenAiNativeHandler
	// Model fixture; pricing info for cost assertions comes from the shared model table.
	const mockModel = {
		id: "gpt-4o",
		info: openAiNativeModels["gpt-4o"],
	}

	beforeEach(() => {
		handler = new OpenAiNativeHandler({
			openAiNativeApiKey: "test-key",
		})
	})

	describe("detailed token shapes (Responses API)", () => {
		it("should handle detailed shapes with cached and miss tokens", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
				input_tokens_details: {
					cached_tokens: 30,
					cache_miss_tokens: 70,
				},
			}

			// normalizeUsage is private; tests reach it via an any-cast.
			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 30,
				cacheWriteTokens: 0, // miss tokens are NOT cache writes
			})
		})

		it("should derive total input tokens from details when totals are missing", () => {
			const usage = {
				// No input_tokens or prompt_tokens
				output_tokens: 50,
				input_tokens_details: {
					cached_tokens: 30,
					cache_miss_tokens: 70,
				},
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100, // Derived from 30 + 70
				outputTokens: 50,
				cacheReadTokens: 30,
				cacheWriteTokens: 0, // miss tokens are NOT cache writes
			})
		})

		it("should handle prompt_tokens_details variant", () => {
			// Same detailed shape but under the Chat Completions field names.
			const usage = {
				prompt_tokens: 100,
				completion_tokens: 50,
				prompt_tokens_details: {
					cached_tokens: 30,
					cache_miss_tokens: 70,
				},
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 30,
				cacheWriteTokens: 0, // miss tokens are NOT cache writes
			})
		})

		it("should handle cache_creation_input_tokens for actual cache writes", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
				cache_creation_input_tokens: 20,
				input_tokens_details: {
					cached_tokens: 30,
					cache_miss_tokens: 50, // 50 miss + 30 cached + 20 creation = 100 total
				},
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 30,
				cacheWriteTokens: 20, // Actual cache writes from cache_creation_input_tokens
			})
		})

		it("should handle reasoning tokens in output details", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 150,
				output_tokens_details: {
					reasoning_tokens: 50,
				},
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 150,
				reasoningTokens: 50,
			})
		})
	})

	describe("legacy field names", () => {
		it("should handle cache_creation_input_tokens and cache_read_input_tokens", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
				cache_creation_input_tokens: 20,
				cache_read_input_tokens: 30,
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 30,
				cacheWriteTokens: 20,
			})
		})

		it("should handle cache_write_tokens and cache_read_tokens", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
				cache_write_tokens: 20,
				cache_read_tokens: 30,
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 30,
				cacheWriteTokens: 20,
			})
		})

		it("should handle cached_tokens field", () => {
			// Top-level cached_tokens (no details object) is treated as a cache read.
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
				cached_tokens: 30,
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 30,
			})
		})

		it("should handle prompt_tokens and completion_tokens", () => {
			const usage = {
				prompt_tokens: 100,
				completion_tokens: 50,
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 0,
				cacheWriteTokens: 0,
			})
		})
	})

	describe("SSE-only events", () => {
		it("should handle SSE events with minimal usage data", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 0,
				cacheWriteTokens: 0,
			})
		})

		it("should handle SSE events with no cache information", () => {
			const usage = {
				prompt_tokens: 100,
				completion_tokens: 50,
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 0,
				cacheWriteTokens: 0,
			})
		})
	})

	describe("edge cases", () => {
		it("should handle undefined usage", () => {
			const result = (handler as any).normalizeUsage(undefined, mockModel)
			expect(result).toBeUndefined()
		})

		it("should handle null usage", () => {
			const result = (handler as any).normalizeUsage(null, mockModel)
			expect(result).toBeUndefined()
		})

		it("should handle empty usage object", () => {
			// An empty-but-present usage object still yields a zeroed usage event.
			const usage = {}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 0,
				outputTokens: 0,
				cacheReadTokens: 0,
				cacheWriteTokens: 0,
			})
		})

		it("should handle missing details but with cache fields", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
				cache_read_input_tokens: 30,
				// No input_tokens_details
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 30,
				cacheWriteTokens: 0,
			})
		})

		it("should use all available cache information with proper fallbacks", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
				cached_tokens: 20, // Legacy field (will be used as fallback)
				input_tokens_details: {
					cached_tokens: 30, // Detailed shape
					cache_miss_tokens: 70,
				},
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			// The implementation uses nullish coalescing, so it will use the first non-nullish value:
			// cache_read_input_tokens ?? cache_read_tokens ?? cached_tokens ?? cachedFromDetails
			// Since none of the first two exist, it falls back to cached_tokens (20) before cachedFromDetails
			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 20, // From cached_tokens (legacy field comes before details in fallback chain)
				cacheWriteTokens: 0, // miss tokens are NOT cache writes
			})
		})

		it("should use detailed shapes when legacy fields are not present", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
				// No cached_tokens legacy field
				input_tokens_details: {
					cached_tokens: 30,
					cache_miss_tokens: 70,
				},
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 100,
				outputTokens: 50,
				cacheReadTokens: 30, // From details since no legacy field exists
				cacheWriteTokens: 0, // miss tokens are NOT cache writes
			})
		})

		it("should handle totals missing with only partial details", () => {
			const usage = {
				// No input_tokens or prompt_tokens
				output_tokens: 50,
				input_tokens_details: {
					cached_tokens: 30,
					// No cache_miss_tokens
				},
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toMatchObject({
				type: "usage",
				inputTokens: 30, // Derived from cached_tokens only
				outputTokens: 50,
				cacheReadTokens: 30,
				cacheWriteTokens: 0,
			})
		})
	})

	describe("cost calculation", () => {
		it("should pass total input tokens to calculateApiCostOpenAI", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
				cache_read_input_tokens: 30,
				cache_creation_input_tokens: 20,
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toHaveProperty("totalCost")
			expect(result.totalCost).toBeGreaterThan(0)
			// calculateApiCostOpenAI handles subtracting cache tokens internally
			// It will compute: 100 - 30 - 20 = 50 uncached input tokens
		})

		it("should handle cost calculation with no cache reads", () => {
			const usage = {
				input_tokens: 100,
				output_tokens: 50,
			}

			const result = (handler as any).normalizeUsage(usage, mockModel)

			expect(result).toHaveProperty("totalCost")
			expect(result.totalCost).toBeGreaterThan(0)
			// Cost should be calculated with full input tokens since no cache reads
		})
	})
})

0 commit comments

Comments
 (0)