
Commit ebb5a57

Merge pull request RooCodeInc#1558 from RooVetGit/fix_open_ai_usage
Fix usage tracking for SiliconFlow etc
2 parents 13be66f + f306461 commit ebb5a57

3 files changed: +247 -1 lines changed


.changeset/tidy-queens-pay.md

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
---
"roo-cline": patch
---

Fix usage tracking for SiliconFlow etc

Lines changed: 235 additions & 0 deletions
@@ -0,0 +1,235 @@
import { OpenAiHandler } from "../openai"
import { ApiHandlerOptions } from "../../../shared/api"
import { Anthropic } from "@anthropic-ai/sdk"

// Mock OpenAI client with multiple chunks that contain usage data
const mockCreate = jest.fn()
jest.mock("openai", () => {
	return {
		__esModule: true,
		default: jest.fn().mockImplementation(() => ({
			chat: {
				completions: {
					create: mockCreate.mockImplementation(async (options) => {
						if (!options.stream) {
							return {
								id: "test-completion",
								choices: [
									{
										message: { role: "assistant", content: "Test response", refusal: null },
										finish_reason: "stop",
										index: 0,
									},
								],
								usage: {
									prompt_tokens: 10,
									completion_tokens: 5,
									total_tokens: 15,
								},
							}
						}

						// Return a stream with multiple chunks that include usage metrics
						return {
							[Symbol.asyncIterator]: async function* () {
								// First chunk with partial usage
								yield {
									choices: [
										{
											delta: { content: "Test " },
											index: 0,
										},
									],
									usage: {
										prompt_tokens: 10,
										completion_tokens: 2,
										total_tokens: 12,
									},
								}

								// Second chunk with updated usage
								yield {
									choices: [
										{
											delta: { content: "response" },
											index: 0,
										},
									],
									usage: {
										prompt_tokens: 10,
										completion_tokens: 4,
										total_tokens: 14,
									},
								}

								// Final chunk with complete usage
								yield {
									choices: [
										{
											delta: {},
											index: 0,
										},
									],
									usage: {
										prompt_tokens: 10,
										completion_tokens: 5,
										total_tokens: 15,
									},
								}
							},
						}
					}),
				},
			},
		})),
	}
})

describe("OpenAiHandler with usage tracking fix", () => {
	let handler: OpenAiHandler
	let mockOptions: ApiHandlerOptions

	beforeEach(() => {
		mockOptions = {
			openAiApiKey: "test-api-key",
			openAiModelId: "gpt-4",
			openAiBaseUrl: "https://api.openai.com/v1",
		}
		handler = new OpenAiHandler(mockOptions)
		mockCreate.mockClear()
	})

	describe("usage metrics with streaming", () => {
		const systemPrompt = "You are a helpful assistant."
		const messages: Anthropic.Messages.MessageParam[] = [
			{
				role: "user",
				content: [
					{
						type: "text" as const,
						text: "Hello!",
					},
				],
			},
		]

		it("should only yield usage metrics once at the end of the stream", async () => {
			const stream = handler.createMessage(systemPrompt, messages)
			const chunks: any[] = []
			for await (const chunk of stream) {
				chunks.push(chunk)
			}

			// Check we have text chunks
			const textChunks = chunks.filter((chunk) => chunk.type === "text")
			expect(textChunks).toHaveLength(2)
			expect(textChunks[0].text).toBe("Test ")
			expect(textChunks[1].text).toBe("response")

			// Check we only have one usage chunk and it's the last one
			const usageChunks = chunks.filter((chunk) => chunk.type === "usage")
			expect(usageChunks).toHaveLength(1)
			expect(usageChunks[0]).toEqual({
				type: "usage",
				inputTokens: 10,
				outputTokens: 5,
			})

			// Check the usage chunk is the last one reported from the API
			const lastChunk = chunks[chunks.length - 1]
			expect(lastChunk.type).toBe("usage")
			expect(lastChunk.inputTokens).toBe(10)
			expect(lastChunk.outputTokens).toBe(5)
		})

		it("should handle case where usage is only in the final chunk", async () => {
			// Override the mock for this specific test
			mockCreate.mockImplementationOnce(async (options) => {
				if (!options.stream) {
					return {
						id: "test-completion",
						choices: [{ message: { role: "assistant", content: "Test response" } }],
						usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 },
					}
				}

				return {
					[Symbol.asyncIterator]: async function* () {
						// First chunk with no usage
						yield {
							choices: [{ delta: { content: "Test " }, index: 0 }],
							usage: null,
						}

						// Second chunk with no usage
						yield {
							choices: [{ delta: { content: "response" }, index: 0 }],
							usage: null,
						}

						// Final chunk with usage data
						yield {
							choices: [{ delta: {}, index: 0 }],
							usage: {
								prompt_tokens: 10,
								completion_tokens: 5,
								total_tokens: 15,
							},
						}
					},
				}
			})

			const stream = handler.createMessage(systemPrompt, messages)
			const chunks: any[] = []
			for await (const chunk of stream) {
				chunks.push(chunk)
			}

			// Check usage metrics
			const usageChunks = chunks.filter((chunk) => chunk.type === "usage")
			expect(usageChunks).toHaveLength(1)
			expect(usageChunks[0]).toEqual({
				type: "usage",
				inputTokens: 10,
				outputTokens: 5,
			})
		})

		it("should handle case where no usage is provided", async () => {
			// Override the mock for this specific test
			mockCreate.mockImplementationOnce(async (options) => {
				if (!options.stream) {
					return {
						id: "test-completion",
						choices: [{ message: { role: "assistant", content: "Test response" } }],
						usage: null,
					}
				}

				return {
					[Symbol.asyncIterator]: async function* () {
						yield {
							choices: [{ delta: { content: "Test response" }, index: 0 }],
							usage: null,
						}
						yield {
							choices: [{ delta: {}, index: 0 }],
							usage: null,
						}
					},
				}
			})

			const stream = handler.createMessage(systemPrompt, messages)
			const chunks: any[] = []
			for await (const chunk of stream) {
				chunks.push(chunk)
			}

			// Check we don't have any usage chunks
			const usageChunks = chunks.filter((chunk) => chunk.type === "usage")
			expect(usageChunks).toHaveLength(0)
		})
	})
})
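The new file relies on jest.fn and jest.mock, so it is presumably picked up by the project's existing Jest runner. As a hedged usage note, a suite like this can be targeted by its describe name with the standard Jest name-pattern flag:

	npx jest -t "OpenAiHandler with usage tracking fix"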

src/api/providers/openai.ts

Lines changed: 7 additions & 1 deletion
@@ -99,6 +99,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {

 			const stream = await this.client.chat.completions.create(requestOptions)

+			let lastUsage
+
 			for await (const chunk of stream) {
 				const delta = chunk.choices[0]?.delta ?? {}

@@ -116,9 +118,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {
 					}
 				}
 				if (chunk.usage) {
-					yield this.processUsageMetrics(chunk.usage, modelInfo)
+					lastUsage = chunk.usage
 				}
 			}
+
+			if (lastUsage) {
+				yield this.processUsageMetrics(lastUsage, modelInfo)
+			}
 		} else {
 			// o1 for instance doesnt support streaming, non-1 temp, or system prompt
 			const systemMessage: OpenAI.Chat.ChatCompletionUserMessageParam = {
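
For readers skimming the hunk above, the pattern can be summarized as a minimal standalone sketch, assuming only the public openai npm package; streamWithSingleUsage and UsageChunk below are illustrative names, not the project's actual API. OpenAI-compatible providers such as SiliconFlow attach usage data to every streamed chunk, so instead of yielding one usage event per chunk, the loop remembers only the most recent usage object and emits it exactly once after the stream ends.

	import OpenAI from "openai"

	// Illustrative shape for the usage event; the real handler yields richer chunk types.
	type UsageChunk = { type: "usage"; inputTokens: number; outputTokens: number }

	async function* streamWithSingleUsage(
		client: OpenAI,
		params: OpenAI.Chat.ChatCompletionCreateParamsStreaming,
	): AsyncGenerator<string | UsageChunk> {
		const stream = await client.chat.completions.create(params)

		// Some providers report (possibly partial) usage on every chunk;
		// keep only the latest value instead of yielding each one.
		let lastUsage: { prompt_tokens?: number; completion_tokens?: number } | undefined

		for await (const chunk of stream) {
			const content = chunk.choices[0]?.delta?.content
			if (content) {
				yield content
			}
			if (chunk.usage) {
				lastUsage = chunk.usage
			}
		}

		// Emit usage once, using the final (complete) totals.
		if (lastUsage) {
			yield {
				type: "usage",
				inputTokens: lastUsage.prompt_tokens ?? 0,
				outputTokens: lastUsage.completion_tokens ?? 0,
			}
		}
	}

Against the stock OpenAI API, where usage normally arrives only on the final chunk when stream_options.include_usage is set, this buffering changes nothing; on providers that send usage with every chunk it prevents tokens from being counted multiple times, which is what the new tests assert.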
