
Commit 86a4e5b

Add thinking metrics support to GeminiHandler and related components

- Introduced thoughtsTokenCount and thinkingBudget in API responses.
- Updated GeminiHandler to handle thinking models and their configurations.
- Enhanced ChatView, TaskHeader, and related components to display thinking metrics.
- Added tests for thinking metrics in ChatView and TaskHeader.

1 parent c5f48a8 commit 86a4e5b
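In short, the change threads two optional counters through the usage-reporting path: from Gemini's usageMetadata, into the streaming usage chunk, and up to the chat UI. A minimal sketch of the extended shape (a standalone TypeScript illustration; the real interface is ApiStreamUsageChunk in src/api/transform/stream.ts below):

// Illustrative only; mirrors the fields added in this commit
interface UsageChunk {
	type: "usage"
	inputTokens: number
	outputTokens: number
	thoughtsTokenCount?: number // tokens the model spent reasoning, from Gemini's usageMetadata
	thinkingBudget?: number // the configured thinking-token cap, echoed for display
}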

File tree

14 files changed (+550, -37 lines)


src/api/providers/__tests__/gemini.test.ts

Lines changed: 171 additions & 15 deletions
@@ -5,37 +5,63 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import { GeminiHandler } from "../gemini"
 import { geminiDefaultModelId } from "../../../shared/api"
 
-const GEMINI_20_FLASH_THINKING_NAME = "gemini-2.0-flash-thinking-exp-1219"
+const GEMINI_THINKING_MODEL = "gemini-2.5-flash-preview-04-17:thinking"
 
 describe("GeminiHandler", () => {
 	let handler: GeminiHandler
+	let thinkingHandler: GeminiHandler
+	let proHandler: GeminiHandler
 
 	beforeEach(() => {
 		// Create mock functions
 		const mockGenerateContentStream = jest.fn()
 		const mockGenerateContent = jest.fn()
 		const mockGetGenerativeModel = jest.fn()
 
+		// Regular handler without thinking capabilities
 		handler = new GeminiHandler({
 			apiKey: "test-key",
-			apiModelId: GEMINI_20_FLASH_THINKING_NAME,
+			apiModelId: "gemini-2.5-flash-preview-04-17", // Non-thinking model
 			geminiApiKey: "test-key",
 		})
 
-		// Replace the client with our mock
-		handler["client"] = {
+		// Handler with thinking capabilities
+		thinkingHandler = new GeminiHandler({
+			apiKey: "test-key",
+			apiModelId: GEMINI_THINKING_MODEL,
+			geminiApiKey: "test-key",
+		})
+
+		// Pro handler with different capabilities and pricing
+		proHandler = new GeminiHandler({
+			apiKey: "test-key",
+			apiModelId: "gemini-2.5-pro-preview-03-25",
+			geminiApiKey: "test-key",
+		})
+
+		// Replace the clients with our mocks
+		const mockClient = {
 			models: {
 				generateContentStream: mockGenerateContentStream,
 				generateContent: mockGenerateContent,
 				getGenerativeModel: mockGetGenerativeModel,
 			},
 		} as any
+
+		handler["client"] = mockClient
+		thinkingHandler["client"] = { ...mockClient }
+		proHandler["client"] = { ...mockClient }
 	})
 
 	describe("constructor", () => {
 		it("should initialize with provided config", () => {
 			expect(handler["options"].geminiApiKey).toBe("test-key")
-			expect(handler["options"].apiModelId).toBe(GEMINI_20_FLASH_THINKING_NAME)
+			// Regular handler should have non-thinking model
+			expect(handler["options"].apiModelId).toBe("gemini-2.5-flash-preview-04-17")
+			// Thinking handler should have thinking model
+			expect(thinkingHandler["options"].apiModelId).toBe(GEMINI_THINKING_MODEL)
+			// Pro handler should have pro model
+			expect(proHandler["options"].apiModelId).toBe("gemini-2.5-pro-preview-03-25")
 		})
 	})
 
@@ -53,17 +79,18 @@ describe("GeminiHandler", () => {
 
 		const systemPrompt = "You are a helpful assistant"
 
-		it("should handle text messages correctly", async () => {
+		it("should handle text messages without thinking capabilities correctly", async () => {
 			// Setup the mock implementation to return an async generator
-			;(handler["client"].models.generateContentStream as jest.Mock).mockResolvedValue({
+			const mockGenerateContentStream = handler["client"].models.generateContentStream as jest.Mock
+			mockGenerateContentStream.mockResolvedValue({
 				[Symbol.asyncIterator]: async function* () {
 					yield { text: "Hello" }
 					yield { text: " world!" }
 					yield { usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 } }
 				},
 			})
 
-			const stream = handler.createMessage(systemPrompt, mockMessages)
+			const stream = handler.createMessage(systemPrompt, mockMessages) // Using standard handler without thinking capabilities
 			const chunks = []
 
 			for await (const chunk of stream) {
@@ -84,12 +111,119 @@ describe("GeminiHandler", () => {
 				type: "usage",
 				inputTokens: 10,
 				outputTokens: 5,
+				thoughtsTokenCount: undefined, // thoughtsTokenCount should be undefined when not thinking
+				thinkingBudget: undefined, // Added expected field
 			})
 
 			// Verify the call to generateContentStream
+			expect(thinkingHandler["client"].models.generateContentStream).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "gemini-2.5-flash-preview-04-17",
+					config: expect.objectContaining({
+						temperature: 0,
+						systemInstruction: systemPrompt,
+					}),
+				}),
+			)
+		})
+
+		it("should handle text messages with thinking capabilities correctly", async () => {
+			// Setup the mock implementation with thinking tokens for the thinking handler
+			const mockGenerateContentStream = thinkingHandler["client"].models.generateContentStream as jest.Mock
+			mockGenerateContentStream.mockResolvedValue({
+				[Symbol.asyncIterator]: async function* () {
+					yield { text: "Thinking..." }
+					yield {
+						usageMetadata: {
+							promptTokenCount: 10,
+							candidatesTokenCount: 5,
+							thoughtsTokenCount: 25,
+						},
+					}
+				},
+			})
+
+			const stream = thinkingHandler.createMessage(systemPrompt, mockMessages)
+			const chunks = []
+
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Should have 2 chunks: 'Thinking...' and usage info with thinking tokens
+			expect(chunks.length).toBe(2)
+			expect(chunks[0]).toEqual({
+				type: "text",
+				text: "Thinking...",
+			})
+			expect(chunks[1]).toEqual({
+				type: "usage",
+				inputTokens: 10,
+				outputTokens: 5,
+				thoughtsTokenCount: 25,
+				thinkingBudget: 24_576, // From gemini-2.5-flash-preview-04-17:thinking model info
+			})
+
+			// Verify the call includes thinkingConfig
 			expect(handler["client"].models.generateContentStream).toHaveBeenCalledWith(
 				expect.objectContaining({
-					model: GEMINI_20_FLASH_THINKING_NAME,
+					model: "gemini-2.5-flash-preview-04-17",
+					config: expect.objectContaining({
+						temperature: 0,
+						systemInstruction: systemPrompt,
+						thinkingConfig: {
+							thinkingBudget: 24_576,
+						},
+					}),
+				}),
+			)
+		})
+
+		it("should handle text messages with pro model correctly", async () => {
+			// Setup the mock implementation for pro model
+			const mockGenerateContentStream = proHandler["client"].models.generateContentStream as jest.Mock
+			mockGenerateContentStream.mockResolvedValue({
+				[Symbol.asyncIterator]: async function* () {
+					yield { text: "Pro model" }
+					yield { text: " response" }
+					yield {
+						usageMetadata: {
+							promptTokenCount: 15,
+							candidatesTokenCount: 8,
+						},
+					}
+				},
+			})
+
+			const stream = proHandler.createMessage(systemPrompt, mockMessages)
+			const chunks = []
+
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Should have 3 chunks: 'Pro model', ' response', and usage info
+			expect(chunks.length).toBe(3)
+			expect(chunks[0]).toEqual({
+				type: "text",
+				text: "Pro model",
+			})
+			expect(chunks[1]).toEqual({
+				type: "text",
+				text: " response",
+			})
+			expect(chunks[2]).toEqual({
+				type: "usage",
+				inputTokens: 15,
+				outputTokens: 8,
+				thoughtsTokenCount: undefined,
+				thinkingBudget: undefined,
+			})
+
+			// Verify the call to generateContentStream
+			expect(proHandler["client"].models.generateContentStream).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "gemini-2.5-pro-preview-03-25",
 					config: expect.objectContaining({
 						temperature: 0,
 						systemInstruction: systemPrompt,
@@ -113,7 +247,7 @@ describe("GeminiHandler", () => {
 	})
 
 	describe("completePrompt", () => {
-		it("should complete prompt successfully", async () => {
+		it("should complete prompt successfully with non-thinking model", async () => {
 			// Mock the response with text property
 			;(handler["client"].models.generateContent as jest.Mock).mockResolvedValue({
 				text: "Test response",
@@ -124,7 +258,7 @@ describe("GeminiHandler", () => {
 
 			// Verify the call to generateContent
 			expect(handler["client"].models.generateContent).toHaveBeenCalledWith({
-				model: GEMINI_20_FLASH_THINKING_NAME,
+				model: "gemini-2.5-flash-preview-04-17", // Use the non-thinking model ID
 				contents: [{ role: "user", parts: [{ text: "Test prompt" }] }],
 				config: {
 					httpOptions: undefined,
@@ -154,12 +288,34 @@ describe("GeminiHandler", () => {
 	})
 
 	describe("getModel", () => {
-		it("should return correct model info", () => {
+		it("should return correct model info for non-thinking model", () => {
 			const modelInfo = handler.getModel()
-			expect(modelInfo.id).toBe(GEMINI_20_FLASH_THINKING_NAME)
+			expect(modelInfo.id).toBe("gemini-2.5-flash-preview-04-17")
+			expect(modelInfo.info).toBeDefined()
+			expect(modelInfo.thinkingConfig).toBeUndefined()
+			expect(modelInfo.info.maxTokens).toBe(65_535)
+			expect(modelInfo.info.contextWindow).toBe(1_048_576)
+		})
+
+		it("should return correct model info for thinking model", () => {
+			const modelInfo = thinkingHandler.getModel()
+			expect(modelInfo.id).toBe("gemini-2.5-flash-preview-04-17")
+			expect(modelInfo.info).toBeDefined()
+			expect(modelInfo.thinkingConfig).toBeDefined()
+			expect(modelInfo.thinkingConfig?.thinkingBudget).toBe(24_576)
+			expect(modelInfo.info.maxTokens).toBe(65_535)
+			expect(modelInfo.info.contextWindow).toBe(1_048_576)
+		})
+
+		it("should return correct model info for pro model", () => {
+			const modelInfo = proHandler.getModel()
+			expect(modelInfo.id).toBe("gemini-2.5-pro-preview-03-25")
 			expect(modelInfo.info).toBeDefined()
-			expect(modelInfo.info.maxTokens).toBe(8192)
-			expect(modelInfo.info.contextWindow).toBe(32_767)
+			expect(modelInfo.thinkingConfig).toBeUndefined()
+			expect(modelInfo.info.maxTokens).toBe(65_535)
+			expect(modelInfo.info.contextWindow).toBe(1_048_576)
+			expect(modelInfo.info.inputPrice).toBe(2.5)
+			expect(modelInfo.info.outputPrice).toBe(15)
 		})
 
 		it("should return default model if invalid model specified", () => {
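
One subtlety in the setup above: { ...mockClient } is a shallow copy, so all three handlers share the same nested models object and therefore the same jest.fn instances. A call made through any handler is visible on every handler's mock, which is why the non-thinking test can assert through thinkingHandler["client"]. A standalone illustration (not repo code):

const shared = { models: { ping: jest.fn() } }
const copy = { ...shared } // shallow copy: copy.models === shared.models

copy.models.ping()
expect(shared.models.ping).toHaveBeenCalled() // passes: the underlying mock is shared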

src/api/providers/gemini.ts

Lines changed: 19 additions & 10 deletions
@@ -10,7 +10,7 @@ import { SingleCompletionHandler } from "../"
 import type { ApiHandlerOptions, GeminiModelId, ModelInfo } from "../../shared/api"
 import { geminiDefaultModelId, geminiModels } from "../../shared/api"
 import { convertAnthropicContentToGemini, convertAnthropicMessageToGemini } from "../transform/gemini-format"
-import type { ApiStream } from "../transform/stream"
+import type { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { BaseProvider } from "./base-provider"
 
 export class GeminiHandler extends BaseProvider implements SingleCompletionHandler {
@@ -59,7 +59,9 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandler {
 				type: "usage",
 				inputTokens: lastUsageMetadata.promptTokenCount ?? 0,
 				outputTokens: lastUsageMetadata.candidatesTokenCount ?? 0,
-			}
+				thoughtsTokenCount: lastUsageMetadata.thoughtsTokenCount ?? undefined,
+				thinkingBudget: thinkingConfig?.thinkingBudget,
+			} satisfies ApiStreamUsageChunk
 		}
 	}
 
@@ -70,20 +72,27 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandler {
 		maxOutputTokens?: number
 	} {
 		let id = this.options.apiModelId ? (this.options.apiModelId as GeminiModelId) : geminiDefaultModelId
-		let info: ModelInfo = geminiModels[id]
+		const thinkingSuffix = ":thinking"
 		let thinkingConfig: ThinkingConfig | undefined = undefined
 		let maxOutputTokens: number | undefined = undefined
 
-		const thinkingSuffix = ":thinking"
+		// If this is a thinking model, get the info before modifying the ID
+		// so we can access the maxThinkingTokens value
+		let info: ModelInfo = geminiModels[id]
+		const originalInfo = id?.endsWith(thinkingSuffix) ? info : undefined
 
-		if (id?.endsWith(thinkingSuffix)) {
+		if (originalInfo) {
+			console.log("modelMaxThinkingTokens debug:", {
+				value: this.options.modelMaxThinkingTokens,
+				type: typeof this.options.modelMaxThinkingTokens,
+				infoMaxThinkingTokens: originalInfo.maxThinkingTokens,
+			})
+			const maxThinkingTokens = this.options.modelMaxThinkingTokens ?? originalInfo.maxThinkingTokens ?? 4096
+			thinkingConfig = { thinkingBudget: maxThinkingTokens }
+
+			// Remove thinking suffix and get base model info
 			id = id.slice(0, -thinkingSuffix.length) as GeminiModelId
 			info = geminiModels[id]
-
-			thinkingConfig = this.options.modelMaxThinkingTokens
-				? { thinkingBudget: this.options.modelMaxThinkingTokens }
-				: undefined
-
 			maxOutputTokens = this.options.modelMaxTokens ?? info.maxTokens ?? undefined
 		}
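
The net effect of the reworked getModel() branch: for a ":thinking" model id, the model table is consulted before the suffix is stripped, so the model's own default budget (maxThinkingTokens) acts as a fallback when the user has not set modelMaxThinkingTokens, with 4096 as the final default. A standalone sketch of the resolution order (simplified types; table values assumed to match the expectations in the tests above):

type Info = { maxTokens?: number; maxThinkingTokens?: number }

const models: Record<string, Info> = {
	// assumed entries, consistent with the test expectations
	"gemini-2.5-flash-preview-04-17": { maxTokens: 65_535 },
	"gemini-2.5-flash-preview-04-17:thinking": { maxTokens: 65_535, maxThinkingTokens: 24_576 },
}

function resolve(id: string, userBudget?: number) {
	const suffix = ":thinking"
	const originalInfo = id.endsWith(suffix) ? models[id] : undefined
	let thinkingConfig: { thinkingBudget: number } | undefined
	if (originalInfo) {
		// user override first, then the model's default budget, then 4096
		thinkingConfig = { thinkingBudget: userBudget ?? originalInfo.maxThinkingTokens ?? 4096 }
		id = id.slice(0, -suffix.length)
	}
	return { id, info: models[id], thinkingConfig }
}

console.log(resolve("gemini-2.5-flash-preview-04-17:thinking"))
// { id: "gemini-2.5-flash-preview-04-17", info: { maxTokens: 65535 }, thinkingConfig: { thinkingBudget: 24576 } }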

src/api/transform/stream.ts

Lines changed: 2 additions & 0 deletions
@@ -18,4 +18,6 @@ export interface ApiStreamUsageChunk {
 	cacheWriteTokens?: number
 	cacheReadTokens?: number
 	totalCost?: number // openrouter
+	thoughtsTokenCount?: number
+	thinkingBudget?: number
 }
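
With the two optional fields in place, a usage chunk from a thinking-enabled request can carry both counters. A sketch (values borrowed from the Gemini tests above; the import path depends on where the snippet lives):

import type { ApiStreamUsageChunk } from "./stream"

const usage: ApiStreamUsageChunk = {
	type: "usage",
	inputTokens: 10,
	outputTokens: 5,
	thoughtsTokenCount: 25, // reported by the provider when the model "thought"
	thinkingBudget: 24_576, // the configured cap, echoed so the UI can show usage against budget
}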

src/core/Cline.ts

Lines changed: 8 additions & 0 deletions
@@ -1677,6 +1677,8 @@ export class Cline extends EventEmitter<ClineEvents> {
 		let inputTokens = 0
 		let outputTokens = 0
 		let totalCost: number | undefined
+		let thoughtsTokenCount: number | undefined
+		let thinkingBudget: number | undefined
 
 		// update api_req_started. we can't use api_req_finished anymore since it's a unique case where it could come after a streaming message (ie in the middle of being updated or executed)
 		// fortunately api_req_finished was always parsed out for the gui anyways, so it remains solely for legacy purposes to keep track of prices in tasks from history
@@ -1688,6 +1690,8 @@ export class Cline extends EventEmitter<ClineEvents> {
 			tokensOut: outputTokens,
 			cacheWrites: cacheWriteTokens,
 			cacheReads: cacheReadTokens,
+			thoughtsTokenCount,
+			thinkingBudget: thinkingBudget,
 			cost:
 				totalCost ??
 				calculateApiCostAnthropic(
@@ -1781,6 +1785,10 @@ export class Cline extends EventEmitter<ClineEvents> {
 					cacheWriteTokens += chunk.cacheWriteTokens ?? 0
 					cacheReadTokens += chunk.cacheReadTokens ?? 0
 					totalCost = chunk.totalCost
+					if (typeof chunk.thoughtsTokenCount === "number") {
+						thoughtsTokenCount = (thoughtsTokenCount ?? 0) + chunk.thoughtsTokenCount
+					}
+					thinkingBudget = chunk.thinkingBudget
 					break
 				case "text":
 					assistantMessage += chunk.text
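
Note the asymmetry in the accumulation above: thoughtsTokenCount is additive across usage chunks, while thinkingBudget is configuration, so each chunk simply overwrites it and the last reported value wins. A standalone sketch of that fold:

type Usage = { thoughtsTokenCount?: number; thinkingBudget?: number }

function foldUsage(chunks: Usage[]) {
	let thoughtsTokenCount: number | undefined
	let thinkingBudget: number | undefined
	for (const chunk of chunks) {
		if (typeof chunk.thoughtsTokenCount === "number") {
			thoughtsTokenCount = (thoughtsTokenCount ?? 0) + chunk.thoughtsTokenCount // sum
		}
		thinkingBudget = chunk.thinkingBudget // last value wins
	}
	return { thoughtsTokenCount, thinkingBudget }
}

console.log(foldUsage([{ thoughtsTokenCount: 10, thinkingBudget: 24_576 }, { thoughtsTokenCount: 15, thinkingBudget: 24_576 }]))
// { thoughtsTokenCount: 25, thinkingBudget: 24576 }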

src/exports/roo-code.d.ts

Lines changed: 6 additions & 0 deletions
@@ -432,6 +432,8 @@ type TokenUsage = {
 	totalCacheReads?: number | undefined
 	totalCost: number
 	contextTokens: number
+	thoughtsTokenCount?: number | undefined
+	thinkingBudget?: number | undefined
 }
 
 type RooCodeEvents = {
@@ -524,6 +526,8 @@ type RooCodeEvents = {
 			totalCacheReads?: number | undefined
 			totalCost: number
 			contextTokens: number
+			thoughtsTokenCount?: number | undefined
+			thinkingBudget?: number | undefined
 		},
 		{
 			[x: string]: {
@@ -541,6 +545,8 @@ type RooCodeEvents = {
 			totalCacheReads?: number | undefined
 			totalCost: number
 			contextTokens: number
+			thoughtsTokenCount?: number | undefined
+			thinkingBudget?: number | undefined
 		},
 	]
 }
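
Downstream consumers of the public typings can read the new metrics off TokenUsage wherever usage is surfaced. A hypothetical handler (only the field names come from the declaration above; the surrounding event wiring is assumed):

function onTokenUsage(usage: TokenUsage) {
	const thoughts = usage.thoughtsTokenCount ?? 0
	if (usage.thinkingBudget !== undefined) {
		console.log(`thinking: ${thoughts} / ${usage.thinkingBudget} tokens`)
	} else {
		console.log(`thinking: ${thoughts} tokens`)
	}
}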
