
Commit 15a4c4c

Merge pull request RooCodeInc#1305 from RooVetGit/cte/max-tokens-fix
Custom max tokens fix for non-thinking models
2 parents 108e978 + 8a8319d commit 15a4c4c

7 files changed, +242 -146 lines changed

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+---
+"roo-cline": patch
+---
+
+Don't honor custom max tokens for non thinking models
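
In effect, after this change the per-request max-tokens override only applies to models that support extended thinking; non-thinking models fall back to the model's own default. A minimal TypeScript sketch of that rule (the helper name resolveMaxTokens and its parameter shape are illustrative, not the actual implementation):

// Sketch only: resolve the effective max_tokens for a request.
// `info.thinking` flags models that support an extended thinking budget.
function resolveMaxTokens(info: { maxTokens?: number; thinking?: boolean }, customMaxTokens?: number): number {
	const defaultMaxTokens = info.maxTokens ?? 8192
	// Only thinking models honor the user-configured override.
	return info.thinking ? (customMaxTokens ?? defaultMaxTokens) : defaultMaxTokens
}

The anthropic.ts change below implements this rule inside getModel(), and the OpenRouter and Vertex test updates assert the same behavior.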

src/api/providers/__tests__/anthropic.test.ts

Lines changed: 28 additions & 0 deletions
@@ -194,5 +194,33 @@ describe("AnthropicHandler", () => {
 			expect(model.info.supportsImages).toBe(true)
 			expect(model.info.supportsPromptCache).toBe(true)
 		})
+
+		it("honors custom maxTokens for thinking models", () => {
+			const handler = new AnthropicHandler({
+				apiKey: "test-api-key",
+				apiModelId: "claude-3-7-sonnet-20250219:thinking",
+				modelMaxTokens: 32_768,
+				modelMaxThinkingTokens: 16_384,
+			})
+
+			const result = handler.getModel()
+			expect(result.maxTokens).toBe(32_768)
+			expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 16_384 })
+			expect(result.temperature).toBe(1.0)
+		})
+
+		it("does not honor custom maxTokens for non-thinking models", () => {
+			const handler = new AnthropicHandler({
+				apiKey: "test-api-key",
+				apiModelId: "claude-3-7-sonnet-20250219",
+				modelMaxTokens: 32_768,
+				modelMaxThinkingTokens: 16_384,
+			})
+
+			const result = handler.getModel()
+			expect(result.maxTokens).toBe(16_384)
+			expect(result.thinking).toBeUndefined()
+			expect(result.temperature).toBe(0)
+		})
 	})
 })

src/api/providers/__tests__/openrouter.test.ts

Lines changed: 55 additions & 25 deletions
@@ -1,29 +1,30 @@
 // npx jest src/api/providers/__tests__/openrouter.test.ts
 
-import { OpenRouterHandler } from "../openrouter"
-import { ApiHandlerOptions, ModelInfo } from "../../../shared/api"
-import OpenAI from "openai"
 import axios from "axios"
 import { Anthropic } from "@anthropic-ai/sdk"
+import OpenAI from "openai"
+
+import { OpenRouterHandler } from "../openrouter"
+import { ApiHandlerOptions, ModelInfo } from "../../../shared/api"
 
 // Mock dependencies
 jest.mock("openai")
 jest.mock("axios")
 jest.mock("delay", () => jest.fn(() => Promise.resolve()))
 
+const mockOpenRouterModelInfo: ModelInfo = {
+	maxTokens: 1000,
+	contextWindow: 2000,
+	supportsPromptCache: true,
+	inputPrice: 0.01,
+	outputPrice: 0.02,
+}
+
 describe("OpenRouterHandler", () => {
 	const mockOptions: ApiHandlerOptions = {
 		openRouterApiKey: "test-key",
 		openRouterModelId: "test-model",
-		openRouterModelInfo: {
-			name: "Test Model",
-			description: "Test Description",
-			maxTokens: 1000,
-			contextWindow: 2000,
-			supportsPromptCache: true,
-			inputPrice: 0.01,
-			outputPrice: 0.02,
-		} as ModelInfo,
+		openRouterModelInfo: mockOpenRouterModelInfo,
 	}
 
 	beforeEach(() => {
@@ -50,6 +51,10 @@ describe("OpenRouterHandler", () => {
 		expect(result).toEqual({
 			id: mockOptions.openRouterModelId,
 			info: mockOptions.openRouterModelInfo,
+			maxTokens: 1000,
+			temperature: 0,
+			thinking: undefined,
+			topP: undefined,
 		})
 	})
 
@@ -61,6 +66,38 @@ describe("OpenRouterHandler", () => {
 		expect(result.info.supportsPromptCache).toBe(true)
 	})
 
+	test("getModel honors custom maxTokens for thinking models", () => {
+		const handler = new OpenRouterHandler({
+			openRouterApiKey: "test-key",
+			openRouterModelId: "test-model",
+			openRouterModelInfo: {
+				...mockOpenRouterModelInfo,
+				maxTokens: 64_000,
+				thinking: true,
+			},
+			modelMaxTokens: 32_768,
+			modelMaxThinkingTokens: 16_384,
+		})
+
+		const result = handler.getModel()
+		expect(result.maxTokens).toBe(32_768)
+		expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 16_384 })
+		expect(result.temperature).toBe(1.0)
+	})
+
+	test("getModel does not honor custom maxTokens for non-thinking models", () => {
+		const handler = new OpenRouterHandler({
+			...mockOptions,
+			modelMaxTokens: 32_768,
+			modelMaxThinkingTokens: 16_384,
+		})
+
+		const result = handler.getModel()
+		expect(result.maxTokens).toBe(1000)
+		expect(result.thinking).toBeUndefined()
+		expect(result.temperature).toBe(0)
+	})
+
 	test("createMessage generates correct stream chunks", async () => {
 		const handler = new OpenRouterHandler(mockOptions)
 		const mockStream = {
@@ -242,15 +279,7 @@ describe("OpenRouterHandler", () => {
 
 	test("completePrompt returns correct response", async () => {
 		const handler = new OpenRouterHandler(mockOptions)
-		const mockResponse = {
-			choices: [
-				{
-					message: {
-						content: "test completion",
-					},
-				},
-			],
-		}
+		const mockResponse = { choices: [{ message: { content: "test completion" } }] }
 
 		const mockCreate = jest.fn().mockResolvedValue(mockResponse)
 		;(OpenAI as jest.MockedClass<typeof OpenAI>).prototype.chat = {
@@ -260,10 +289,13 @@ describe("OpenRouterHandler", () => {
 		const result = await handler.completePrompt("test prompt")
 
 		expect(result).toBe("test completion")
+
 		expect(mockCreate).toHaveBeenCalledWith({
 			model: mockOptions.openRouterModelId,
-			messages: [{ role: "user", content: "test prompt" }],
+			max_tokens: 1000,
+			thinking: undefined,
 			temperature: 0,
+			messages: [{ role: "user", content: "test prompt" }],
 			stream: false,
 		})
 	})
@@ -292,8 +324,6 @@ describe("OpenRouterHandler", () => {
 			completions: { create: mockCreate },
 		} as any
 
-		await expect(handler.completePrompt("test prompt")).rejects.toThrow(
-			"OpenRouter completion error: Unexpected error",
-		)
+		await expect(handler.completePrompt("test prompt")).rejects.toThrow("Unexpected error")
 	})
 })

src/api/providers/__tests__/vertex.test.ts

Lines changed: 28 additions & 0 deletions
@@ -890,6 +890,34 @@ describe("VertexHandler", () => {
 			expect(modelInfo.info.maxTokens).toBe(8192)
 			expect(modelInfo.info.contextWindow).toBe(1048576)
 		})
+
+		it("honors custom maxTokens for thinking models", () => {
+			const handler = new VertexHandler({
+				apiKey: "test-api-key",
+				apiModelId: "claude-3-7-sonnet@20250219:thinking",
+				modelMaxTokens: 32_768,
+				modelMaxThinkingTokens: 16_384,
+			})
+
+			const result = handler.getModel()
+			expect(result.maxTokens).toBe(32_768)
+			expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 16_384 })
+			expect(result.temperature).toBe(1.0)
+		})
+
+		it("does not honor custom maxTokens for non-thinking models", () => {
+			const handler = new VertexHandler({
+				apiKey: "test-api-key",
+				apiModelId: "claude-3-7-sonnet@20250219",
+				modelMaxTokens: 32_768,
+				modelMaxThinkingTokens: 16_384,
+			})
+
+			const result = handler.getModel()
+			expect(result.maxTokens).toBe(16_384)
+			expect(result.thinking).toBeUndefined()
+			expect(result.temperature).toBe(0)
+		})
 	})
 
 	describe("thinking model configuration", () => {

src/api/providers/anthropic.ts

Lines changed: 30 additions & 35 deletions
@@ -12,8 +12,6 @@ import {
 import { ApiHandler, SingleCompletionHandler } from "../index"
 import { ApiStream } from "../transform/stream"
 
-const ANTHROPIC_DEFAULT_TEMPERATURE = 0
-
 export class AnthropicHandler implements ApiHandler, SingleCompletionHandler {
 	private options: ApiHandlerOptions
 	private client: Anthropic
@@ -30,7 +28,7 @@ export class AnthropicHandler implements ApiHandler, SingleCompletionHandler {
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		let stream: AnthropicStream<Anthropic.Messages.RawMessageStreamEvent>
 		const cacheControl: CacheControlEphemeral = { type: "ephemeral" }
-		let { id: modelId, temperature, maxTokens, thinking } = this.getModel()
+		let { id: modelId, maxTokens, thinking, temperature } = this.getModel()
 
 		switch (modelId) {
 			case "claude-3-7-sonnet-20250219":
@@ -182,55 +180,52 @@ export class AnthropicHandler implements ApiHandler, SingleCompletionHandler {
 
 	getModel() {
 		const modelId = this.options.apiModelId
-		let temperature = this.options.modelTemperature ?? ANTHROPIC_DEFAULT_TEMPERATURE
-		let thinking: BetaThinkingConfigParam | undefined = undefined
 
-		if (modelId && modelId in anthropicModels) {
-			let id = modelId as AnthropicModelId
-			const info: ModelInfo = anthropicModels[id]
+		const {
+			modelMaxTokens: customMaxTokens,
+			modelMaxThinkingTokens: customMaxThinkingTokens,
+			modelTemperature: customTemperature,
+		} = this.options
 
-			// The `:thinking` variant is a virtual identifier for the
-			// `claude-3-7-sonnet-20250219` model with a thinking budget.
-			// We can handle this more elegantly in the future.
-			if (id === "claude-3-7-sonnet-20250219:thinking") {
-				id = "claude-3-7-sonnet-20250219"
-			}
+		let id = modelId && modelId in anthropicModels ? (modelId as AnthropicModelId) : anthropicDefaultModelId
+		const info: ModelInfo = anthropicModels[id]
 
-			const maxTokens = this.options.modelMaxTokens || info.maxTokens || 8192
+		// The `:thinking` variant is a virtual identifier for the
+		// `claude-3-7-sonnet-20250219` model with a thinking budget.
+		// We can handle this more elegantly in the future.
+		if (id === "claude-3-7-sonnet-20250219:thinking") {
+			id = "claude-3-7-sonnet-20250219"
+		}
 
-			if (info.thinking) {
-				// Anthropic "Thinking" models require a temperature of 1.0.
-				temperature = 1.0
+		let maxTokens = info.maxTokens ?? 8192
+		let thinking: BetaThinkingConfigParam | undefined = undefined
+		let temperature = customTemperature ?? 0
 
-				// Clamp the thinking budget to be at most 80% of max tokens and at
-				// least 1024 tokens.
-				const maxBudgetTokens = Math.floor(maxTokens * 0.8)
-				const budgetTokens = Math.max(
-					Math.min(this.options.modelMaxThinkingTokens ?? maxBudgetTokens, maxBudgetTokens),
-					1024,
-				)
+		if (info.thinking) {
+			// Only honor `customMaxTokens` for thinking models.
+			maxTokens = customMaxTokens ?? maxTokens
 
-				thinking = { type: "enabled", budget_tokens: budgetTokens }
-			}
+			// Clamp the thinking budget to be at most 80% of max tokens and at
+			// least 1024 tokens.
+			const maxBudgetTokens = Math.floor(maxTokens * 0.8)
+			const budgetTokens = Math.max(Math.min(customMaxThinkingTokens ?? maxBudgetTokens, maxBudgetTokens), 1024)
+			thinking = { type: "enabled", budget_tokens: budgetTokens }
 
-			return { id, info, temperature, maxTokens, thinking }
+			// Anthropic "Thinking" models require a temperature of 1.0.
+			temperature = 1.0
 		}
 
-		const id = anthropicDefaultModelId
-		const info: ModelInfo = anthropicModels[id]
-		const maxTokens = this.options.modelMaxTokens || info.maxTokens || 8192
-
-		return { id, info, temperature, maxTokens, thinking }
+		return { id, info, maxTokens, thinking, temperature }
	}
 
 	async completePrompt(prompt: string) {
-		let { id: modelId, temperature, maxTokens, thinking } = this.getModel()
+		let { id: modelId, maxTokens, thinking, temperature } = this.getModel()
 
 		const message = await this.client.messages.create({
 			model: modelId,
 			max_tokens: maxTokens,
-			temperature,
 			thinking,
+			temperature,
 			messages: [{ role: "user", content: prompt }],
 			stream: false,
 		})
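
For the thinking path, the budget clamp in getModel() keeps budget_tokens between 1024 and 80% of maxTokens. A small worked sketch using the values from the tests (the standalone helper clampThinkingBudget is illustrative only; the real code inlines this logic):

// Sketch: clamp the thinking budget to [1024, 80% of maxTokens].
function clampThinkingBudget(maxTokens: number, customBudget?: number): number {
	const maxBudgetTokens = Math.floor(maxTokens * 0.8) // e.g. Math.floor(32_768 * 0.8) === 26_214
	return Math.max(Math.min(customBudget ?? maxBudgetTokens, maxBudgetTokens), 1024)
}

// With the test values, clampThinkingBudget(32_768, 16_384) === 16_384 (since 16_384 < 26_214),
// matching the expected thinking config { type: "enabled", budget_tokens: 16_384 }.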
