
Commit c52fdc4

Clamp default model max tokens to 20% of context window (RooCodeInc#6761)
1 parent 2b647ed commit c52fdc4

4 files changed: +80, −14 lines


src/api/providers/__tests__/openrouter.spec.ts

Lines changed: 5 additions & 3 deletions
@@ -98,9 +98,11 @@ describe("OpenRouterHandler", () => {
 		})

 		const result = await handler.fetchModel()
-		expect(result.maxTokens).toBe(128000) // Use actual implementation value
-		expect(result.reasoningBudget).toBeUndefined() // Use actual implementation value
-		expect(result.temperature).toBe(0) // Use actual implementation value
+		// With the new clamping logic, 128000 tokens (64% of 200000 context window)
+		// gets clamped to 20% of context window: 200000 * 0.2 = 40000
+		expect(result.maxTokens).toBe(40000)
+		expect(result.reasoningBudget).toBeUndefined()
+		expect(result.temperature).toBe(0)
 	})

 	it("does not honor custom maxTokens for non-thinking models", async () => {

src/api/transform/__tests__/model-params.spec.ts

Lines changed: 6 additions & 5 deletions
@@ -293,12 +293,12 @@ describe("getModelParams", () => {
 	it("should not honor customMaxThinkingTokens for non-reasoning budget models", () => {
 		const model: ModelInfo = {
 			...baseModel,
-			maxTokens: 4000,
+			maxTokens: 3000, // 3000 is 18.75% of 16000 context window, within 20% threshold
 		}

 		expect(getModelParams({ ...anthropicParams, settings: { modelMaxThinkingTokens: 1500 }, model })).toEqual({
 			format: anthropicParams.format,
-			maxTokens: 4000,
+			maxTokens: 3000, // Uses model.maxTokens since it's within 20% threshold
 			temperature: 0, // Using default temperature.
 			reasoningEffort: undefined,
 			reasoningBudget: undefined, // Should remain undefined despite customMaxThinkingTokens being set.
@@ -565,7 +565,7 @@ describe("getModelParams", () => {
 	it("should use reasoningEffort if supportsReasoningEffort is false but reasoningEffort is set", () => {
 		const model: ModelInfo = {
 			...baseModel,
-			maxTokens: 8000,
+			maxTokens: 3000, // Changed to 3000 (18.75% of 16000), which is within 20% threshold
 			supportsReasoningEffort: false,
 			reasoningEffort: "medium",
 		}
@@ -576,7 +576,7 @@ describe("getModelParams", () => {
 			model,
 		})

-		expect(result.maxTokens).toBe(8000)
+		expect(result.maxTokens).toBe(3000) // Now uses model.maxTokens since it's within 20% threshold
 		expect(result.reasoningEffort).toBe("medium")
 	})
 })
@@ -595,7 +595,8 @@ describe("getModelParams", () => {
 			model,
 		})

-		// Should discard model's maxTokens and use default
+		// For hybrid models (supportsReasoningBudget) in Anthropic contexts,
+		// should discard model's maxTokens and use ANTHROPIC_DEFAULT_MAX_TOKENS
 		expect(result.maxTokens).toBe(ANTHROPIC_DEFAULT_MAX_TOKENS)
 		expect(result.reasoningBudget).toBeUndefined()
 	})

src/shared/__tests__/api.spec.ts

Lines changed: 66 additions & 3 deletions
@@ -25,11 +25,13 @@ describe("getModelMaxOutputTokens", () => {
 		expect(result).toBe(16384)
 	})

-	test("should return model maxTokens when not using claude-code provider", () => {
+	test("should return model maxTokens when not using claude-code provider and maxTokens is within 20% of context window", () => {
 		const settings: ProviderSettings = {
 			apiProvider: "anthropic",
 		}

+		// mockModel has maxTokens: 8192 and contextWindow: 200000
+		// 8192 is 4.096% of 200000, which is <= 20%, so it should use model.maxTokens
 		const result = getModelMaxOutputTokens({
 			modelId: "claude-3-5-sonnet-20241022",
 			model: mockModel,
@@ -115,7 +117,7 @@ describe("getModelMaxOutputTokens", () => {
 			contextWindow: 1_048_576,
 			supportsPromptCache: false,
 			supportsReasoningBudget: true,
-			maxTokens: 65_535,
+			maxTokens: 65_535, // 65_535 is ~6.25% of 1_048_576, which is <= 20%
 		}

 		const settings: ProviderSettings = {
@@ -124,7 +126,68 @@ describe("getModelMaxOutputTokens", () => {
 		}

 		const result = getModelMaxOutputTokens({ modelId: geminiModelId, model, settings })
-		expect(result).toBe(65_535) // Should use model.maxTokens, not ANTHROPIC_DEFAULT_MAX_TOKENS
+		expect(result).toBe(65_535) // Should use model.maxTokens since it's within 20% threshold
+	})
+
+	test("should clamp maxTokens to 20% of context window when maxTokens exceeds threshold", () => {
+		const model: ModelInfo = {
+			contextWindow: 100_000,
+			supportsPromptCache: false,
+			maxTokens: 50_000, // 50% of context window, exceeds 20% threshold
+		}
+
+		const settings: ProviderSettings = {
+			apiProvider: "openai",
+		}
+
+		const result = getModelMaxOutputTokens({
+			modelId: "gpt-4",
+			model,
+			settings,
+			format: "openai",
+		})
+		// Should clamp to 20% of context window: 100_000 * 0.2 = 20_000
+		expect(result).toBe(20_000)
+	})
+
+	test("should clamp maxTokens to 20% of context window for Anthropic models when maxTokens exceeds threshold", () => {
+		const model: ModelInfo = {
+			contextWindow: 100_000,
+			supportsPromptCache: true,
+			maxTokens: 50_000, // 50% of context window, exceeds 20% threshold
+		}
+
+		const settings: ProviderSettings = {
+			apiProvider: "anthropic",
+		}
+
+		const result = getModelMaxOutputTokens({
+			modelId: "claude-3-5-sonnet-20241022",
+			model,
+			settings,
+		})
+		// Should clamp to 20% of context window: 100_000 * 0.2 = 20_000
+		expect(result).toBe(20_000)
+	})
+
+	test("should use model.maxTokens when exactly at 20% threshold", () => {
+		const model: ModelInfo = {
+			contextWindow: 100_000,
+			supportsPromptCache: false,
+			maxTokens: 20_000, // Exactly 20% of context window
+		}
+
+		const settings: ProviderSettings = {
+			apiProvider: "openai",
+		}
+
+		const result = getModelMaxOutputTokens({
+			modelId: "gpt-4",
+			model,
+			settings,
+			format: "openai",
+		})
+		expect(result).toBe(20_000) // Should use model.maxTokens since it's exactly at 20%
 	})

 	test("should return modelMaxTokens from settings when reasoning budget is required", () => {

src/shared/api.ts

Lines changed: 3 additions & 3 deletions
@@ -90,9 +90,9 @@ export const getModelMaxOutputTokens = ({
 		return ANTHROPIC_DEFAULT_MAX_TOKENS
 	}

-	// If model has explicit maxTokens and it's not the full context window, use it
-	if (model.maxTokens && model.maxTokens !== model.contextWindow) {
-		return model.maxTokens
+	// If model has explicit maxTokens, clamp it to 20% of the context window
+	if (model.maxTokens) {
+		return Math.min(model.maxTokens, model.contextWindow * 0.2)
 	}

 	// For non-Anthropic formats without explicit maxTokens, return undefined
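Taken together with the tests above, the new behavior can be summarized in a small self-contained sketch (a simplified reconstruction for illustration; the real getModelMaxOutputTokens also handles the claude-code provider, reasoning budgets, and format-specific fallbacks that this hunk does not show):

	// Sketch of the clamping rule from this commit, isolated from the rest of
	// getModelMaxOutputTokens. Returns undefined when no explicit maxTokens is
	// set, mirroring the non-Anthropic fallback noted in the hunk above.
	function clampedMaxOutputTokens(maxTokens: number | undefined, contextWindow: number): number | undefined {
		if (!maxTokens) return undefined
		return Math.min(maxTokens, contextWindow * 0.2)
	}

	console.log(clampedMaxOutputTokens(50_000, 100_000)) // 20_000: clamped to the 20% ceiling
	console.log(clampedMaxOutputTokens(20_000, 100_000)) // 20_000: exactly at the threshold, used as-is
	console.log(clampedMaxOutputTokens(8_192, 200_000)) // 8_192: well under 20%, unchanged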
