
Commit 8729027

Fix max tokens in the task header (#3893)
1 parent fa1e7b4 commit 8729027


6 files changed: 85 additions & 18 deletions


.changeset/ten-bags-hang.md

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+---
+"roo-cline": patch
+---
+
+Fix max tokens in task header

src/api/transform/model-params.ts

Lines changed: 8 additions & 4 deletions
@@ -87,16 +87,20 @@ export function getModelParams({
     reasoningEffort = customReasoningEffort ?? model.reasoningEffort
   }

+  // TODO: We should consolidate this logic to compute `maxTokens` with
+  // `getModelMaxOutputTokens` in order to maintain a single source of truth.
+
+  const isAnthropic = format === "anthropic" || (format === "openrouter" && modelId.startsWith("anthropic/"))
+
   // For "Hybrid" reasoning models, we should discard the model's actual
-  // `maxTokens` value if we're not using reasoning.
-  if (model.supportsReasoningBudget && !reasoningBudget) {
+  // `maxTokens` value if we're not using reasoning. We do this for Anthropic
+  // models only for now. Should we do this for Gemini too?
+  if (model.supportsReasoningBudget && !reasoningBudget && isAnthropic) {
     maxTokens = ANTHROPIC_DEFAULT_MAX_TOKENS
   }

   // For Anthropic models we should always make sure a `maxTokens` value is
   // set.
-  const isAnthropic = format === "anthropic" || (format === "openrouter" && modelId.startsWith("anthropic/"))
-
   if (!maxTokens && isAnthropic) {
     maxTokens = ANTHROPIC_DEFAULT_MAX_TOKENS
   }
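For reference, a minimal standalone TypeScript sketch of the fallback this hunk implements. The 8_192 default and the Anthropic check come from the diff and its tests; the resolveMaxTokens helper, its simplified types, and the example model ids are illustrative only, not the real getModelParams signature.

// Standalone sketch of the maxTokens fallback shown above; names and types are simplified.
const ANTHROPIC_DEFAULT_MAX_TOKENS = 8_192 // assumed value of the imported constant

interface SketchModelInfo {
  maxTokens?: number
  supportsReasoningBudget?: boolean
}

function resolveMaxTokens(
  format: string, // e.g. "anthropic" or "openrouter", per the diff
  modelId: string,
  model: SketchModelInfo,
  reasoningBudget?: number,
): number | undefined {
  let maxTokens = model.maxTokens

  const isAnthropic = format === "anthropic" || (format === "openrouter" && modelId.startsWith("anthropic/"))

  // Hybrid reasoning model with reasoning disabled: clamp to the Anthropic default.
  // The diff narrows this clamp, previously applied regardless of provider, to Anthropic models only.
  if (model.supportsReasoningBudget && !reasoningBudget && isAnthropic) {
    maxTokens = ANTHROPIC_DEFAULT_MAX_TOKENS
  }

  // Anthropic models should always end up with some maxTokens value.
  if (!maxTokens && isAnthropic) {
    maxTokens = ANTHROPIC_DEFAULT_MAX_TOKENS
  }

  return maxTokens
}

// With reasoning off, an Anthropic hybrid model is clamped while another provider's model keeps its own limit.
console.log(resolveMaxTokens("anthropic", "claude-sonnet-4-20250514", { maxTokens: 64_000, supportsReasoningBudget: true })) // 8192
console.log(resolveMaxTokens("gemini", "gemini-2.5-flash-preview-04-17", { maxTokens: 65_535, supportsReasoningBudget: true })) // 65535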

src/shared/__tests__/api.test.ts

Lines changed: 50 additions & 11 deletions
@@ -7,8 +7,11 @@ import {
   shouldUseReasoningBudget,
   shouldUseReasoningEffort,
 } from "../api"
+import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "../../api/providers/constants"

 describe("getMaxTokensForModel", () => {
+  const modelId = "test"
+
   /**
    * Testing the specific fix in commit cc79178f:
    * For thinking models, use apiConfig.modelMaxTokens if available,
@@ -27,7 +30,7 @@ describe("getMaxTokensForModel", () => {
       modelMaxTokens: 4000,
     }

-    expect(getModelMaxOutputTokens({ model, settings })).toBe(4000)
+    expect(getModelMaxOutputTokens({ modelId, model, settings })).toBe(4000)
   })

   it("should return 16_384 for thinking models when modelMaxTokens not provided", () => {
@@ -40,7 +43,7 @@ describe("getMaxTokensForModel", () => {

     const settings = {}

-    expect(getModelMaxOutputTokens({ model, settings })).toBe(16_384)
+    expect(getModelMaxOutputTokens({ modelId, model, settings })).toBe(16_384)
   })

   it("should return 16_384 for thinking models when apiConfig is undefined", () => {
@@ -51,7 +54,7 @@ describe("getMaxTokensForModel", () => {
       maxTokens: 8000,
     }

-    expect(getModelMaxOutputTokens({ model, settings: undefined })).toBe(16_384)
+    expect(getModelMaxOutputTokens({ modelId, model, settings: undefined })).toBe(16_384)
   })

   it("should return modelInfo.maxTokens for non-thinking models", () => {
@@ -65,7 +68,7 @@ describe("getMaxTokensForModel", () => {
       modelMaxTokens: 4000,
     }

-    expect(getModelMaxOutputTokens({ model, settings })).toBe(8000)
+    expect(getModelMaxOutputTokens({ modelId, model, settings })).toBe(8000)
   })

   it("should return undefined for non-thinking models with undefined maxTokens", () => {
@@ -78,7 +81,7 @@ describe("getMaxTokensForModel", () => {
       modelMaxTokens: 4000,
     }

-    expect(getModelMaxOutputTokens({ model, settings })).toBeUndefined()
+    expect(getModelMaxOutputTokens({ modelId, model, settings })).toBeUndefined()
   })

   test("should return maxTokens from modelInfo when thinking is false", () => {
@@ -92,7 +95,7 @@ describe("getMaxTokensForModel", () => {
       modelMaxTokens: 4096,
     }

-    const result = getModelMaxOutputTokens({ model, settings })
+    const result = getModelMaxOutputTokens({ modelId, model, settings })
     expect(result).toBe(2048)
   })

@@ -108,7 +111,7 @@ describe("getMaxTokensForModel", () => {
       modelMaxTokens: 4096,
     }

-    const result = getModelMaxOutputTokens({ model, settings })
+    const result = getModelMaxOutputTokens({ modelId, model, settings })
     expect(result).toBe(4096)
   })

@@ -122,7 +125,7 @@ describe("getMaxTokensForModel", () => {

     const settings: ProviderSettings = {}

-    const result = getModelMaxOutputTokens({ model, settings: undefined })
+    const result = getModelMaxOutputTokens({ modelId, model, settings: undefined })
     expect(result).toBe(16_384)
   })

@@ -133,7 +136,7 @@ describe("getMaxTokensForModel", () => {
       maxTokens: 2048,
     }

-    expect(getModelMaxOutputTokens({ model: modelInfoOnly, settings: undefined })).toBe(2048)
+    expect(getModelMaxOutputTokens({ modelId, model: modelInfoOnly, settings: undefined })).toBe(2048)
   })

   test("should handle missing properties gracefully", () => {
@@ -147,15 +150,51 @@ describe("getMaxTokensForModel", () => {
       modelMaxTokens: 4096,
     }

-    expect(getModelMaxOutputTokens({ model: modelInfoWithoutMaxTokens, settings })).toBe(4096)
+    expect(getModelMaxOutputTokens({ modelId, model: modelInfoWithoutMaxTokens, settings })).toBe(4096)

     const modelInfoWithoutThinking: ModelInfo = {
       contextWindow: 200_000,
       supportsPromptCache: true,
       maxTokens: 2048,
     }

-    expect(getModelMaxOutputTokens({ model: modelInfoWithoutThinking, settings: undefined })).toBe(2048)
+    expect(getModelMaxOutputTokens({ modelId, model: modelInfoWithoutThinking, settings: undefined })).toBe(2048)
+  })
+
+  test("should return ANTHROPIC_DEFAULT_MAX_TOKENS for Anthropic models that support reasoning budget but aren't using it", () => {
+    // Test case for models that support reasoning budget but enableReasoningEffort is false
+    const anthropicModelId = "claude-sonnet-4-20250514"
+    const model: ModelInfo = {
+      contextWindow: 200_000,
+      supportsPromptCache: true,
+      supportsReasoningBudget: true,
+      maxTokens: 64_000, // This should be ignored
+    }
+
+    const settings: ProviderSettings = {
+      enableReasoningEffort: false, // Not using reasoning
+    }
+
+    const result = getModelMaxOutputTokens({ modelId: anthropicModelId, model, settings })
+    expect(result).toBe(ANTHROPIC_DEFAULT_MAX_TOKENS) // Should be 8192, not 64_000
+  })
+
+  test("should return model.maxTokens for non-Anthropic models that support reasoning budget but aren't using it", () => {
+    // Test case for non-Anthropic models - should still use model.maxTokens
+    const geminiModelId = "gemini-2.5-flash-preview-04-17"
+    const model: ModelInfo = {
+      contextWindow: 1_048_576,
+      supportsPromptCache: false,
+      supportsReasoningBudget: true,
+      maxTokens: 65_535,
+    }
+
+    const settings: ProviderSettings = {
+      enableReasoningEffort: false, // Not using reasoning
+    }
+
+    const result = getModelMaxOutputTokens({ modelId: geminiModelId, model, settings })
+    expect(result).toBe(65_535) // Should use model.maxTokens, not ANTHROPIC_DEFAULT_MAX_TOKENS
   })
 })

src/shared/api.ts

Lines changed: 12 additions & 0 deletions
@@ -1,3 +1,4 @@
+import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "../api/providers/constants"
 import { ModelInfo, ProviderName, ProviderSettings } from "../schemas"

 export type { ModelInfo, ProviderName, ProviderSettings }
@@ -1936,15 +1937,26 @@ export const DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS = 16_384
 export const DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS = 8_192

 export const getModelMaxOutputTokens = ({
+  modelId,
   model,
   settings,
 }: {
+  modelId: string
   model: ModelInfo
   settings?: ProviderSettings
 }): number | undefined => {
   if (shouldUseReasoningBudget({ model, settings })) {
     return settings?.modelMaxTokens || DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS
   }

+  const isAnthropicModel = modelId.includes("claude")
+
+  // For "Hybrid" reasoning models, we should discard the model's actual
+  // `maxTokens` value if we're not using reasoning. We do this for Anthropic
+  // models only for now. Should we do this for Gemini too?
+  if (model.supportsReasoningBudget && isAnthropicModel) {
+    return ANTHROPIC_DEFAULT_MAX_TOKENS
+  }
+
   return model.maxTokens ?? undefined
 }
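A call-site sketch of the updated helper, with values mirroring the new tests; the import path is assumed (the helper lives in src/shared/api.ts). Since TaskHeader.tsx feeds this helper's result into the context-window display (see the last file below), passing the model id is what lets the header show the clamped 8_192 limit for Anthropic hybrid models instead of their raw maxTokens, which appears to be the mismatch #3893 addresses.

// Sketch of calling the updated helper; import path assumed.
import { getModelMaxOutputTokens } from "./api"

// Anthropic hybrid reasoning model with reasoning disabled:
// the modelId-based check returns ANTHROPIC_DEFAULT_MAX_TOKENS (8192) instead of 64_000.
const claudeMax = getModelMaxOutputTokens({
  modelId: "claude-sonnet-4-20250514",
  model: { contextWindow: 200_000, supportsPromptCache: true, supportsReasoningBudget: true, maxTokens: 64_000 },
  settings: { enableReasoningEffort: false },
})

// A non-Anthropic model in the same situation keeps its own maxTokens.
const geminiMax = getModelMaxOutputTokens({
  modelId: "gemini-2.5-flash-preview-04-17",
  model: { contextWindow: 1_048_576, supportsPromptCache: false, supportsReasoningBudget: true, maxTokens: 65_535 },
  settings: { enableReasoningEffort: false },
})

console.log(claudeMax, geminiMax) // 8192 65535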

webview-ui/src/__tests__/ContextWindowProgress.test.tsx

Lines changed: 1 addition & 0 deletions
@@ -38,6 +38,7 @@ jest.mock("@src/components/chat/TaskHeader", () => {
 // Mock useSelectedModel hook
 jest.mock("@src/components/ui/hooks/useSelectedModel", () => ({
   useSelectedModel: jest.fn(() => ({
+    id: "test",
     info: { contextWindow: 4000 },
   })),
 }))

webview-ui/src/components/chat/TaskHeader.tsx

Lines changed: 9 additions & 3 deletions
@@ -48,7 +48,7 @@ const TaskHeader = ({
 }: TaskHeaderProps) => {
   const { t } = useTranslation()
   const { apiConfiguration, currentTaskItem } = useExtensionState()
-  const { info: model } = useSelectedModel(apiConfiguration)
+  const { id: modelId, info: model } = useSelectedModel(apiConfiguration)
   const [isTaskExpanded, setIsTaskExpanded] = useState(false)

   const textContainerRef = useRef<HTMLDivElement>(null)
@@ -101,7 +101,9 @@ const TaskHeader = ({
             contextWindow={contextWindow}
             contextTokens={contextTokens || 0}
             maxTokens={
-              model ? getModelMaxOutputTokens({ model, settings: apiConfiguration }) : undefined
+              model
+                ? getModelMaxOutputTokens({ modelId, model, settings: apiConfiguration })
+                : undefined
             }
           />
           {!!totalCost && <VSCodeBadge>${totalCost.toFixed(2)}</VSCodeBadge>}
@@ -140,7 +142,11 @@ const TaskHeader = ({
               contextTokens={contextTokens || 0}
               maxTokens={
                 model
-                  ? getModelMaxOutputTokens({ model, settings: apiConfiguration })
+                  ? getModelMaxOutputTokens({
+                      modelId,
+                      model,
+                      settings: apiConfiguration,
+                    })
                   : undefined
               }
             />
