Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/ten-bags-hang.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"roo-cline": patch
---

Fix max tokens in task header
12 changes: 8 additions & 4 deletions src/api/transform/model-params.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,20 @@ export function getModelParams({
reasoningEffort = customReasoningEffort ?? model.reasoningEffort
}

// TODO: We should consolidate this logic to compute `maxTokens` with
// `getModelMaxOutputTokens` in order to maintain a single source of truth.

const isAnthropic = format === "anthropic" || (format === "openrouter" && modelId.startsWith("anthropic/"))

// For "Hybrid" reasoning models, we should discard the model's actual
// `maxTokens` value if we're not using reasoning.
if (model.supportsReasoningBudget && !reasoningBudget) {
// `maxTokens` value if we're not using reasoning. We do this for Anthropic
// models only for now. Should we do this for Gemini too?
if (model.supportsReasoningBudget && !reasoningBudget && isAnthropic) {
maxTokens = ANTHROPIC_DEFAULT_MAX_TOKENS
}

// For Anthropic models we should always make sure a `maxTokens` value is
// set.
const isAnthropic = format === "anthropic" || (format === "openrouter" && modelId.startsWith("anthropic/"))

if (!maxTokens && isAnthropic) {
maxTokens = ANTHROPIC_DEFAULT_MAX_TOKENS
}
Expand Down
61 changes: 50 additions & 11 deletions src/shared/__tests__/api.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@ import {
shouldUseReasoningBudget,
shouldUseReasoningEffort,
} from "../api"
import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "../../api/providers/constants"

describe("getMaxTokensForModel", () => {
const modelId = "test"

/**
* Testing the specific fix in commit cc79178f:
* For thinking models, use apiConfig.modelMaxTokens if available,
Expand All @@ -27,7 +30,7 @@ describe("getMaxTokensForModel", () => {
modelMaxTokens: 4000,
}

expect(getModelMaxOutputTokens({ model, settings })).toBe(4000)
expect(getModelMaxOutputTokens({ modelId, model, settings })).toBe(4000)
})

it("should return 16_384 for thinking models when modelMaxTokens not provided", () => {
Expand All @@ -40,7 +43,7 @@ describe("getMaxTokensForModel", () => {

const settings = {}

expect(getModelMaxOutputTokens({ model, settings })).toBe(16_384)
expect(getModelMaxOutputTokens({ modelId, model, settings })).toBe(16_384)
})

it("should return 16_384 for thinking models when apiConfig is undefined", () => {
Expand All @@ -51,7 +54,7 @@ describe("getMaxTokensForModel", () => {
maxTokens: 8000,
}

expect(getModelMaxOutputTokens({ model, settings: undefined })).toBe(16_384)
expect(getModelMaxOutputTokens({ modelId, model, settings: undefined })).toBe(16_384)
})

it("should return modelInfo.maxTokens for non-thinking models", () => {
Expand All @@ -65,7 +68,7 @@ describe("getMaxTokensForModel", () => {
modelMaxTokens: 4000,
}

expect(getModelMaxOutputTokens({ model, settings })).toBe(8000)
expect(getModelMaxOutputTokens({ modelId, model, settings })).toBe(8000)
})

it("should return undefined for non-thinking models with undefined maxTokens", () => {
Expand All @@ -78,7 +81,7 @@ describe("getMaxTokensForModel", () => {
modelMaxTokens: 4000,
}

expect(getModelMaxOutputTokens({ model, settings })).toBeUndefined()
expect(getModelMaxOutputTokens({ modelId, model, settings })).toBeUndefined()
})

test("should return maxTokens from modelInfo when thinking is false", () => {
Expand All @@ -92,7 +95,7 @@ describe("getMaxTokensForModel", () => {
modelMaxTokens: 4096,
}

const result = getModelMaxOutputTokens({ model, settings })
const result = getModelMaxOutputTokens({ modelId, model, settings })
expect(result).toBe(2048)
})

Expand All @@ -108,7 +111,7 @@ describe("getMaxTokensForModel", () => {
modelMaxTokens: 4096,
}

const result = getModelMaxOutputTokens({ model, settings })
const result = getModelMaxOutputTokens({ modelId, model, settings })
expect(result).toBe(4096)
})

Expand All @@ -122,7 +125,7 @@ describe("getMaxTokensForModel", () => {

const settings: ProviderSettings = {}

const result = getModelMaxOutputTokens({ model, settings: undefined })
const result = getModelMaxOutputTokens({ modelId, model, settings: undefined })
expect(result).toBe(16_384)
})

Expand All @@ -133,7 +136,7 @@ describe("getMaxTokensForModel", () => {
maxTokens: 2048,
}

expect(getModelMaxOutputTokens({ model: modelInfoOnly, settings: undefined })).toBe(2048)
expect(getModelMaxOutputTokens({ modelId, model: modelInfoOnly, settings: undefined })).toBe(2048)
})

test("should handle missing properties gracefully", () => {
Expand All @@ -147,15 +150,51 @@ describe("getMaxTokensForModel", () => {
modelMaxTokens: 4096,
}

expect(getModelMaxOutputTokens({ model: modelInfoWithoutMaxTokens, settings })).toBe(4096)
expect(getModelMaxOutputTokens({ modelId, model: modelInfoWithoutMaxTokens, settings })).toBe(4096)

const modelInfoWithoutThinking: ModelInfo = {
contextWindow: 200_000,
supportsPromptCache: true,
maxTokens: 2048,
}

expect(getModelMaxOutputTokens({ model: modelInfoWithoutThinking, settings: undefined })).toBe(2048)
expect(getModelMaxOutputTokens({ modelId, model: modelInfoWithoutThinking, settings: undefined })).toBe(2048)
})

test("should return ANTHROPIC_DEFAULT_MAX_TOKENS for Anthropic models that support reasoning budget but aren't using it", () => {
// Test case for models that support reasoning budget but enableReasoningEffort is false
const anthropicModelId = "claude-sonnet-4-20250514"
const model: ModelInfo = {
contextWindow: 200_000,
supportsPromptCache: true,
supportsReasoningBudget: true,
maxTokens: 64_000, // This should be ignored
}

const settings: ProviderSettings = {
enableReasoningEffort: false, // Not using reasoning
}

const result = getModelMaxOutputTokens({ modelId: anthropicModelId, model, settings })
expect(result).toBe(ANTHROPIC_DEFAULT_MAX_TOKENS) // Should be 8192, not 64_000
})

test("should return model.maxTokens for non-Anthropic models that support reasoning budget but aren't using it", () => {
// Test case for non-Anthropic models - should still use model.maxTokens
const geminiModelId = "gemini-2.5-flash-preview-04-17"
const model: ModelInfo = {
contextWindow: 1_048_576,
supportsPromptCache: false,
supportsReasoningBudget: true,
maxTokens: 65_535,
}

const settings: ProviderSettings = {
enableReasoningEffort: false, // Not using reasoning
}

const result = getModelMaxOutputTokens({ modelId: geminiModelId, model, settings })
expect(result).toBe(65_535) // Should use model.maxTokens, not ANTHROPIC_DEFAULT_MAX_TOKENS
})
})

Expand Down
12 changes: 12 additions & 0 deletions src/shared/api.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "../api/providers/constants"
import { ModelInfo, ProviderName, ProviderSettings } from "../schemas"

export type { ModelInfo, ProviderName, ProviderSettings }
Expand Down Expand Up @@ -1936,15 +1937,26 @@ export const DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS = 16_384
export const DEFAULT_HYBRID_REASONING_MODEL_THINKING_TOKENS = 8_192

/**
 * Resolve the effective max output token count for a model.
 *
 * Precedence:
 *  1. If reasoning-budget mode is active, honor the user's configured
 *     `modelMaxTokens`, falling back to the hybrid-reasoning default.
 *  2. Otherwise, hybrid reasoning models from Anthropic (detected via a
 *     "claude" substring in the model id) have their advertised
 *     `maxTokens` discarded in favor of the Anthropic default, since that
 *     advertised value only applies when reasoning is enabled.
 *  3. Otherwise, fall back to the model's own `maxTokens`, or `undefined`
 *     when the model does not declare one.
 */
export const getModelMaxOutputTokens = ({
	modelId,
	model,
	settings,
}: {
	modelId: string
	model: ModelInfo
	settings?: ProviderSettings
}): number | undefined => {
	if (shouldUseReasoningBudget({ model, settings })) {
		// `||` (not `??`) is deliberate: a configured value of 0 should also
		// fall through to the default.
		return settings?.modelMaxTokens || DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS
	}

	// Anthropic-only clamp for hybrid reasoning models when reasoning is off.
	// NOTE(review): applied to Anthropic for now — TODO evaluate whether
	// Gemini hybrid models need the same treatment.
	const looksLikeClaude = modelId.includes("claude")

	if (looksLikeClaude && model.supportsReasoningBudget) {
		return ANTHROPIC_DEFAULT_MAX_TOKENS
	}

	return model.maxTokens ?? undefined
}
1 change: 1 addition & 0 deletions webview-ui/src/__tests__/ContextWindowProgress.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ jest.mock("@src/components/chat/TaskHeader", () => {
// Mock useSelectedModel hook
jest.mock("@src/components/ui/hooks/useSelectedModel", () => ({
useSelectedModel: jest.fn(() => ({
id: "test",
info: { contextWindow: 4000 },
})),
}))
Expand Down
12 changes: 9 additions & 3 deletions webview-ui/src/components/chat/TaskHeader.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ const TaskHeader = ({
}: TaskHeaderProps) => {
const { t } = useTranslation()
const { apiConfiguration, currentTaskItem } = useExtensionState()
const { info: model } = useSelectedModel(apiConfiguration)
const { id: modelId, info: model } = useSelectedModel(apiConfiguration)
const [isTaskExpanded, setIsTaskExpanded] = useState(false)

const textContainerRef = useRef<HTMLDivElement>(null)
Expand Down Expand Up @@ -101,7 +101,9 @@ const TaskHeader = ({
contextWindow={contextWindow}
contextTokens={contextTokens || 0}
maxTokens={
model ? getModelMaxOutputTokens({ model, settings: apiConfiguration }) : undefined
model
? getModelMaxOutputTokens({ modelId, model, settings: apiConfiguration })
: undefined
}
/>
{!!totalCost && <VSCodeBadge>${totalCost.toFixed(2)}</VSCodeBadge>}
Expand Down Expand Up @@ -140,7 +142,11 @@ const TaskHeader = ({
contextTokens={contextTokens || 0}
maxTokens={
model
? getModelMaxOutputTokens({ model, settings: apiConfiguration })
? getModelMaxOutputTokens({
modelId,
model,
settings: apiConfiguration,
})
: undefined
}
/>
Expand Down
Loading