Skip to content

Commit 1c1a42c

Browse files
committed
feat: Add Advanced Setting for Custom Max Tokens per Provider Profile (#5784)
1 parent 6745c8f commit 1c1a42c

File tree

23 files changed

+437
-1
lines changed

23 files changed

+437
-1
lines changed

src/shared/__tests__/api.spec.ts

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,70 @@ describe("getModelMaxOutputTokens", () => {
154154

155155
expect(getModelMaxOutputTokens({ modelId: "test", model, settings })).toBe(16_384)
156156
})
157+
158+
test("should use user-configured modelMaxTokens for non-reasoning models", () => {
159+
const settings: ProviderSettings = {
160+
modelMaxTokens: 16000,
161+
}
162+
163+
const result = getModelMaxOutputTokens({
164+
modelId: "gpt-4",
165+
model: mockModel,
166+
settings,
167+
})
168+
169+
expect(result).toBe(16000)
170+
})
171+
172+
test("should ignore modelMaxTokens when it's 0 or negative", () => {
173+
const settings: ProviderSettings = {
174+
modelMaxTokens: 0,
175+
}
176+
177+
const result = getModelMaxOutputTokens({
178+
modelId: "claude-3-5-sonnet",
179+
model: mockModel,
180+
settings,
181+
})
182+
183+
// Should fall back to model's maxTokens
184+
expect(result).toBe(8192)
185+
})
186+
187+
test("should prioritize user-configured modelMaxTokens over model's default", () => {
188+
const modelWithHighMaxTokens: ModelInfo = {
189+
maxTokens: 64000,
190+
contextWindow: 200000,
191+
supportsPromptCache: true,
192+
}
193+
194+
const settings: ProviderSettings = {
195+
modelMaxTokens: 32000,
196+
}
197+
198+
const result = getModelMaxOutputTokens({
199+
modelId: "some-model",
200+
model: modelWithHighMaxTokens,
201+
settings,
202+
})
203+
204+
expect(result).toBe(32000)
205+
})
206+
207+
test("should use modelMaxTokens even for Anthropic models when configured", () => {
208+
const settings: ProviderSettings = {
209+
modelMaxTokens: 20000,
210+
}
211+
212+
const result = getModelMaxOutputTokens({
213+
modelId: "claude-3-5-sonnet",
214+
model: mockModel,
215+
settings,
216+
format: "anthropic",
217+
})
218+
219+
expect(result).toBe(20000)
220+
})
157221
})
158222

159223
describe("shouldUseReasoningBudget", () => {

src/shared/api.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,14 @@ export const getModelMaxOutputTokens = ({
7070
return settings.claudeCodeMaxOutputTokens || CLAUDE_CODE_DEFAULT_MAX_OUTPUT_TOKENS
7171
}
7272

73+
// Check for user-configured modelMaxTokens FIRST (new logic)
74+
if (settings?.modelMaxTokens && settings.modelMaxTokens > 0) {
75+
return settings.modelMaxTokens
76+
}
77+
78+
// Existing reasoning budget logic
7379
if (shouldUseReasoningBudget({ model, settings })) {
74-
return settings?.modelMaxTokens || DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS
80+
return DEFAULT_HYBRID_REASONING_MODEL_MAX_TOKENS
7581
}
7682

7783
const isAnthropicContext =

webview-ui/src/components/settings/ApiOptions.tsx

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ import { DiffSettingsControl } from "./DiffSettingsControl"
8181
import { TemperatureControl } from "./TemperatureControl"
8282
import { RateLimitSecondsControl } from "./RateLimitSecondsControl"
8383
import { ConsecutiveMistakeLimitControl } from "./ConsecutiveMistakeLimitControl"
84+
import { MaxTokensControl } from "./MaxTokensControl"
8485
import { BedrockCustomArn } from "./providers/BedrockCustomArn"
8586
import { buildDocLink } from "@src/utils/docLinks"
8687

@@ -574,6 +575,13 @@ const ApiOptions = ({
574575
onChange={handleInputChange("modelTemperature", noTransform)}
575576
maxValue={2}
576577
/>
578+
<MaxTokensControl
579+
value={apiConfiguration.modelMaxTokens}
580+
onChange={(value) => setApiConfigurationField("modelMaxTokens", value)}
581+
modelInfo={selectedModelInfo}
582+
minValue={1000}
583+
maxValue={selectedModelInfo?.maxTokens || 200000}
584+
/>
577585
<RateLimitSecondsControl
578586
value={apiConfiguration.rateLimitSeconds || 0}
579587
onChange={(value) => setApiConfigurationField("rateLimitSeconds", value)}
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
import React from "react"
import { useAppTranslation } from "@/i18n/TranslationContext"
import { ModelInfo } from "@roo-code/types"
import { Input } from "@/components/ui"

interface MaxTokensControlProps {
	/** Current user-configured max output tokens; undefined means "not set". */
	value?: number
	/** Called with the parsed number, or undefined when the field is cleared. */
	onChange: (value: number | undefined) => void
	/** Info for the currently selected model; its maxTokens caps the input when known. */
	modelInfo?: ModelInfo
	/** Lower bound for validation (default 1000). */
	minValue?: number
	/** Fallback upper bound used when the model does not declare maxTokens (default 200000). */
	maxValue?: number
	className?: string
}

/**
 * Numeric input for the per-profile "Max Output Tokens" advanced setting.
 * Shows inline validation when the value is below minValue or above the
 * model's declared maximum, and a hint with the model's supported maximum
 * when the value is valid.
 */
export const MaxTokensControl: React.FC<MaxTokensControlProps> = ({
	value,
	onChange,
	modelInfo,
	minValue = 1000,
	maxValue = 200000,
	className,
}) => {
	const { t } = useAppTranslation()

	const handleChange = (e: React.ChangeEvent<HTMLInputElement>) => {
		const inputValue = e.target.value

		// An empty field means "unset" — revert to default behavior.
		if (inputValue === "") {
			onChange(undefined)
			return
		}

		// Ignore unparseable input rather than propagating NaN.
		const numValue = parseInt(inputValue, 10)
		if (!isNaN(numValue)) {
			onChange(numValue)
		}
	}

	// Prefer the model's own limit when it declares one; otherwise use the prop.
	const effectiveMaxValue = modelInfo?.maxTokens || maxValue
	// 8192 mirrors the default documented in the setting's description.
	const displayValue = value ?? 8192

	const isValueTooHigh = displayValue > effectiveMaxValue
	const isValueTooLow = displayValue < minValue
	const hasError = isValueTooHigh || isValueTooLow

	return (
		<div className={`flex flex-col gap-1 ${className || ""}`}>
			<label htmlFor="max-output-tokens" className="block font-medium mb-1">
				{t("settings:providers.maxOutputTokens.label")}
			</label>
			<Input
				id="max-output-tokens"
				type="number"
				value={displayValue}
				onChange={handleChange}
				min={minValue}
				max={effectiveMaxValue}
				className={`w-full ${hasError ? "border-red-500 focus:border-red-500" : ""}`}
			/>
			<div className="text-sm text-vscode-descriptionForeground">
				{t("settings:providers.maxOutputTokens.description")}
			</div>
			{isValueTooHigh && (
				<div className="text-sm text-red-500">
					{t("settings:providers.maxOutputTokens.validation.tooHigh", { max: effectiveMaxValue })}
				</div>
			)}
			{isValueTooLow && (
				<div className="text-sm text-red-500">
					{t("settings:providers.maxOutputTokens.validation.tooLow", { min: minValue })}
				</div>
			)}
			{/* Only show the "model supports up to N" hint when the model actually
			    declares a maximum — previously an absent maxTokens rendered as
			    "up to undefined tokens". */}
			{modelInfo?.maxTokens != null && !hasError && (
				<div className="text-sm text-vscode-descriptionForeground">
					{t("settings:providers.maxOutputTokens.modelSupports", { max: modelInfo.maxTokens })}
				</div>
			)}
		</div>
	)
}
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
// NOTE(review): `beforeEach` was used without being imported; unless vitest's
// `globals` option is enabled that is a ReferenceError. Importing it explicitly
// is correct in both configurations.
import { describe, test, expect, vi, beforeEach } from "vitest"
import { render, screen, fireEvent } from "@testing-library/react"
import { MaxTokensControl } from "../MaxTokensControl"
import { ModelInfo } from "@roo-code/types"

// Mock the translation hook: returns the raw key, with {{param}} interpolation
// applied when params are supplied, so assertions can target stable keys.
vi.mock("@/i18n/TranslationContext", () => ({
	useAppTranslation: () => ({
		t: (key: string, params?: any) => {
			if (params) {
				return key.replace(/\{\{(\w+)\}\}/g, (_, p) => params[p])
			}
			return key
		},
	}),
}))

describe("MaxTokensControl", () => {
	const mockOnChange = vi.fn()
	const defaultProps = {
		onChange: mockOnChange,
	}

	beforeEach(() => {
		mockOnChange.mockClear()
	})

	test("should render with default value of 8192", () => {
		render(<MaxTokensControl {...defaultProps} />)

		const input = screen.getByRole("spinbutton") as HTMLInputElement
		expect(input.value).toBe("8192")
	})

	test("should render with provided value", () => {
		render(<MaxTokensControl {...defaultProps} value={16000} />)

		const input = screen.getByRole("spinbutton") as HTMLInputElement
		expect(input.value).toBe("16000")
	})

	test("should call onChange when value changes", () => {
		render(<MaxTokensControl {...defaultProps} />)

		const input = screen.getByRole("spinbutton")
		fireEvent.change(input, { target: { value: "20000" } })

		expect(mockOnChange).toHaveBeenCalledWith(20000)
	})

	test("should call onChange with undefined when input is cleared", () => {
		render(<MaxTokensControl {...defaultProps} value={16000} />)

		const input = screen.getByRole("spinbutton")
		fireEvent.change(input, { target: { value: "" } })

		expect(mockOnChange).toHaveBeenCalledWith(undefined)
	})

	test("should show validation error when value exceeds model max", () => {
		const modelInfo: ModelInfo = {
			maxTokens: 10000,
			contextWindow: 100000,
			supportsPromptCache: true,
		}

		render(<MaxTokensControl {...defaultProps} value={15000} modelInfo={modelInfo} />)

		expect(screen.getByText("settings:providers.maxOutputTokens.validation.tooHigh")).toBeInTheDocument()
	})

	test("should show validation error when value is below minimum", () => {
		render(<MaxTokensControl {...defaultProps} value={500} minValue={1000} />)

		expect(screen.getByText("settings:providers.maxOutputTokens.validation.tooLow")).toBeInTheDocument()
	})

	test("should show model support message when valid", () => {
		const modelInfo: ModelInfo = {
			maxTokens: 64000,
			contextWindow: 200000,
			supportsPromptCache: true,
		}

		render(<MaxTokensControl {...defaultProps} value={8192} modelInfo={modelInfo} />)

		expect(screen.getByText("settings:providers.maxOutputTokens.modelSupports")).toBeInTheDocument()
	})

	test("should use custom min and max values", () => {
		render(<MaxTokensControl {...defaultProps} minValue={2000} maxValue={50000} />)

		const input = screen.getByRole("spinbutton") as HTMLInputElement
		expect(input.min).toBe("2000")
		expect(input.max).toBe("50000")
	})

	test("should use model's maxTokens as max value when available", () => {
		const modelInfo: ModelInfo = {
			maxTokens: 32000,
			contextWindow: 100000,
			supportsPromptCache: true,
		}

		render(<MaxTokensControl {...defaultProps} modelInfo={modelInfo} />)

		const input = screen.getByRole("spinbutton") as HTMLInputElement
		expect(input.max).toBe("32000")
	})

	test("should apply error styling when validation fails", () => {
		render(<MaxTokensControl {...defaultProps} value={500} minValue={1000} />)

		const input = screen.getByRole("spinbutton")
		expect(input.className).toContain("border-red-500")
	})
})

webview-ui/src/i18n/locales/ca/settings.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,15 @@
386386
"placeholder": "Per defecte: claude",
387387
"maxTokensLabel": "Tokens màxims de sortida",
388388
"maxTokensDescription": "Nombre màxim de tokens de sortida per a les respostes de Claude Code. El valor per defecte és 8000."
389+
},
390+
"maxOutputTokens": {
391+
"label": "Tokens màxims de sortida",
392+
"description": "Nombre màxim de tokens a reservar per a la sortida del model. El valor predeterminat és 8192.",
393+
"validation": {
394+
"tooHigh": "El valor excedeix el màxim del model de {{max}} tokens",
395+
"tooLow": "El valor ha de ser almenys {{min}} tokens"
396+
},
397+
"modelSupports": "Aquest model admet fins a {{max}} tokens"
389398
}
390399
},
391400
"browser": {

webview-ui/src/i18n/locales/de/settings.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,15 @@
386386
"placeholder": "Standard: claude",
387387
"maxTokensLabel": "Maximale Ausgabe-Tokens",
388388
"maxTokensDescription": "Maximale Anzahl an Ausgabe-Tokens für Claude Code-Antworten. Standard ist 8000."
389+
},
390+
"maxOutputTokens": {
391+
"label": "Maximale Ausgabe-Token",
392+
"description": "Maximale Anzahl von Token, die für die Modellausgabe reserviert werden sollen. Standard ist 8192.",
393+
"validation": {
394+
"tooHigh": "Wert überschreitet das Maximum des Modells von {{max}} Token",
395+
"tooLow": "Wert muss mindestens {{min}} Token betragen"
396+
},
397+
"modelSupports": "Dieses Modell unterstützt bis zu {{max}} Token"
389398
}
390399
},
391400
"browser": {

webview-ui/src/i18n/locales/en/settings.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,15 @@
367367
"label": "Rate limit",
368368
"description": "Minimum time between API requests."
369369
},
370+
"maxOutputTokens": {
371+
"label": "Max Output Tokens",
372+
"description": "Maximum number of tokens to reserve for model output. Default is 8192.",
373+
"validation": {
374+
"tooHigh": "Value exceeds model's maximum of {{max}} tokens",
375+
"tooLow": "Value must be at least {{min}} tokens"
376+
},
377+
"modelSupports": "This model supports up to {{max}} tokens"
378+
},
370379
"consecutiveMistakeLimit": {
371380
"label": "Error & Repetition Limit",
372381
"description": "Number of consecutive errors or repeated actions before showing 'Roo is having trouble' dialog",

webview-ui/src/i18n/locales/es/settings.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,15 @@
367367
"label": "Límite de tasa",
368368
"description": "Tiempo mínimo entre solicitudes de API."
369369
},
370+
"maxOutputTokens": {
371+
"label": "Tokens máximos de salida",
372+
"description": "Número máximo de tokens a reservar para la salida del modelo. El valor predeterminado es 8192.",
373+
"validation": {
374+
"tooHigh": "El valor excede el máximo del modelo de {{max}} tokens",
375+
"tooLow": "El valor debe ser al menos {{min}} tokens"
376+
},
377+
"modelSupports": "Este modelo admite hasta {{max}} tokens"
378+
},
370379
"consecutiveMistakeLimit": {
371380
"label": "Límite de errores y repeticiones",
372381
"description": "Número de errores consecutivos o acciones repetidas antes de mostrar el diálogo 'Roo está teniendo problemas'",

webview-ui/src/i18n/locales/fr/settings.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,15 @@
386386
"placeholder": "Défaut : claude",
387387
"maxTokensLabel": "Jetons de sortie max",
388388
"maxTokensDescription": "Nombre maximum de jetons de sortie pour les réponses de Claude Code. La valeur par défaut est 8000."
389+
},
390+
"maxOutputTokens": {
391+
"label": "Tokens de sortie maximum",
392+
"description": "Nombre maximum de tokens à réserver pour la sortie du modèle. La valeur par défaut est 8192.",
393+
"validation": {
394+
"tooHigh": "La valeur dépasse le maximum du modèle de {{max}} tokens",
395+
"tooLow": "La valeur doit être d'au moins {{min}} tokens"
396+
},
397+
"modelSupports": "Ce modèle prend en charge jusqu'à {{max}} tokens"
389398
}
390399
},
391400
"browser": {

0 commit comments

Comments
 (0)