Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
efaaed9
feat: add Issue Fixer Orchestrator mode
MuriloFP Jul 3, 2025
57d3fbe
Merge branch 'RooCodeInc:main' into main
MuriloFP Jul 3, 2025
ef61905
Merge branch 'RooCodeInc:main' into main
MuriloFP Jul 4, 2025
f5a51c4
Merge branch 'RooCodeInc:main' into main
MuriloFP Jul 4, 2025
bcbf329
Merge branch 'RooCodeInc:main' into main
MuriloFP Jul 5, 2025
80413c0
Merge branch 'RooCodeInc:main' into main
MuriloFP Jul 5, 2025
ab10140
Merge branch 'RooCodeInc:main' into main
MuriloFP Jul 7, 2025
39c5cf7
Merge branch 'RooCodeInc:main' into main
MuriloFP Jul 7, 2025
00a0b63
Merge branch 'RooCodeInc:main' into main
MuriloFP Jul 8, 2025
080b61b
Merge branch 'RooCodeInc:main' into main
MuriloFP Jul 8, 2025
7a5ad14
Merge branch 'RooCodeInc:main' into main
MuriloFP Jul 8, 2025
2c73ff2
Merge branch 'RooCodeInc:main' into main
MuriloFP Jul 9, 2025
05ccf57
Merge branch 'RooCodeInc:main' into main
MuriloFP Jul 10, 2025
fdb1f35
Merge branch 'RooCodeInc:main' into main
MuriloFP Jul 11, 2025
10ce509
Merge branch 'RooCodeInc:main' into main
MuriloFP Jul 14, 2025
ab1f9fc
Merge branch 'RooCodeInc:main' into main
MuriloFP Jul 15, 2025
74fd8b4
Merge branch 'RooCodeInc:main' into main
MuriloFP Jul 15, 2025
6745c8f
Merge branch 'RooCodeInc:main' into main
MuriloFP Jul 16, 2025
1c1a42c
feat: Add Advanced Setting for Custom Max Tokens per Provider Profile…
MuriloFP Jul 16, 2025
28b2f56
fix: add missing Input mock to ApiOptions test suite
MuriloFP Jul 16, 2025
4812569
fix: update provider tests to expect custom maxTokens for all models
MuriloFP Jul 16, 2025
1829d51
Update webview-ui/src/i18n/locales/tr/settings.json
MuriloFP Jul 16, 2025
08487e4
fix(settings): address PR review feedback for custom max tokens feature
MuriloFP Jul 17, 2025
714da87
test: update provider tests to expect capped maxTokens values
MuriloFP Jul 17, 2025
e6cc00b
fix(ui): allow clearing max tokens input field before entering new value
MuriloFP Jul 21, 2025
3a527b7
fix: remove Max Tokens slider from ThinkingBudget component
MuriloFP Jul 24, 2025
9c2fb52
fix: allow thinking tokens slider to go from 0 to 80% of max tokens
MuriloFP Jul 24, 2025
1e76736
fix: increase min max output tokens to 2048 to support reasoning models
MuriloFP Jul 24, 2025
ca0fb2d
fix: enforce minimum thinking tokens of 1024 in UI slider
MuriloFP Jul 24, 2025
83522e2
fix: replace MaxTokensControl input with MaxTokensSlider
MuriloFP Jul 29, 2025
6a4653a
fix: ensure OpenAI-compatible providers use custom max tokens setting
MuriloFP Jul 31, 2025
e3a3883
fix: ensure user's max tokens setting overrides model defaults across…
MuriloFP Jul 31, 2025
7623d21
fix: update OpenAI test to reflect correct max tokens behavior
MuriloFP Jul 31, 2025
8ade97f
fix: revert BaseOpenAiCompatibleProvider and hide max tokens slider f…
MuriloFP Jul 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/api/providers/__tests__/anthropic-vertex.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -705,7 +705,7 @@ describe("VertexHandler", () => {
expect(result.temperature).toBe(1.0)
})

it("does not honor custom maxTokens for non-thinking models", () => {
it("honors custom maxTokens for all models", () => {
const handler = new AnthropicVertexHandler({
apiKey: "test-api-key",
apiModelId: "claude-3-7-sonnet@20250219",
Expand All @@ -714,7 +714,7 @@ describe("VertexHandler", () => {
})

const result = handler.getModel()
expect(result.maxTokens).toBe(8192)
expect(result.maxTokens).toBe(8192) // Capped to model's actual maxTokens
expect(result.reasoningBudget).toBeUndefined()
expect(result.temperature).toBe(0)
})
Expand Down
4 changes: 2 additions & 2 deletions src/api/providers/__tests__/anthropic.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ describe("AnthropicHandler", () => {
expect(result.temperature).toBe(1.0)
})

it("does not honor custom maxTokens for non-thinking models", () => {
it("honors custom maxTokens for all models", () => {
const handler = new AnthropicHandler({
apiKey: "test-api-key",
apiModelId: "claude-3-7-sonnet-20250219",
Expand All @@ -260,7 +260,7 @@ describe("AnthropicHandler", () => {
})

const result = handler.getModel()
expect(result.maxTokens).toBe(8192)
expect(result.maxTokens).toBe(8192) // Capped to model's actual maxTokens
expect(result.reasoningBudget).toBeUndefined()
expect(result.temperature).toBe(0)
})
Expand Down
32 changes: 27 additions & 5 deletions src/api/providers/__tests__/openai.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -263,14 +263,14 @@ describe("OpenAiHandler", () => {
expect(callArgs.max_completion_tokens).toBeUndefined()
})

it("should use user-configured modelMaxTokens instead of model default maxTokens", async () => {
it("should use user-configured modelMaxTokens but cap it to model's max capability", async () => {
const optionsWithUserMaxTokens: ApiHandlerOptions = {
...mockOptions,
includeMaxTokens: true,
modelMaxTokens: 32000, // User-configured value
modelMaxTokens: 32000, // User tries to set higher than model supports
openAiCustomModelInfo: {
contextWindow: 128_000,
maxTokens: 4096, // Model's default value (should not be used)
maxTokens: 4096, // Model's actual max capability
supportsPromptCache: false,
},
}
Expand All @@ -279,10 +279,32 @@ describe("OpenAiHandler", () => {
// Consume the stream to trigger the API call
for await (const _chunk of stream) {
}
// Assert the mockCreate was called with user-configured modelMaxTokens (32000), not model default maxTokens (4096)
// Assert the mockCreate was called with the model's max capability (4096), not the user's request (32000)
expect(mockCreate).toHaveBeenCalled()
const callArgs = mockCreate.mock.calls[0][0]
expect(callArgs.max_completion_tokens).toBe(32000)
expect(callArgs.max_completion_tokens).toBe(4096)
})

// A user-configured modelMaxTokens that is below the custom model's maxTokens
// is expected to reach the API unchanged as max_completion_tokens.
it("should use user-configured modelMaxTokens when it's less than model's max", async () => {
	const handler = new OpenAiHandler({
		...mockOptions,
		includeMaxTokens: true,
		// The user's setting is lower than the model's max
		modelMaxTokens: 2000,
		openAiCustomModelInfo: {
			contextWindow: 128_000,
			// The model's max capability
			maxTokens: 4096,
			supportsPromptCache: false,
		},
	})

	// The API call is only issued once the stream is consumed, so drain it.
	const stream = handler.createMessage(systemPrompt, messages)
	for await (const _chunk of stream) {
	}

	// The request must carry the user's setting (2000).
	expect(mockCreate).toHaveBeenCalled()
	const [firstRequest] = mockCreate.mock.calls[0]
	expect(firstRequest.max_completion_tokens).toBe(2000)
})

it("should fallback to model default maxTokens when user modelMaxTokens is not set", async () => {
Expand Down
8 changes: 4 additions & 4 deletions src/api/providers/__tests__/openrouter.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ describe("OpenRouterHandler", () => {
expect(result.info.supportsPromptCache).toBe(true)
})

it("honors custom maxTokens for thinking models", async () => {
it("honors custom maxTokens for all models", async () => {
const handler = new OpenRouterHandler({
openRouterApiKey: "test-key",
openRouterModelId: "anthropic/claude-3.7-sonnet:thinking",
Expand All @@ -98,20 +98,20 @@ describe("OpenRouterHandler", () => {
})

const result = await handler.fetchModel()
expect(result.maxTokens).toBe(128000) // Use actual implementation value
expect(result.maxTokens).toBe(32_768) // Not capped since model supports 128000
expect(result.reasoningBudget).toBeUndefined() // Use actual implementation value
expect(result.temperature).toBe(0) // Use actual implementation value
})

it("does not honor custom maxTokens for non-thinking models", async () => {
it("honors custom maxTokens for non-thinking models", async () => {
const handler = new OpenRouterHandler({
...mockOptions,
modelMaxTokens: 32_768,
modelMaxThinkingTokens: 16_384,
})

const result = await handler.fetchModel()
expect(result.maxTokens).toBe(8192)
expect(result.maxTokens).toBe(8192) // Capped to model's actual maxTokens
expect(result.reasoningBudget).toBeUndefined()
expect(result.temperature).toBe(0)
})
Expand Down
15 changes: 9 additions & 6 deletions src/api/providers/chutes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI from "openai"

import type { ApiHandlerOptions } from "../../shared/api"
import { getModelMaxOutputTokens } from "../../shared/api"
import { XmlMatcher } from "../../utils/xml-matcher"
import { convertToR1Format } from "../transform/r1-format"
import { convertToOpenAiMessages } from "../transform/openai-format"
Expand All @@ -27,15 +28,17 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
systemPrompt: string,
messages: Anthropic.Messages.MessageParam[],
): OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming {
const {
id: model,
info: { maxTokens: max_tokens },
} = this.getModel()
const model = this.getModel()
const max_tokens = getModelMaxOutputTokens({
modelId: model.id,
model: model.info,
settings: this.options as any,
})

const temperature = this.options.modelTemperature ?? this.getModel().info.temperature
const temperature = this.options.modelTemperature ?? model.info.temperature

return {
model,
model: model.id,
max_tokens,
temperature,
messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
Expand Down
2 changes: 1 addition & 1 deletion src/api/providers/gemini.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
systemInstruction,
httpOptions: this.options.googleGeminiBaseUrl ? { baseUrl: this.options.googleGeminiBaseUrl } : undefined,
thinkingConfig,
maxOutputTokens: this.options.modelMaxTokens ?? maxTokens ?? undefined,
maxOutputTokens: maxTokens ?? undefined,
temperature: this.options.modelTemperature ?? 0,
}

Expand Down
22 changes: 14 additions & 8 deletions src/api/providers/glama.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import OpenAI from "openai"
import { glamaDefaultModelId, glamaDefaultModelInfo, GLAMA_DEFAULT_TEMPERATURE } from "@roo-code/types"

import { Package } from "../../shared/package"
import { ApiHandlerOptions } from "../../shared/api"
import { ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api"

import { ApiStream } from "../transform/stream"
import { convertToOpenAiMessages } from "../transform/openai-format"
Expand Down Expand Up @@ -49,12 +49,14 @@ export class GlamaHandler extends RouterProvider implements SingleCompletionHand
addCacheBreakpoints(systemPrompt, openAiMessages)
}

// Required by Anthropic; other providers default to max tokens allowed.
let maxTokens: number | undefined

if (modelId.startsWith("anthropic/")) {
maxTokens = info.maxTokens ?? undefined
}
// Use getModelMaxOutputTokens to respect user's custom max tokens setting
const maxTokens = modelId.startsWith("anthropic/")
? getModelMaxOutputTokens({
modelId,
model: info,
settings: this.options as any,
})
: undefined

const requestOptions: OpenAI.Chat.ChatCompletionCreateParams = {
model: modelId,
Expand Down Expand Up @@ -130,7 +132,11 @@ export class GlamaHandler extends RouterProvider implements SingleCompletionHand
}

if (modelId.startsWith("anthropic/")) {
requestOptions.max_tokens = info.maxTokens
requestOptions.max_tokens = getModelMaxOutputTokens({
modelId,
model: info,
settings: this.options as any,
})
}

const response = await this.client.chat.completions.create(requestOptions)
Expand Down
16 changes: 12 additions & 4 deletions src/api/providers/lite-llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { litellmDefaultModelId, litellmDefaultModelInfo } from "@roo-code/types"

import { calculateApiCostOpenAI } from "../../shared/cost"

import { ApiHandlerOptions } from "../../shared/api"
import { ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api"

import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
import { convertToOpenAiMessages } from "../transform/openai-format"
Expand Down Expand Up @@ -44,8 +44,12 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
...convertToOpenAiMessages(messages),
]

// Required by some providers; others default to max tokens allowed
let maxTokens: number | undefined = info.maxTokens ?? undefined
// Use getModelMaxOutputTokens to respect user's custom max tokens setting
const maxTokens = getModelMaxOutputTokens({
modelId,
model: info,
settings: this.options as any,
})

const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
model: modelId,
Expand Down Expand Up @@ -119,7 +123,11 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
requestOptions.temperature = this.options.modelTemperature ?? 0
}

requestOptions.max_tokens = info.maxTokens
requestOptions.max_tokens = getModelMaxOutputTokens({
modelId,
model: info,
settings: this.options as any,
})

const response = await this.client.chat.completions.create(requestOptions)
return response.choices[0]?.message.content || ""
Expand Down
10 changes: 8 additions & 2 deletions src/api/providers/mistral.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { Mistral } from "@mistralai/mistralai"

import { type MistralModelId, mistralDefaultModelId, mistralModels, MISTRAL_DEFAULT_TEMPERATURE } from "@roo-code/types"

import { ApiHandlerOptions } from "../../shared/api"
import { ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api"

import { convertToMistralMessages } from "../transform/mistral-format"
import { ApiStream } from "../transform/stream"
Expand Down Expand Up @@ -78,7 +78,13 @@ export class MistralHandler extends BaseProvider implements SingleCompletionHand
const info = mistralModels[id as MistralModelId] ?? mistralModels[mistralDefaultModelId]

// @TODO: Move this to the `getModelParams` function.
const maxTokens = this.options.includeMaxTokens ? info.maxTokens : undefined
const maxTokens = this.options.includeMaxTokens
? getModelMaxOutputTokens({
modelId: id,
model: info,
settings: { ...this.options, apiProvider: "mistral" } as any,
})
: undefined
const temperature = this.options.modelTemperature ?? MISTRAL_DEFAULT_TEMPERATURE

return { id, info, maxTokens, temperature }
Expand Down
11 changes: 8 additions & 3 deletions src/api/providers/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import {
OPENAI_AZURE_AI_INFERENCE_PATH,
} from "@roo-code/types"

import type { ApiHandlerOptions } from "../../shared/api"
import { ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api"

import { XmlMatcher } from "../../utils/xml-matcher"

Expand Down Expand Up @@ -403,9 +403,14 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
): void {
// Only add max_completion_tokens if includeMaxTokens is true
if (this.options.includeMaxTokens === true) {
// Use user-configured modelMaxTokens if available, otherwise fall back to model's default maxTokens
// Use getModelMaxOutputTokens to properly handle user settings and model limits
// Using max_completion_tokens as max_tokens is deprecated
requestOptions.max_completion_tokens = this.options.modelMaxTokens || modelInfo.maxTokens
const modelId = this.options.openAiModelId ?? ""
requestOptions.max_completion_tokens = getModelMaxOutputTokens({
modelId,
model: modelInfo,
settings: { ...this.options, apiProvider: "openai" } as any,
})
}
}
}
Expand Down
21 changes: 14 additions & 7 deletions src/api/providers/unbound.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import OpenAI from "openai"
import { unboundDefaultModelId, unboundDefaultModelInfo } from "@roo-code/types"

import type { ApiHandlerOptions } from "../../shared/api"
import { getModelMaxOutputTokens } from "../../shared/api"

import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
import { convertToOpenAiMessages } from "../transform/openai-format"
Expand Down Expand Up @@ -76,12 +77,14 @@ export class UnboundHandler extends RouterProvider implements SingleCompletionHa
addVertexCacheBreakpoints(messages)
}

// Required by Anthropic; other providers default to max tokens allowed.
let maxTokens: number | undefined

if (modelId.startsWith("anthropic/")) {
maxTokens = info.maxTokens ?? undefined
}
// Use getModelMaxOutputTokens to respect user's custom max tokens setting
const maxTokens = modelId.startsWith("anthropic/")
? getModelMaxOutputTokens({
modelId,
model: info,
settings: this.options as any,
})
: undefined

const requestOptions: UnboundChatCompletionCreateParamsStreaming = {
model: modelId.split("/")[1],
Expand Down Expand Up @@ -149,7 +152,11 @@ export class UnboundHandler extends RouterProvider implements SingleCompletionHa
}

if (modelId.startsWith("anthropic/")) {
requestOptions.max_tokens = info.maxTokens
requestOptions.max_tokens = getModelMaxOutputTokens({
modelId,
model: info,
settings: this.options as any,
})
}

const response = await this.client.chat.completions.create(requestOptions, { headers: DEFAULT_HEADERS })
Expand Down
19 changes: 13 additions & 6 deletions src/api/providers/xai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import OpenAI from "openai"

import { type XAIModelId, xaiDefaultModelId, xaiModels } from "@roo-code/types"

import type { ApiHandlerOptions } from "../../shared/api"
import { ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api"

import { ApiStream } from "../transform/stream"
import { convertToOpenAiMessages } from "../transform/openai-format"
Expand Down Expand Up @@ -50,7 +50,11 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler
// Use the OpenAI-compatible API.
const stream = await this.client.chat.completions.create({
model: modelId,
max_tokens: modelInfo.maxTokens,
max_tokens: getModelMaxOutputTokens({
modelId,
model: modelInfo,
settings: { ...this.options, apiProvider: "xai" } as any,
}),
temperature: this.options.modelTemperature ?? XAI_DEFAULT_TEMPERATURE,
messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
stream: true,
Expand Down Expand Up @@ -78,12 +82,15 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler
if (chunk.usage) {
// Extract detailed token information if available
// First check for prompt_tokens_details structure (real API response)
const promptDetails = "prompt_tokens_details" in chunk.usage ? chunk.usage.prompt_tokens_details : null;
const cachedTokens = promptDetails && "cached_tokens" in promptDetails ? promptDetails.cached_tokens : 0;
const promptDetails = "prompt_tokens_details" in chunk.usage ? chunk.usage.prompt_tokens_details : null
const cachedTokens = promptDetails && "cached_tokens" in promptDetails ? promptDetails.cached_tokens : 0

// Fall back to direct fields in usage (used in test mocks)
const readTokens = cachedTokens || ("cache_read_input_tokens" in chunk.usage ? (chunk.usage as any).cache_read_input_tokens : 0);
const writeTokens = "cache_creation_input_tokens" in chunk.usage ? (chunk.usage as any).cache_creation_input_tokens : 0;
const readTokens =
cachedTokens ||
("cache_read_input_tokens" in chunk.usage ? (chunk.usage as any).cache_read_input_tokens : 0)
const writeTokens =
"cache_creation_input_tokens" in chunk.usage ? (chunk.usage as any).cache_creation_input_tokens : 0

yield {
type: "usage",
Expand Down
2 changes: 1 addition & 1 deletion src/api/transform/__tests__/model-params.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -665,7 +665,7 @@ describe("getModelParams", () => {
model,
})

expect(result.maxTokens).toBe(20000)
expect(result.maxTokens).toBe(16000) // Capped to model's actual maxTokens
expect(result.reasoningBudget).toBe(10000)
expect(result.temperature).toBe(1.0) // Overridden for reasoning budget models
expect(result.reasoningEffort).toBeUndefined() // Budget takes precedence
Expand Down
Loading