Skip to content

Commit e3a3883

Browse files
committed
fix: ensure user's max tokens setting overrides model defaults across all providers

- Updated BaseOpenAiCompatibleProvider to use getModelMaxOutputTokens()
- Fixed ChutesHandler to respect user's custom max tokens
- Fixed LiteLLM createMessage and completePrompt methods
- Fixed Glama createMessage and completePrompt methods
- Fixed Unbound createMessage and completePrompt methods
- Fixed Mistral getModel method to use getModelMaxOutputTokens()
- Fixed XAI to use getModelMaxOutputTokens()
- Fixed OpenAI addMaxTokensIfNeeded to use getModelMaxOutputTokens()
- Fixed Gemini to use maxTokens from getModel(), which already applies user settings

This ensures that when users set a custom max output tokens value in their provider settings, it will be respected across all providers (capped to the model's actual maximum).
1 parent 6a4653a commit e3a3883

File tree: 7 files changed, +70 −31 lines

src/api/providers/gemini.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
7171
systemInstruction,
7272
httpOptions: this.options.googleGeminiBaseUrl ? { baseUrl: this.options.googleGeminiBaseUrl } : undefined,
7373
thinkingConfig,
74-
maxOutputTokens: this.options.modelMaxTokens ?? maxTokens ?? undefined,
74+
maxOutputTokens: maxTokens ?? undefined,
7575
temperature: this.options.modelTemperature ?? 0,
7676
}
7777

src/api/providers/glama.ts

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import OpenAI from "openai"
55
import { glamaDefaultModelId, glamaDefaultModelInfo, GLAMA_DEFAULT_TEMPERATURE } from "@roo-code/types"
66

77
import { Package } from "../../shared/package"
8-
import { ApiHandlerOptions } from "../../shared/api"
8+
import { ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api"
99

1010
import { ApiStream } from "../transform/stream"
1111
import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -49,12 +49,14 @@ export class GlamaHandler extends RouterProvider implements SingleCompletionHand
4949
addCacheBreakpoints(systemPrompt, openAiMessages)
5050
}
5151

52-
// Required by Anthropic; other providers default to max tokens allowed.
53-
let maxTokens: number | undefined
54-
55-
if (modelId.startsWith("anthropic/")) {
56-
maxTokens = info.maxTokens ?? undefined
57-
}
52+
// Use getModelMaxOutputTokens to respect user's custom max tokens setting
53+
const maxTokens = modelId.startsWith("anthropic/")
54+
? getModelMaxOutputTokens({
55+
modelId,
56+
model: info,
57+
settings: this.options as any,
58+
})
59+
: undefined
5860

5961
const requestOptions: OpenAI.Chat.ChatCompletionCreateParams = {
6062
model: modelId,
@@ -130,7 +132,11 @@ export class GlamaHandler extends RouterProvider implements SingleCompletionHand
130132
}
131133

132134
if (modelId.startsWith("anthropic/")) {
133-
requestOptions.max_tokens = info.maxTokens
135+
requestOptions.max_tokens = getModelMaxOutputTokens({
136+
modelId,
137+
model: info,
138+
settings: this.options as any,
139+
})
134140
}
135141

136142
const response = await this.client.chat.completions.create(requestOptions)

src/api/providers/lite-llm.ts

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import { litellmDefaultModelId, litellmDefaultModelInfo } from "@roo-code/types"
55

66
import { calculateApiCostOpenAI } from "../../shared/cost"
77

8-
import { ApiHandlerOptions } from "../../shared/api"
8+
import { ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api"
99

1010
import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
1111
import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -44,8 +44,12 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
4444
...convertToOpenAiMessages(messages),
4545
]
4646

47-
// Required by some providers; others default to max tokens allowed
48-
let maxTokens: number | undefined = info.maxTokens ?? undefined
47+
// Use getModelMaxOutputTokens to respect user's custom max tokens setting
48+
const maxTokens = getModelMaxOutputTokens({
49+
modelId,
50+
model: info,
51+
settings: this.options as any,
52+
})
4953

5054
const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
5155
model: modelId,
@@ -119,7 +123,11 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
119123
requestOptions.temperature = this.options.modelTemperature ?? 0
120124
}
121125

122-
requestOptions.max_tokens = info.maxTokens
126+
requestOptions.max_tokens = getModelMaxOutputTokens({
127+
modelId,
128+
model: info,
129+
settings: this.options as any,
130+
})
123131

124132
const response = await this.client.chat.completions.create(requestOptions)
125133
return response.choices[0]?.message.content || ""

src/api/providers/mistral.ts

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { Mistral } from "@mistralai/mistralai"
33

44
import { type MistralModelId, mistralDefaultModelId, mistralModels, MISTRAL_DEFAULT_TEMPERATURE } from "@roo-code/types"
55

6-
import { ApiHandlerOptions } from "../../shared/api"
6+
import { ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api"
77

88
import { convertToMistralMessages } from "../transform/mistral-format"
99
import { ApiStream } from "../transform/stream"
@@ -78,7 +78,13 @@ export class MistralHandler extends BaseProvider implements SingleCompletionHand
7878
const info = mistralModels[id as MistralModelId] ?? mistralModels[mistralDefaultModelId]
7979

8080
// @TODO: Move this to the `getModelParams` function.
81-
const maxTokens = this.options.includeMaxTokens ? info.maxTokens : undefined
81+
const maxTokens = this.options.includeMaxTokens
82+
? getModelMaxOutputTokens({
83+
modelId: id,
84+
model: info,
85+
settings: { ...this.options, apiProvider: "mistral" } as any,
86+
})
87+
: undefined
8288
const temperature = this.options.modelTemperature ?? MISTRAL_DEFAULT_TEMPERATURE
8389

8490
return { id, info, maxTokens, temperature }

src/api/providers/openai.ts

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import {
1010
OPENAI_AZURE_AI_INFERENCE_PATH,
1111
} from "@roo-code/types"
1212

13-
import type { ApiHandlerOptions } from "../../shared/api"
13+
import { ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api"
1414

1515
import { XmlMatcher } from "../../utils/xml-matcher"
1616

@@ -403,9 +403,14 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
403403
): void {
404404
// Only add max_completion_tokens if includeMaxTokens is true
405405
if (this.options.includeMaxTokens === true) {
406-
// Use user-configured modelMaxTokens if available, otherwise fall back to model's default maxTokens
406+
// Use getModelMaxOutputTokens to properly handle user settings and model limits
407407
// Using max_completion_tokens as max_tokens is deprecated
408-
requestOptions.max_completion_tokens = this.options.modelMaxTokens || modelInfo.maxTokens
408+
const modelId = this.options.openAiModelId ?? ""
409+
requestOptions.max_completion_tokens = getModelMaxOutputTokens({
410+
modelId,
411+
model: modelInfo,
412+
settings: { ...this.options, apiProvider: "openai" } as any,
413+
})
409414
}
410415
}
411416
}

src/api/providers/unbound.ts

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import OpenAI from "openai"
44
import { unboundDefaultModelId, unboundDefaultModelInfo } from "@roo-code/types"
55

66
import type { ApiHandlerOptions } from "../../shared/api"
7+
import { getModelMaxOutputTokens } from "../../shared/api"
78

89
import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
910
import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -76,12 +77,14 @@ export class UnboundHandler extends RouterProvider implements SingleCompletionHa
7677
addVertexCacheBreakpoints(messages)
7778
}
7879

79-
// Required by Anthropic; other providers default to max tokens allowed.
80-
let maxTokens: number | undefined
81-
82-
if (modelId.startsWith("anthropic/")) {
83-
maxTokens = info.maxTokens ?? undefined
84-
}
80+
// Use getModelMaxOutputTokens to respect user's custom max tokens setting
81+
const maxTokens = modelId.startsWith("anthropic/")
82+
? getModelMaxOutputTokens({
83+
modelId,
84+
model: info,
85+
settings: this.options as any,
86+
})
87+
: undefined
8588

8689
const requestOptions: UnboundChatCompletionCreateParamsStreaming = {
8790
model: modelId.split("/")[1],
@@ -149,7 +152,11 @@ export class UnboundHandler extends RouterProvider implements SingleCompletionHa
149152
}
150153

151154
if (modelId.startsWith("anthropic/")) {
152-
requestOptions.max_tokens = info.maxTokens
155+
requestOptions.max_tokens = getModelMaxOutputTokens({
156+
modelId,
157+
model: info,
158+
settings: this.options as any,
159+
})
153160
}
154161

155162
const response = await this.client.chat.completions.create(requestOptions, { headers: DEFAULT_HEADERS })

src/api/providers/xai.ts

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import OpenAI from "openai"
33

44
import { type XAIModelId, xaiDefaultModelId, xaiModels } from "@roo-code/types"
55

6-
import type { ApiHandlerOptions } from "../../shared/api"
6+
import { ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api"
77

88
import { ApiStream } from "../transform/stream"
99
import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -50,7 +50,11 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler
5050
// Use the OpenAI-compatible API.
5151
const stream = await this.client.chat.completions.create({
5252
model: modelId,
53-
max_tokens: modelInfo.maxTokens,
53+
max_tokens: getModelMaxOutputTokens({
54+
modelId,
55+
model: modelInfo,
56+
settings: { ...this.options, apiProvider: "xai" } as any,
57+
}),
5458
temperature: this.options.modelTemperature ?? XAI_DEFAULT_TEMPERATURE,
5559
messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
5660
stream: true,
@@ -78,12 +82,15 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler
7882
if (chunk.usage) {
7983
// Extract detailed token information if available
8084
// First check for prompt_tokens_details structure (real API response)
81-
const promptDetails = "prompt_tokens_details" in chunk.usage ? chunk.usage.prompt_tokens_details : null;
82-
const cachedTokens = promptDetails && "cached_tokens" in promptDetails ? promptDetails.cached_tokens : 0;
85+
const promptDetails = "prompt_tokens_details" in chunk.usage ? chunk.usage.prompt_tokens_details : null
86+
const cachedTokens = promptDetails && "cached_tokens" in promptDetails ? promptDetails.cached_tokens : 0
8387

8488
// Fall back to direct fields in usage (used in test mocks)
85-
const readTokens = cachedTokens || ("cache_read_input_tokens" in chunk.usage ? (chunk.usage as any).cache_read_input_tokens : 0);
86-
const writeTokens = "cache_creation_input_tokens" in chunk.usage ? (chunk.usage as any).cache_creation_input_tokens : 0;
89+
const readTokens =
90+
cachedTokens ||
91+
("cache_read_input_tokens" in chunk.usage ? (chunk.usage as any).cache_read_input_tokens : 0)
92+
const writeTokens =
93+
"cache_creation_input_tokens" in chunk.usage ? (chunk.usage as any).cache_creation_input_tokens : 0
8794

8895
yield {
8996
type: "usage",

Comments (0)