
Commit 9faf28a

fix: use actual max_completion_tokens from OpenRouter API (#5240)
- Update parseOpenRouterModel to always use the actual max_completion_tokens from the OpenRouter API
- Remove the artificial restriction that only reasoning-budget and Anthropic models get their actual max tokens
- Fall back to 20% of the context window when max_completion_tokens is null
- Update getModelMaxOutputTokens to use the same fallback logic for consistency
- Update tests to reflect the new behavior
- Fixes issue where reserved tokens showed ~209k instead of actual model limits (e.g. GPT-4o: 16,384)
1 parent 5b1ca51 commit 9faf28a
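
In effect, the commit replaces a gated lookup with a single fallback rule. A minimal sketch of that rule in isolation (resolveMaxTokens is a hypothetical helper for illustration, not a function in the codebase; the 16,384 figure comes from the commit message, the 40,000 case from the updated tests):

// Hypothetical helper: prefer the max_completion_tokens reported by the
// OpenRouter API; when it is null, reserve 20% of the context window.
function resolveMaxTokens(maxCompletionTokens: number | null, contextLength: number): number {
	return maxCompletionTokens || Math.ceil(contextLength * 0.2)
}

resolveMaxTokens(16_384, 128_000) // => 16384 (the model's actual limit is respected)
resolveMaxTokens(null, 200_000) // => 40000 (20% fallback when nothing is reported)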

File tree

3 files changed (+7, -6)

src/api/providers/fetchers/openrouter.ts

Lines changed: 1 addition & 3 deletions
@@ -190,10 +190,8 @@ export const parseOpenRouterModel = ({
 	const supportsPromptCache = typeof cacheWritesPrice !== "undefined" && typeof cacheReadsPrice !== "undefined"

-	const useMaxTokens = OPEN_ROUTER_REASONING_BUDGET_MODELS.has(id) || id.startsWith("anthropic/")
-
 	const modelInfo: ModelInfo = {
-		maxTokens: useMaxTokens ? maxTokens || 0 : 0,
+		maxTokens: maxTokens || Math.ceil(model.context_length * 0.2),
 		contextWindow: model.context_length,
 		supportsImages: modality?.includes("image") ?? false,
 		supportsPromptCache,
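
Note the use of || rather than ??: a reported max_completion_tokens of 0 triggers the fallback too, instead of being kept as a literal zero. A quick illustration of the difference (contextLength is an illustrative value):

const contextLength = 128_000

0 ?? Math.ceil(contextLength * 0.2) // => 0 (?? only replaces null/undefined)
0 || Math.ceil(contextLength * 0.2) // => 25600 (|| treats 0 as "not reported")
16_384 || Math.ceil(contextLength * 0.2) // => 16384 (a real limit passes through)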

src/shared/__tests__/api.spec.ts

Lines changed: 3 additions & 2 deletions
@@ -66,7 +66,7 @@ describe("getMaxTokensForModel", () => {
 		expect(getModelMaxOutputTokens({ modelId, model, settings })).toBe(8000)
 	})

-	it("should return undefined for non-thinking models with undefined maxTokens", () => {
+	it("should return 20% of context window for non-thinking models with undefined maxTokens", () => {
 		const model: ModelInfo = {
 			contextWindow: 200_000,
 			supportsPromptCache: true,
@@ -76,7 +76,8 @@ describe("getMaxTokensForModel", () => {
 			modelMaxTokens: 4000,
 		}

-		expect(getModelMaxOutputTokens({ modelId, model, settings })).toBeUndefined()
+		// Should return 20% of context window when maxTokens is undefined
+		expect(getModelMaxOutputTokens({ modelId, model, settings })).toBe(40000)
 	})

 	test("should return maxTokens from modelInfo when thinking is false", () => {

src/shared/api.ts

Lines changed: 3 additions & 1 deletion
@@ -71,7 +71,9 @@ export const getModelMaxOutputTokens = ({
 		return ANTHROPIC_DEFAULT_MAX_TOKENS
 	}

-	return model.maxTokens ?? undefined
+	// If maxTokens is 0 or undefined, fall back to 20% of context window
+	// This matches the sliding window logic
+	return model.maxTokens || Math.ceil(model.contextWindow * 0.2)
 }

 // GetModelsOptions
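
With both files changed, the fetcher and getModelMaxOutputTokens resolve to the same number for every model. A simplified sketch of the consolidated behavior (Model is a pared-down stand-in for the real ModelInfo, and the real function also has the ANTHROPIC_DEFAULT_MAX_TOKENS branch shown in the context above):

interface Model {
	maxTokens?: number
	contextWindow: number
}

// 0 and undefined both fall through to the 20% fallback,
// matching the sliding window logic.
function maxOutputTokens(model: Model): number {
	return model.maxTokens || Math.ceil(model.contextWindow * 0.2)
}

maxOutputTokens({ maxTokens: 16_384, contextWindow: 128_000 }) // => 16384
maxOutputTokens({ contextWindow: 200_000 }) // => 40000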
