
Commit 9faf28a

fix: use actual max_completion_tokens from OpenRouter API (#5240)
- Update parseOpenRouterModel to always use the actual max_completion_tokens from the OpenRouter API
- Remove the artificial restriction that only reasoning-budget and Anthropic models get their actual max tokens
- Fall back to 20% of the context window when max_completion_tokens is null
- Update getModelMaxOutputTokens to use the same fallback logic for consistency
- Update tests to reflect the new behavior
- Fixes issue where reserved tokens showed ~209k instead of actual model limits (e.g. GPT-4o: 16,384)
1 parent 5b1ca51 commit 9faf28a
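
In effect, the commit replaces a gated lookup with a single fallback rule. A minimal sketch of that rule in isolation (resolveMaxTokens is a hypothetical helper for illustration, not a function in the codebase; the 16,384 figure comes from the commit message, the 40,000 case from the updated tests):

// Hypothetical helper: prefer the max_completion_tokens reported by the
// OpenRouter API; when it is null, reserve 20% of the context window.
function resolveMaxTokens(maxCompletionTokens: number | null, contextLength: number): number {
	return maxCompletionTokens || Math.ceil(contextLength * 0.2)
}

resolveMaxTokens(16_384, 128_000) // => 16384 (the model's actual limit is respected)
resolveMaxTokens(null, 200_000) // => 40000 (20% fallback when nothing is reported)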

File tree

3 files changed (+7, -6)

src/api/providers/fetchers/openrouter.ts

Lines changed: 1 addition & 3 deletions
@@ -190,10 +190,8 @@ export const parseOpenRouterModel = ({
 	const supportsPromptCache = typeof cacheWritesPrice !== "undefined" && typeof cacheReadsPrice !== "undefined"

-	const useMaxTokens = OPEN_ROUTER_REASONING_BUDGET_MODELS.has(id) || id.startsWith("anthropic/")
-
 	const modelInfo: ModelInfo = {
-		maxTokens: useMaxTokens ? maxTokens || 0 : 0,
+		maxTokens: maxTokens || Math.ceil(model.context_length * 0.2),
 		contextWindow: model.context_length,
 		supportsImages: modality?.includes("image") ?? false,
 		supportsPromptCache,
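
Note the use of || rather than ??: a reported max_completion_tokens of 0 triggers the fallback too, instead of being kept as a literal zero. A quick illustration of the difference (contextLength is an illustrative value):

const contextLength = 128_000

0 ?? Math.ceil(contextLength * 0.2) // => 0 (?? only replaces null/undefined)
0 || Math.ceil(contextLength * 0.2) // => 25600 (|| treats 0 as "not reported")
16_384 || Math.ceil(contextLength * 0.2) // => 16384 (a real limit passes through)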

src/shared/__tests__/api.spec.ts

Lines changed: 3 additions & 2 deletions
@@ -66,7 +66,7 @@ describe("getMaxTokensForModel", () => {
 		expect(getModelMaxOutputTokens({ modelId, model, settings })).toBe(8000)
 	})

-	it("should return undefined for non-thinking models with undefined maxTokens", () => {
+	it("should return 20% of context window for non-thinking models with undefined maxTokens", () => {
 		const model: ModelInfo = {
 			contextWindow: 200_000,
 			supportsPromptCache: true,
@@ -76,7 +76,8 @@ describe("getMaxTokensForModel", () => {
 			modelMaxTokens: 4000,
 		}

-		expect(getModelMaxOutputTokens({ modelId, model, settings })).toBeUndefined()
+		// Should return 20% of context window when maxTokens is undefined
+		expect(getModelMaxOutputTokens({ modelId, model, settings })).toBe(40000)
 	})

 	test("should return maxTokens from modelInfo when thinking is false", () => {

src/shared/api.ts

Lines changed: 3 additions & 1 deletion
@@ -71,7 +71,9 @@ export const getModelMaxOutputTokens = ({
 		return ANTHROPIC_DEFAULT_MAX_TOKENS
 	}

-	return model.maxTokens ?? undefined
+	// If maxTokens is 0 or undefined, fall back to 20% of context window
+	// This matches the sliding window logic
+	return model.maxTokens || Math.ceil(model.contextWindow * 0.2)
 }

 // GetModelsOptions
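
With both files changed, the fetcher and getModelMaxOutputTokens resolve to the same number for every model. A simplified sketch of the consolidated behavior (Model is a pared-down stand-in for the real ModelInfo, and the real function also has the ANTHROPIC_DEFAULT_MAX_TOKENS branch shown in the context above):

interface Model {
	maxTokens?: number
	contextWindow: number
}

// 0 and undefined both fall through to the 20% fallback,
// matching the sliding window logic.
function maxOutputTokens(model: Model): number {
	return model.maxTokens || Math.ceil(model.contextWindow * 0.2)
}

maxOutputTokens({ maxTokens: 16_384, contextWindow: 128_000 }) // => 16384
maxOutputTokens({ contextWindow: 200_000 }) // => 40000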
