Commit b15c90d
fix: bypass 20% context limit for OpenAI Compatible providers
- Added exception for OpenAI Compatible providers to use full maxTokens
- OpenAI Compatible providers (identified by custom baseUrl) now bypass the 20% context limit
- This fixes the issue where operations fail when context exceeds 128k
- Added comprehensive tests for the new behavior

Fixes #8833
1 parent 98b8d5b · commit b15c90d
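For context, the 20% rule clamps a model's configured maxTokens to a fifth of its context window. A minimal TypeScript sketch of the arithmetic behind the failure (the Math.ceil rounding is an illustrative assumption, not something this page confirms):

// Illustration of the pre-fix clamp: a 128k-output model on a 200k window
// is capped at 40k, so requests tuned for larger outputs fail.
const contextWindow = 200_000
const configuredMaxTokens = 128_000
const clamped = Math.min(configuredMaxTokens, Math.ceil(contextWindow * 0.2))
console.log(clamped) // 40_000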

File tree: 2 files changed (+91, -3 lines)

src/shared/__tests__/api.spec.ts

Lines changed: 80 additions & 0 deletions
@@ -310,6 +310,86 @@ describe("getModelMaxOutputTokens", () => {
 
         expect(getModelMaxOutputTokens({ modelId: "test", model, settings })).toBe(16_384)
     })
+
+    it("should return full maxTokens for OpenAI Compatible providers without clamping", () => {
+        const model: ModelInfo = {
+            supportsPromptCache: false,
+            maxTokens: 128_000, // 64% of context window
+            contextWindow: 200_000,
+            supportsImages: false,
+        }
+
+        // Test with custom OpenAI baseUrl (OpenAI Compatible)
+        const settings: ProviderSettings = {
+            apiProvider: "openai",
+            openAiBaseUrl: "https://custom-api.example.com/v1",
+        }
+
+        // Should return full 128_000 without clamping to 20%
+        expect(getModelMaxOutputTokens({ modelId: "glm-4.6", model, settings })).toBe(128_000)
+    })
+
+    it("should apply 20% clamping for regular OpenAI provider", () => {
+        const model: ModelInfo = {
+            supportsPromptCache: false,
+            maxTokens: 128_000, // 64% of context window
+            contextWindow: 200_000,
+            supportsImages: false,
+        }
+
+        // Test with default OpenAI baseUrl (regular OpenAI)
+        const settings: ProviderSettings = {
+            apiProvider: "openai",
+            openAiBaseUrl: "https://api.openai.com/v1",
+        }
+
+        // Should clamp to 20% of context window: 200_000 * 0.2 = 40_000
+        expect(getModelMaxOutputTokens({ modelId: "some-model", model, settings })).toBe(40_000)
+    })
+
+    it("should apply 20% clamping when openAiBaseUrl is not set", () => {
+        const model: ModelInfo = {
+            supportsPromptCache: false,
+            maxTokens: 128_000, // 64% of context window
+            contextWindow: 200_000,
+            supportsImages: false,
+        }
+
+        // Test without openAiBaseUrl (defaults to regular OpenAI)
+        const settings: ProviderSettings = {
+            apiProvider: "openai",
+        }
+
+        // Should clamp to 20% of context window: 200_000 * 0.2 = 40_000
+        expect(getModelMaxOutputTokens({ modelId: "some-model", model, settings })).toBe(40_000)
+    })
+
+    it("should handle OpenAI Compatible with various base URLs", () => {
+        const model: ModelInfo = {
+            supportsPromptCache: false,
+            maxTokens: 100_000,
+            contextWindow: 128_000,
+            supportsImages: false,
+        }
+
+        // Test with various custom URLs that indicate OpenAI Compatible
+        const customUrls = [
+            "http://localhost:11434/v1",
+            "https://api.groq.com/openai/v1",
+            "https://api.together.xyz/v1",
+            "https://api.deepinfra.com/v1/openai",
+        ]
+
+        customUrls.forEach((url) => {
+            const settings: ProviderSettings = {
+                apiProvider: "openai",
+                openAiBaseUrl: url,
+            }
+
+            // Should return full maxTokens without clamping
+            expect(getModelMaxOutputTokens({ modelId: "test-model", model, settings })).toBe(100_000)
+        })
+    })
 })
 
 describe("shouldUseReasoningBudget", () => {

src/shared/api.ts

Lines changed: 11 additions & 3 deletions
@@ -116,13 +116,21 @@ export const getModelMaxOutputTokens = ({
     }
 
     // If model has explicit maxTokens, clamp it to 20% of the context window
-    // Exception: GPT-5 models should use their exact configured max output tokens
+    // Exception 1: GPT-5 models should use their exact configured max output tokens
+    // Exception 2: OpenAI Compatible providers should use their exact configured max output tokens
     if (model.maxTokens) {
         // Check if this is a GPT-5 model (case-insensitive)
         const isGpt5Model = modelId.toLowerCase().includes("gpt-5")
 
-        // GPT-5 models bypass the 20% cap and use their full configured max tokens
-        if (isGpt5Model) {
+        // Check if this is an OpenAI Compatible provider
+        // OpenAI Compatible uses apiProvider "openai" with a custom baseUrl
+        const isOpenAiCompatible =
+            settings?.apiProvider === "openai" &&
+            settings?.openAiBaseUrl &&
+            settings.openAiBaseUrl !== "https://api.openai.com/v1"
+
+        // GPT-5 models and OpenAI Compatible providers bypass the 20% cap and use their full configured max tokens
+        if (isGpt5Model || isOpenAiCompatible) {
             return model.maxTokens
         }
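
Taken together, the decision order inside the model.maxTokens branch is now: GPT-5 model or OpenAI Compatible provider → return the configured maxTokens; everything else → the 20% clamp. A self-contained TypeScript sketch of that flow; the trimmed-down types and the Math.ceil rounding are assumptions, and only the branch shown in the diff above is confirmed:

// Trimmed types for illustration; the real ModelInfo and ProviderSettings are larger.
interface ModelInfoSketch {
    maxTokens?: number
    contextWindow: number
}
interface ProviderSettingsSketch {
    apiProvider?: string
    openAiBaseUrl?: string
}

function maxOutputTokensSketch(
    modelId: string,
    model: ModelInfoSketch,
    settings?: ProviderSettingsSketch,
): number | undefined {
    if (!model.maxTokens) return undefined

    const isGpt5Model = modelId.toLowerCase().includes("gpt-5")
    const isOpenAiCompatible =
        settings?.apiProvider === "openai" &&
        !!settings?.openAiBaseUrl &&
        settings?.openAiBaseUrl !== "https://api.openai.com/v1"

    // Both exceptions trust the configured value outright.
    if (isGpt5Model || isOpenAiCompatible) return model.maxTokens

    // Assumed rounding; the diff above does not show the clamp line itself.
    return Math.min(model.maxTokens, Math.ceil(model.contextWindow * 0.2))
}

// glm-4.6 behind a custom endpoint now gets its full 128_000 instead of 40_000:
maxOutputTokensSketch(
    "glm-4.6",
    { maxTokens: 128_000, contextWindow: 200_000 },
    { apiProvider: "openai", openAiBaseUrl: "https://custom-api.example.com/v1" },
)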
