Commit b15c90d
fix: bypass 20% context limit for OpenAI Compatible providers
- Added exception for OpenAI Compatible providers to use full maxTokens
- OpenAI Compatible providers (identified by custom baseUrl) now bypass the 20% context limit
- This fixes the issue where operations fail when context exceeds 128k
- Added comprehensive tests for the new behavior

Fixes #8833
1 parent 98b8d5b · commit b15c90d
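For context, the 20% rule clamps a model's configured maxTokens to a fifth of its context window. A minimal TypeScript sketch of the arithmetic behind the failure (the Math.ceil rounding is an illustrative assumption, not something this page confirms):

// Illustration of the pre-fix clamp: a 128k-output model on a 200k window
// is capped at 40k, so requests tuned for larger outputs fail.
const contextWindow = 200_000
const configuredMaxTokens = 128_000
const clamped = Math.min(configuredMaxTokens, Math.ceil(contextWindow * 0.2))
console.log(clamped) // 40_000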

File tree: 2 files changed (+91, -3 lines)

src/shared/__tests__/api.spec.ts

Lines changed: 80 additions & 0 deletions
@@ -310,6 +310,86 @@ describe("getModelMaxOutputTokens", () => {
 
         expect(getModelMaxOutputTokens({ modelId: "test", model, settings })).toBe(16_384)
     })
+
+    it("should return full maxTokens for OpenAI Compatible providers without clamping", () => {
+        const model: ModelInfo = {
+            supportsPromptCache: false,
+            maxTokens: 128_000, // 64% of context window
+            contextWindow: 200_000,
+            supportsImages: false,
+        }
+
+        // Test with custom OpenAI baseUrl (OpenAI Compatible)
+        const settings: ProviderSettings = {
+            apiProvider: "openai",
+            openAiBaseUrl: "https://custom-api.example.com/v1",
+        }
+
+        // Should return full 128_000 without clamping to 20%
+        expect(getModelMaxOutputTokens({ modelId: "glm-4.6", model, settings })).toBe(128_000)
+    })
+
+    it("should apply 20% clamping for regular OpenAI provider", () => {
+        const model: ModelInfo = {
+            supportsPromptCache: false,
+            maxTokens: 128_000, // 64% of context window
+            contextWindow: 200_000,
+            supportsImages: false,
+        }
+
+        // Test with default OpenAI baseUrl (regular OpenAI)
+        const settings: ProviderSettings = {
+            apiProvider: "openai",
+            openAiBaseUrl: "https://api.openai.com/v1",
+        }
+
+        // Should clamp to 20% of context window: 200_000 * 0.2 = 40_000
+        expect(getModelMaxOutputTokens({ modelId: "some-model", model, settings })).toBe(40_000)
+    })
+
+    it("should apply 20% clamping when openAiBaseUrl is not set", () => {
+        const model: ModelInfo = {
+            supportsPromptCache: false,
+            maxTokens: 128_000, // 64% of context window
+            contextWindow: 200_000,
+            supportsImages: false,
+        }
+
+        // Test without openAiBaseUrl (defaults to regular OpenAI)
+        const settings: ProviderSettings = {
+            apiProvider: "openai",
+        }
+
+        // Should clamp to 20% of context window: 200_000 * 0.2 = 40_000
+        expect(getModelMaxOutputTokens({ modelId: "some-model", model, settings })).toBe(40_000)
+    })
+
+    it("should handle OpenAI Compatible with various base URLs", () => {
+        const model: ModelInfo = {
+            supportsPromptCache: false,
+            maxTokens: 100_000,
+            contextWindow: 128_000,
+            supportsImages: false,
+        }
+
+        // Test with various custom URLs that indicate OpenAI Compatible
+        const customUrls = [
+            "http://localhost:11434/v1",
+            "https://api.groq.com/openai/v1",
+            "https://api.together.xyz/v1",
+            "https://api.deepinfra.com/v1/openai",
+        ]
+
+        customUrls.forEach((url) => {
+            const settings: ProviderSettings = {
+                apiProvider: "openai",
+                openAiBaseUrl: url,
+            }
+
+            // Should return full maxTokens without clamping
+            expect(getModelMaxOutputTokens({ modelId: "test-model", model, settings })).toBe(100_000)
+        })
+    })
 })
 
 describe("shouldUseReasoningBudget", () => {

src/shared/api.ts

Lines changed: 11 additions & 3 deletions
@@ -116,13 +116,21 @@ export const getModelMaxOutputTokens = ({
     }
 
     // If model has explicit maxTokens, clamp it to 20% of the context window
-    // Exception: GPT-5 models should use their exact configured max output tokens
+    // Exception 1: GPT-5 models should use their exact configured max output tokens
+    // Exception 2: OpenAI Compatible providers should use their exact configured max output tokens
     if (model.maxTokens) {
         // Check if this is a GPT-5 model (case-insensitive)
         const isGpt5Model = modelId.toLowerCase().includes("gpt-5")
 
-        // GPT-5 models bypass the 20% cap and use their full configured max tokens
-        if (isGpt5Model) {
+        // Check if this is an OpenAI Compatible provider
+        // OpenAI Compatible uses apiProvider "openai" with a custom baseUrl
+        const isOpenAiCompatible =
+            settings?.apiProvider === "openai" &&
+            settings?.openAiBaseUrl &&
+            settings.openAiBaseUrl !== "https://api.openai.com/v1"
+
+        // GPT-5 models and OpenAI Compatible providers bypass the 20% cap and use their full configured max tokens
+        if (isGpt5Model || isOpenAiCompatible) {
             return model.maxTokens
         }
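
Taken together, the decision order inside the model.maxTokens branch is now: GPT-5 model or OpenAI Compatible provider → return the configured maxTokens; everything else → the 20% clamp. A self-contained TypeScript sketch of that flow; the trimmed-down types and the Math.ceil rounding are assumptions, and only the branch shown in the diff above is confirmed:

// Trimmed types for illustration; the real ModelInfo and ProviderSettings are larger.
interface ModelInfoSketch {
    maxTokens?: number
    contextWindow: number
}
interface ProviderSettingsSketch {
    apiProvider?: string
    openAiBaseUrl?: string
}

function maxOutputTokensSketch(
    modelId: string,
    model: ModelInfoSketch,
    settings?: ProviderSettingsSketch,
): number | undefined {
    if (!model.maxTokens) return undefined

    const isGpt5Model = modelId.toLowerCase().includes("gpt-5")
    const isOpenAiCompatible =
        settings?.apiProvider === "openai" &&
        !!settings?.openAiBaseUrl &&
        settings?.openAiBaseUrl !== "https://api.openai.com/v1"

    // Both exceptions trust the configured value outright.
    if (isGpt5Model || isOpenAiCompatible) return model.maxTokens

    // Assumed rounding; the diff above does not show the clamp line itself.
    return Math.min(model.maxTokens, Math.ceil(model.contextWindow * 0.2))
}

// glm-4.6 behind a custom endpoint now gets its full 128_000 instead of 40_000:
maxOutputTokensSketch(
    "glm-4.6",
    { maxTokens: 128_000, contextWindow: 200_000 },
    { apiProvider: "openai", openAiBaseUrl: "https://custom-api.example.com/v1" },
)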
