Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions src/shared/__tests__/api.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,86 @@ describe("getModelMaxOutputTokens", () => {

expect(getModelMaxOutputTokens({ modelId: "test", model, settings })).toBe(16_384)
})

it("should return full maxTokens for OpenAI Compatible providers without clamping", () => {
	// Model whose configured maxTokens (64% of context) far exceeds the usual 20% cap.
	const modelInfo: ModelInfo = {
		supportsPromptCache: false,
		maxTokens: 128_000, // 64% of context window
		contextWindow: 200_000,
		supportsImages: false,
	}

	// A non-default baseUrl marks the provider as OpenAI Compatible.
	const providerSettings: ProviderSettings = {
		apiProvider: "openai",
		openAiBaseUrl: "https://custom-api.example.com/v1",
	}

	// The full 128_000 must come back untouched — no clamping to 20%.
	const result = getModelMaxOutputTokens({ modelId: "glm-4.6", model: modelInfo, settings: providerSettings })
	expect(result).toBe(128_000)
})

it("should apply 20% clamping for regular OpenAI provider", () => {
	// Model whose configured maxTokens exceeds 20% of its context window.
	const modelInfo: ModelInfo = {
		supportsPromptCache: false,
		maxTokens: 128_000, // 64% of context window
		contextWindow: 200_000,
		supportsImages: false,
	}

	// The official OpenAI baseUrl means this is NOT an OpenAI Compatible provider.
	const providerSettings: ProviderSettings = {
		apiProvider: "openai",
		openAiBaseUrl: "https://api.openai.com/v1",
	}

	// Expect the 20% cap: 200_000 * 0.2 = 40_000.
	const result = getModelMaxOutputTokens({ modelId: "some-model", model: modelInfo, settings: providerSettings })
	expect(result).toBe(40_000)
})

it("should apply 20% clamping when openAiBaseUrl is not set", () => {
	// Model whose configured maxTokens exceeds 20% of its context window.
	const modelInfo: ModelInfo = {
		supportsPromptCache: false,
		maxTokens: 128_000, // 64% of context window
		contextWindow: 200_000,
		supportsImages: false,
	}

	// No openAiBaseUrl at all — treated the same as the official OpenAI endpoint.
	const providerSettings: ProviderSettings = { apiProvider: "openai" }

	// Expect the 20% cap: 200_000 * 0.2 = 40_000.
	const result = getModelMaxOutputTokens({ modelId: "some-model", model: modelInfo, settings: providerSettings })
	expect(result).toBe(40_000)
})

it("should handle OpenAI Compatible with various base URLs", () => {
	// Model whose configured maxTokens exceeds 20% of its context window.
	const modelInfo: ModelInfo = {
		supportsPromptCache: false,
		maxTokens: 100_000,
		contextWindow: 128_000,
		supportsImages: false,
	}

	// Each of these custom endpoints should be recognized as OpenAI Compatible.
	const compatibleBaseUrls = [
		"http://localhost:11434/v1",
		"https://api.groq.com/openai/v1",
		"https://api.together.xyz/v1",
		"https://api.deepinfra.com/v1/openai",
	]

	for (const openAiBaseUrl of compatibleBaseUrls) {
		const providerSettings: ProviderSettings = { apiProvider: "openai", openAiBaseUrl }

		// The full configured maxTokens comes back — no 20% clamp.
		const result = getModelMaxOutputTokens({ modelId: "test-model", model: modelInfo, settings: providerSettings })
		expect(result).toBe(100_000)
	}
})
})

describe("shouldUseReasoningBudget", () => {
Expand Down
14 changes: 11 additions & 3 deletions src/shared/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,21 @@ export const getModelMaxOutputTokens = ({
}

// If model has explicit maxTokens, clamp it to 20% of the context window
// Exception: GPT-5 models should use their exact configured max output tokens
// Exception 1: GPT-5 models should use their exact configured max output tokens
// Exception 2: OpenAI Compatible providers should use their exact configured max output tokens
if (model.maxTokens) {
// Check if this is a GPT-5 model (case-insensitive)
const isGpt5Model = modelId.toLowerCase().includes("gpt-5")

// GPT-5 models bypass the 20% cap and use their full configured max tokens
if (isGpt5Model) {
// Check if this is an OpenAI Compatible provider
// OpenAI Compatible uses apiProvider "openai" with a custom baseUrl
const isOpenAiCompatible =
settings?.apiProvider === "openai" &&
settings?.openAiBaseUrl &&
settings.openAiBaseUrl !== "https://api.openai.com/v1"
Comment on lines +127 to +130
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The exact string comparison for the OpenAI base URL doesn't handle URL normalization. If a user configures the official OpenAI API with a trailing slash (https://api.openai.com/v1/) or different casing (https://API.openai.com/v1), the code will incorrectly treat it as an OpenAI Compatible provider and bypass the 20% context limit. This could cause official OpenAI models to attempt using more tokens than allowed. Consider normalizing the URL (trim trailing slashes, lowercase the hostname) before comparison or using URL parsing to compare the normalized forms.


// GPT-5 models and OpenAI Compatible providers bypass the 20% cap and use their full configured max tokens
if (isGpt5Model || isOpenAiCompatible) {
return model.maxTokens
}

Expand Down