src/shared/__tests__/api.spec.ts (29 changes: 8 additions & 21 deletions)

@@ -194,18 +194,17 @@ describe("getModelMaxOutputTokens", () => {
expect(result).toBe(20_000) // Should use model.maxTokens since it's exactly at 20%
})

test("should bypass 20% cap for GPT-5 models and use exact configured max tokens", () => {
test("should apply 20% cap for GPT-5 models like other models", () => {
const model: ModelInfo = {
contextWindow: 200_000,
supportsPromptCache: false,
-maxTokens: 128_000, // 64% of context window, normally would be capped
+maxTokens: 128_000, // 64% of context window, should be capped
}

const settings: ProviderSettings = {
apiProvider: "openai",
}

// Test various GPT-5 model IDs
const gpt5ModelIds = ["gpt-5", "gpt-5-turbo", "GPT-5", "openai/gpt-5-preview", "gpt-5-32k", "GPT-5-TURBO"]
Review comment (Contributor):
[P3] Missing OpenRouter coverage for GPT-5 clamping. The PR mentions alignment "including OpenRouter", but this test only exercises format: 'openai'. Add a parallel assertion using format: 'openrouter' and settings.apiProvider: 'openrouter' with a GPT-5 modelId (e.g., 'openai/gpt-5-preview') to ensure the 20% cap applies via the OpenRouter path as well.
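A minimal sketch of the parallel assertion the reviewer describes, assuming getModelMaxOutputTokens accepts format: "openrouter" with the same argument shape used in the "openai" case above (the modelId argument and the ModelInfo / ProviderSettings types are taken from the surrounding diff; the exact call site is not shown here):

```ts
// Hypothetical test mirroring the existing "openai" case, per the review comment.
test("should apply 20% cap for GPT-5 models via OpenRouter", () => {
  const model: ModelInfo = {
    contextWindow: 200_000,
    supportsPromptCache: false,
    maxTokens: 128_000, // 64% of context window, should be capped
  }

  const settings: ProviderSettings = { apiProvider: "openrouter" }

  const result = getModelMaxOutputTokens({
    modelId: "openai/gpt-5-preview",
    model,
    settings,
    format: "openrouter",
  })

  // 20% of the 200_000 context window
  expect(result).toBe(40_000)
})
```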


gpt5ModelIds.forEach((modelId) => {
@@ -215,8 +214,8 @@ describe("getModelMaxOutputTokens", () => {
settings,
format: "openai",
})
-// Should use full 128k tokens, not capped to 20% (40k)
-expect(result).toBe(128_000)
+// Should be capped to 20% of context window: 200_000 * 0.2 = 40_000
+expect(result).toBe(40_000)
})
})

@@ -246,23 +245,11 @@ describe("getModelMaxOutputTokens", () => {
})
})

test("should handle GPT-5 models with various max token configurations", () => {
test("should cap GPT-5 models to min(model.maxTokens, 20% of contextWindow)", () => {
const testCases = [
-{
-maxTokens: 128_000,
-contextWindow: 200_000,
-expected: 128_000, // Uses full 128k
-},
-{
-maxTokens: 64_000,
-contextWindow: 200_000,
-expected: 64_000, // Uses configured 64k
-},
-{
-maxTokens: 256_000,
-contextWindow: 400_000,
-expected: 256_000, // Uses full 256k even though it's 64% of context
-},
+{ maxTokens: 128_000, contextWindow: 200_000, expected: 40_000 },
+{ maxTokens: 64_000, contextWindow: 200_000, expected: 40_000 },
+{ maxTokens: 256_000, contextWindow: 400_000, expected: 80_000 },
]

testCases.forEach(({ maxTokens, contextWindow, expected }) => {
src/shared/api.ts (10 changes: 0 additions & 10 deletions)

@@ -116,17 +116,7 @@ export const getModelMaxOutputTokens = ({
}

// If model has explicit maxTokens, clamp it to 20% of the context window
-// Exception: GPT-5 models should use their exact configured max output tokens
if (model.maxTokens) {
-// Check if this is a GPT-5 model (case-insensitive)
-const isGpt5Model = modelId.toLowerCase().includes("gpt-5")
-
-// GPT-5 models bypass the 20% cap and use their full configured max tokens
-if (isGpt5Model) {
-return model.maxTokens
-}
-
-// All other models are clamped to 20% of context window
return Math.min(model.maxTokens, Math.ceil(model.contextWindow * 0.2))
}

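For reference, a minimal illustration of the clamp that remains after this change. The helper below is not part of the diff; it just restates the Math.min(model.maxTokens, Math.ceil(model.contextWindow * 0.2)) expression in isolation, using the same values as the test cases above:

```ts
// Illustrative only: GPT-5 now goes through the same 20% clamp as every other model.
const clampMaxTokens = (maxTokens: number, contextWindow: number): number =>
  Math.min(maxTokens, Math.ceil(contextWindow * 0.2))

clampMaxTokens(128_000, 200_000) // 40_000
clampMaxTokens(64_000, 200_000)  // 40_000
clampMaxTokens(256_000, 400_000) // 80_000
clampMaxTokens(30_000, 200_000)  // 30_000 (already under the cap, returned unchanged)
```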