Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions src/shared/__tests__/api.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,95 @@ describe("getModelMaxOutputTokens", () => {
expect(result).toBe(20_000) // Should use model.maxTokens since it's exactly at 20%
})

test("should bypass 20% cap for GPT-5 models and use exact configured max tokens", () => {
const model: ModelInfo = {
contextWindow: 200_000,
supportsPromptCache: false,
maxTokens: 128_000, // 64% of context window, normally would be capped
}

const settings: ProviderSettings = {
apiProvider: "openai",
}

// Test various GPT-5 model IDs
const gpt5ModelIds = ["gpt-5", "gpt-5-turbo", "GPT-5", "openai/gpt-5-preview", "gpt-5-32k", "GPT-5-TURBO"]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great test coverage! Though it might be worth adding edge case tests to ensure the pattern matching doesn't have false positives. For example, testing that "not-gpt-5", "gpt-500", or "legacy-gpt-5-incompatible" don't incorrectly bypass the cap.


gpt5ModelIds.forEach((modelId) => {
const result = getModelMaxOutputTokens({
modelId,
model,
settings,
format: "openai",
})
// Should use full 128k tokens, not capped to 20% (40k)
expect(result).toBe(128_000)
})
})

test("should still apply 20% cap to non-GPT-5 models", () => {
	// maxTokens is 64% of the context window, so the clamp must kick in.
	const modelInfo: ModelInfo = {
		contextWindow: 200_000,
		supportsPromptCache: false,
		maxTokens: 128_000,
	}

	const providerSettings: ProviderSettings = {
		apiProvider: "openai",
	}

	// Models that must NOT receive the GPT-5 exemption.
	const nonGpt5ModelIds = ["gpt-4", "gpt-4-turbo", "gpt-3.5-turbo", "claude-3-5-sonnet", "gemini-pro"]

	for (const modelId of nonGpt5ModelIds) {
		const maxOutput = getModelMaxOutputTokens({
			modelId,
			model: modelInfo,
			settings: providerSettings,
			format: "openai",
		})
		// Clamped to 20% of the context window: 200_000 * 0.2 = 40_000.
		expect(maxOutput).toBe(40_000)
	}
})

test("should handle GPT-5 models with various max token configurations", () => {
	// Each case checks that a GPT-5 model returns exactly its configured
	// maxTokens, regardless of how large a fraction of the context window it is.
	const testCases: Array<{ maxTokens: number; contextWindow: number; expected: number }> = [
		// Full 128k, 64% of a 200k window.
		{ maxTokens: 128_000, contextWindow: 200_000, expected: 128_000 },
		// A smaller configured limit is respected as-is.
		{ maxTokens: 64_000, contextWindow: 200_000, expected: 64_000 },
		// 256k on a 400k window (64%) — still uncapped.
		{ maxTokens: 256_000, contextWindow: 400_000, expected: 256_000 },
	]

	for (const { maxTokens, contextWindow, expected } of testCases) {
		const modelInfo: ModelInfo = {
			contextWindow,
			supportsPromptCache: false,
			maxTokens,
		}

		const maxOutput = getModelMaxOutputTokens({
			modelId: "gpt-5-turbo",
			model: modelInfo,
			settings: { apiProvider: "openai" },
			format: "openai",
		})

		expect(maxOutput).toBe(expected)
	}
})

test("should return modelMaxTokens from settings when reasoning budget is required", () => {
const model: ModelInfo = {
contextWindow: 200_000,
Expand Down
10 changes: 10 additions & 0 deletions src/shared/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,17 @@ export const getModelMaxOutputTokens = ({
}

// If model has explicit maxTokens, clamp it to 20% of the context window
// Exception: GPT-5 models should use their exact configured max output tokens
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This GPT-5 exception is a significant behavior change that should be documented in the function's JSDoc. Future maintainers might wonder why GPT-5 models get special treatment. Could we add a note to the function documentation explaining this exception?

if (model.maxTokens) {
// Check if this is a GPT-5 model (case-insensitive)
const isGpt5Model = modelId.toLowerCase().includes("gpt-5")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pattern matching here could be more precise. Currently, modelId.toLowerCase().includes("gpt-5") would match unintended model IDs like "not-gpt-5-compatible" or "legacy-gpt-500".

Could we consider a more specific pattern? Perhaps:

Suggested change
const isGpt5Model = modelId.toLowerCase().includes("gpt-5")
// Check if this is a GPT-5 model (case-insensitive, more precise matching)
const isGpt5Model = /^(openai\/)?gpt-5($|-|\.)/.test(modelId.toLowerCase())

This would match "gpt-5", "gpt-5-turbo", "openai/gpt-5-preview" but not "not-gpt-5" or "gpt-500".


// GPT-5 models bypass the 20% cap and use their full configured max tokens
if (isGpt5Model) {
return model.maxTokens
}

// All other models are clamped to 20% of context window
return Math.min(model.maxTokens, Math.ceil(model.contextWindow * 0.2))
}

Expand Down
Loading