fix: add maxInputTokens parameter to enforce per-request token limits #7854
Tests — `@@ -1243,5 +1243,115 @@ describe("Sliding Window", () => {` (everything except the surrounding context lines is newly added):

```ts
			expect(result2).not.toEqual(messagesWithSmallContent)
			expect(result2.messages.length).toBe(3) // Truncated with 0.5 fraction
		})

		it("should respect maxInputTokens limit when provided", async () => {
			const modelInfo = createModelInfo(200000, 8192)
			const messagesWithSmallContent = [
				...messages.slice(0, -1),
				{ ...messages[messages.length - 1], content: "" },
			]

			// Test with maxInputTokens set to 125000 (Gemini free tier limit)
			const result = await truncateConversationIfNeeded({
				messages: messagesWithSmallContent,
				totalTokens: 150000, // Above the maxInputTokens limit
				contextWindow: modelInfo.contextWindow,
				maxTokens: modelInfo.maxTokens,
				maxInputTokens: 125000, // Gemini free tier limit
				apiHandler: mockApiHandler,
				autoCondenseContext: false,
				autoCondenseContextPercent: 100,
				systemPrompt: "test",
				taskId,
				profileThresholds: {},
				currentProfileId: "default",
			})

			// Should truncate because total tokens exceed maxInputTokens
			expect(result.messages).toHaveLength(3)
			expect(result.summary).toBe("")
			expect(result.cost).toBe(0)
			expect(result.prevContextTokens).toBe(150000)
		})

		it("should use the more restrictive limit between maxInputTokens and context window", async () => {
			const modelInfo = createModelInfo(200000, 8192)
			const messagesWithSmallContent = [
				...messages.slice(0, -1),
				{ ...messages[messages.length - 1], content: "" },
			]

			// Test where context window limit is more restrictive
			// Context window limit: 200000 * 0.9 - 8192 = 171808
			const result1 = await truncateConversationIfNeeded({
				messages: messagesWithSmallContent,
				totalTokens: 171809, // Just above context window limit
				contextWindow: modelInfo.contextWindow,
				maxTokens: modelInfo.maxTokens,
				maxInputTokens: 300000, // Higher than context window
				apiHandler: mockApiHandler,
				autoCondenseContext: false,
				autoCondenseContextPercent: 100,
				systemPrompt: "test",
				taskId,
				profileThresholds: {},
				currentProfileId: "default",
			})

			// Should truncate based on context window limit
			expect(result1.messages).toHaveLength(3)

			// Test where maxInputTokens is more restrictive
			const result2 = await truncateConversationIfNeeded({
				messages: messagesWithSmallContent,
				totalTokens: 100000,
				contextWindow: modelInfo.contextWindow,
				maxTokens: modelInfo.maxTokens,
				maxInputTokens: 50000, // Lower than current tokens
				apiHandler: mockApiHandler,
				autoCondenseContext: false,
				autoCondenseContextPercent: 100,
				systemPrompt: "test",
				taskId,
				profileThresholds: {},
				currentProfileId: "default",
			})

			// Should truncate based on maxInputTokens limit
			expect(result2.messages).toHaveLength(3)
			expect(result2.summary).toBe("")
			expect(result2.cost).toBe(0)
			expect(result2.prevContextTokens).toBe(100000)
		})

		it("should not truncate when maxInputTokens is not exceeded", async () => {
			const modelInfo = createModelInfo(200000, 8192)
			const messagesWithSmallContent = [
				...messages.slice(0, -1),
				{ ...messages[messages.length - 1], content: "" },
			]

			// Test with tokens below maxInputTokens limit
			const result = await truncateConversationIfNeeded({
				messages: messagesWithSmallContent,
				totalTokens: 50000, // Below the maxInputTokens limit
				contextWindow: modelInfo.contextWindow,
				maxTokens: modelInfo.maxTokens,
				maxInputTokens: 125000, // Gemini free tier limit
				apiHandler: mockApiHandler,
				autoCondenseContext: false,
				autoCondenseContextPercent: 100,
				systemPrompt: "test",
				taskId,
				profileThresholds: {},
				currentProfileId: "default",
			})

			// Should not truncate because total tokens are below maxInputTokens
			expect(result.messages).toEqual(messagesWithSmallContent)
			expect(result.summary).toBe("")
			expect(result.cost).toBe(0)
			expect(result.prevContextTokens).toBe(50000)
		})
	})
})
```

Review comment on the new tests (Contributor, PR author):

> The test coverage is comprehensive! Consider adding one more test case that explicitly verifies the interaction between …
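The review comment above is cut off mid-sentence in this capture. If the missing half concerned the interaction between `maxInputTokens` and `autoCondenseContext` — plausible, since every new test passes `autoCondenseContext: false` — such a test might look like the sketch below. The test name and assertions are assumptions about the condensing path, not taken from the PR:

```ts
it("should still honor maxInputTokens when autoCondenseContext is enabled", async () => {
	const modelInfo = createModelInfo(200000, 8192)
	const messagesWithSmallContent = [
		...messages.slice(0, -1),
		{ ...messages[messages.length - 1], content: "" },
	]

	const result = await truncateConversationIfNeeded({
		messages: messagesWithSmallContent,
		totalTokens: 150000, // Above the 125000 maxInputTokens cap
		contextWindow: modelInfo.contextWindow,
		maxTokens: modelInfo.maxTokens,
		maxInputTokens: 125000,
		apiHandler: mockApiHandler,
		autoCondenseContext: true, // The only difference from the tests above
		autoCondenseContextPercent: 100,
		systemPrompt: "test",
		taskId,
		profileThresholds: {},
		currentProfileId: "default",
	})

	// Hypothetical expectations: with condensing enabled, the context should
	// still shrink once prevContextTokens exceeds the maxInputTokens-derived
	// allowance — whether by summarization or by falling back to truncation.
	expect(result.prevContextTokens).toBe(150000)
	expect(result.messages.length).toBeLessThan(messagesWithSmallContent.length)
})
```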
Implementation — the sliding-window truncation module. The first two hunks thread the new parameter through the options type and the function signature:

```diff
@@ -68,6 +68,7 @@ type TruncateOptions = {
 	totalTokens: number
 	contextWindow: number
 	maxTokens?: number | null
+	maxInputTokens?: number | null
 	apiHandler: ApiHandler
 	autoCondenseContext: boolean
 	autoCondenseContextPercent: number
```

```diff
@@ -93,6 +94,7 @@ export async function truncateConversationIfNeeded({
 	totalTokens,
 	contextWindow,
 	maxTokens,
+	maxInputTokens,
 	apiHandler,
 	autoCondenseContext,
 	autoCondenseContextPercent,
```
The third hunk applies whichever limit is more restrictive, `maxInputTokens` or the buffered context window:

```diff
@@ -119,8 +121,16 @@ export async function truncateConversationIfNeeded({
 	const prevContextTokens = totalTokens + lastMessageTokens

 	// Calculate available tokens for conversation history
-	// Truncate if we're within TOKEN_BUFFER_PERCENTAGE of the context window
-	const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
+	// First check if there's a maxInputTokens limit (e.g., for Gemini free tier)
+	let allowedTokens: number
+	if (maxInputTokens && maxInputTokens > 0) {
+		// Use the more restrictive limit between maxInputTokens and context window
+		const contextWindowLimit = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
+		allowedTokens = Math.min(maxInputTokens, contextWindowLimit)
+	} else {
+		// Truncate if we're within TOKEN_BUFFER_PERCENTAGE of the context window
+		allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
+	}

 	// Determine the effective threshold to use
 	let effectiveThreshold = autoCondenseContextPercent
```

Review comment on the `maxInputTokens` branch (Contributor, PR author):

> Consider adding a warning log when …
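To make the `Math.min` concrete with the numbers the tests use: with a 200000-token context window and 8192 reserved output tokens, the buffered allowance is 200000 * 0.9 - 8192 = 171808 (per the test's own comment), so `maxInputTokens: 125000` is the binding limit, while `maxInputTokens: 300000` loses to the 171808 context-window allowance.

The review comment and its attached "Suggested change" are truncated in this capture. A minimal sketch of one plausible reading — warn when the configured cap can never bind because the context window is already tighter; the wording and placement are assumptions, not the reviewer's actual suggestion:

```ts
if (maxInputTokens && maxInputTokens > 0) {
	const contextWindowLimit = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
	// Hypothetical warning, not from the PR: flag a cap that the context
	// window already makes unreachable, since it silently has no effect.
	if (maxInputTokens > contextWindowLimit) {
		console.warn(
			`maxInputTokens (${maxInputTokens}) exceeds the usable context window ` +
				`(${contextWindowLimit}); the context window limit will apply instead`,
		)
	}
	allowedTokens = Math.min(maxInputTokens, contextWindowLimit)
}
```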
Review comment:

> Consider adding more detailed JSDoc comments for the `maxInputTokens` parameter. It would be helpful to explain its purpose, when to use it, and provide example values for different providers (e.g., "125000 for Gemini 2.5 Pro free tier"). This would make it easier for users to understand how to configure this parameter correctly.
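A sketch of documentation along those lines — the example value is the one the reviewer cites; the exact wording is an assumption, not part of the PR:

```ts
type TruncateOptions = {
	// ...
	/**
	 * Optional hard cap on input tokens for a single request, applied in
	 * addition to the context-window buffer check. When set to a positive
	 * number, the conversation is truncated (or condensed) until the prompt
	 * fits within Math.min(maxInputTokens, usable context window).
	 *
	 * Use this when a provider enforces a per-request input limit smaller
	 * than the model's context window — e.g. 125000 for the Gemini 2.5 Pro
	 * free tier, per the review comment above.
	 *
	 * undefined, null, or 0 disables the cap.
	 */
	maxInputTokens?: number | null
	// ...
}
```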