Skip to content

Commit 1950019

Browse files
committed
fix: prevent negative allowedTokens when maxTokens equals contextWindow
- Modified the reservedTokens calculation to check whether maxTokens equals contextWindow
- When they are equal, fall back to 20% of the context window instead of using the full window
- This prevents allowedTokens from becoming negative and triggering premature condensing
- Added a test case to verify the fix works correctly
1 parent 37300ef commit 1950019

File tree

2 files changed

+48
-1
lines changed

2 files changed

+48
-1
lines changed

src/core/sliding-window/__tests__/sliding-window.spec.ts

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,5 +1244,51 @@ describe("Sliding Window", () => {
12441244
expect(result2).not.toEqual(messagesWithSmallContent)
12451245
expect(result2.messages.length).toBe(3) // Truncated with 0.5 fraction
12461246
})
1247+
1248+
it("should handle models where maxTokens equals contextWindow without negative allowedTokens", async () => {
1249+
const contextWindow = 100000
1250+
const modelInfo = createModelInfo(contextWindow, contextWindow) // maxTokens = contextWindow
1251+
1252+
// Create messages with very small content in the last one to avoid token overflow
1253+
const messagesWithSmallContent = [
1254+
...messages.slice(0, -1),
1255+
{ ...messages[messages.length - 1], content: "" },
1256+
]
1257+
1258+
// With the fix, reservedTokens should be 20% of context window (20000)
1259+
// allowedTokens = 100000 * 0.9 - 20000 = 70000
1260+
// So tokens below 70000 should not trigger truncation
1261+
const result1 = await truncateConversationIfNeeded({
1262+
messages: messagesWithSmallContent,
1263+
totalTokens: 69999, // Just below the fixed threshold
1264+
contextWindow: modelInfo.contextWindow,
1265+
maxTokens: modelInfo.maxTokens,
1266+
apiHandler: mockApiHandler,
1267+
autoCondenseContext: false,
1268+
autoCondenseContextPercent: 100,
1269+
systemPrompt: "System prompt",
1270+
taskId,
1271+
profileThresholds: {},
1272+
currentProfileId: "default",
1273+
})
1274+
expect(result1.messages).toEqual(messagesWithSmallContent) // No truncation
1275+
1276+
// Above the threshold should trigger truncation
1277+
const result2 = await truncateConversationIfNeeded({
1278+
messages: messagesWithSmallContent,
1279+
totalTokens: 70001, // Above the fixed threshold
1280+
contextWindow: modelInfo.contextWindow,
1281+
maxTokens: modelInfo.maxTokens,
1282+
apiHandler: mockApiHandler,
1283+
autoCondenseContext: false,
1284+
autoCondenseContextPercent: 100,
1285+
systemPrompt: "System prompt",
1286+
taskId,
1287+
profileThresholds: {},
1288+
currentProfileId: "default",
1289+
})
1290+
expect(result2.messages).not.toEqual(messagesWithSmallContent) // Should truncate
1291+
expect(result2.messages.length).toBe(3) // Truncated with 0.5 fraction
1292+
})
12471293
})
12481294
})

src/core/sliding-window/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ export async function truncateConversationIfNeeded({
105105
let error: string | undefined
106106
let cost = 0
107107
// Calculate the maximum tokens reserved for response
108-
const reservedTokens = maxTokens || contextWindow * 0.2
108+
// If maxTokens equals contextWindow, fall back to 20% to avoid negative allowedTokens
109+
const reservedTokens = maxTokens && maxTokens !== contextWindow ? maxTokens : contextWindow * 0.2
109110

110111
// Estimate tokens for the last message (which is always a user message)
111112
const lastMessage = messages[messages.length - 1]

0 commit comments

Comments (0)