Update the max_tokens fallback logic in the sliding window (#5993)

mrubens · web-flow · commit 8334f0869ff8 · 2025-07-21T22:26:39.000-04:00
diff --git a/src/core/sliding-window/__tests__/sliding-window.spec.ts b/src/core/sliding-window/__tests__/sliding-window.spec.ts
@@ -1103,9 +1103,9 @@ describe("Sliding Window", () => {
 			expect(result2.prevContextTokens).toBe(50001)
 		})
 
-		it("should use 20% of context window as buffer when maxTokens is undefined", async () => {
+		it("should use ANTHROPIC_DEFAULT_MAX_TOKENS as buffer when maxTokens is undefined", async () => {
 			const modelInfo = createModelInfo(100000, undefined)
-			// Max tokens = 100000 - (100000 * 0.2) = 80000
+			// Max tokens = 100000 - ANTHROPIC_DEFAULT_MAX_TOKENS = 100000 - 8192 = 91808
 
 			// Create messages with very small content in the last one to avoid token overflow
 			const messagesWithSmallContent = [
@@ -1117,7 +1117,7 @@ describe("Sliding Window", () => {
 			// Below max tokens and buffer - no truncation
 			const result1 = await truncateConversationIfNeeded({
 				messages: messagesWithSmallContent,
-				totalTokens: 69999, // Well below threshold + dynamic buffer
+				totalTokens: 81807, // Just below the truncation threshold (91808 - 10000 buffer = 81808)
 				contextWindow: modelInfo.contextWindow,
 				maxTokens: modelInfo.maxTokens,
 				apiHandler: mockApiHandler,
@@ -1132,13 +1132,13 @@ describe("Sliding Window", () => {
 				messages: messagesWithSmallContent,
 				summary: "",
 				cost: 0,
-				prevContextTokens: 69999,
+				prevContextTokens: 81807,
 			})
 
 			// Above max tokens - truncate
 			const result2 = await truncateConversationIfNeeded({
 				messages: messagesWithSmallContent,
-				totalTokens: 80001, // Above threshold
+				totalTokens: 81809, // Above threshold (81808)
 				contextWindow: modelInfo.contextWindow,
 				maxTokens: modelInfo.maxTokens,
 				apiHandler: mockApiHandler,
@@ -1153,7 +1153,7 @@ describe("Sliding Window", () => {
 			expect(result2.messages.length).toBe(3) // Truncated with 0.5 fraction
 			expect(result2.summary).toBe("")
 			expect(result2.cost).toBe(0)
-			expect(result2.prevContextTokens).toBe(80001)
+			expect(result2.prevContextTokens).toBe(81809)
 		})
 
 		it("should handle small context windows appropriately", async () => {
diff --git a/src/core/sliding-window/index.ts b/src/core/sliding-window/index.ts
@@ -5,6 +5,7 @@ import { TelemetryService } from "@roo-code/telemetry"
 import { ApiHandler } from "../../api"
 import { MAX_CONDENSE_THRESHOLD, MIN_CONDENSE_THRESHOLD, summarizeConversation, SummarizeResponse } from "../condense"
 import { ApiMessage } from "../task-persistence/apiMessages"
+import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "@roo-code/types"
 
 /**
  * Default percentage of the context window to use as a buffer when deciding when to truncate
@@ -105,7 +106,7 @@ export async function truncateConversationIfNeeded({
 	let error: string | undefined
 	let cost = 0
 	// Calculate the maximum tokens reserved for response
-	const reservedTokens = maxTokens || contextWindow * 0.2
+	const reservedTokens = maxTokens || ANTHROPIC_DEFAULT_MAX_TOKENS
 
 	// Estimate tokens for the last message (which is always a user message)
 	const lastMessage = messages[messages.length - 1]