Commit 6df165d
fix: respect condensation percentage threshold when saving settings
- Fixed issue where context condensation was triggered on every action after saving settings
- Condensation now only triggers when the percentage threshold is actually met
- If tokens exceed hard limit but percentage is below threshold, sliding window truncation is used instead
- Added detailed logging to help debug condensation triggers
- Updated tests to reflect the new behavior

This prevents unnecessary condensation operations that were happening even when the configured percentage threshold had not been reached.
1 parent ad0e33e commit 6df165d

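In short, the change makes the configured percentage threshold the sole trigger for automatic condensation; exceeding the hard token limit alone now falls back to sliding-window truncation. Below is a minimal TypeScript sketch of that decision flow, with simplified inputs: decideContextAction is illustrative and not part of the codebase, while effectiveThreshold and allowedTokens mirror names that appear in the diff further down.

type ContextAction = "condense" | "slidingWindow" | "none"

// Illustrative sketch only; the real logic lives in truncateConversationIfNeeded (see diff below).
function decideContextAction(
	prevContextTokens: number,
	contextWindow: number,
	allowedTokens: number, // hard token limit
	effectiveThreshold: number, // autoCondenseContextPercent or a profile override
): ContextAction {
	const contextPercent = (100 * prevContextTokens) / contextWindow

	// Automatic condensation only when the percentage threshold is actually met.
	if (contextPercent >= effectiveThreshold) return "condense"

	// Over the hard limit but below the percentage threshold: sliding-window truncation.
	if (prevContextTokens > allowedTokens) return "slidingWindow"

	return "none"
}
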
2 files changed (+53, -43 lines)

src/core/sliding-window/__tests__/sliding-window.spec.ts

Lines changed: 27 additions & 42 deletions
@@ -544,72 +544,56 @@ describe("Sliding Window", () => {
 		})
 	})
 
-	it("should use summarizeConversation when autoCondenseContext is true and tokens exceed threshold", async () => {
-		// Mock the summarizeConversation function
-		const mockSummary = "This is a summary of the conversation"
-		const mockCost = 0.05
-		const mockSummarizeResponse: condenseModule.SummarizeResponse = {
-			messages: [
-				{ role: "user", content: "First message" },
-				{ role: "assistant", content: mockSummary, isSummary: true },
-				{ role: "user", content: "Last message" },
-			],
-			summary: mockSummary,
-			cost: mockCost,
-			newContextTokens: 100,
-		}
-
-		const summarizeSpy = vi
-			.spyOn(condenseModule, "summarizeConversation")
-			.mockResolvedValue(mockSummarizeResponse)
+	it("should NOT use summarizeConversation when tokens exceed hard limit but percentage is below threshold", async () => {
+		// Reset any previous mock calls
+		vi.clearAllMocks()
+		const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
 
 		const modelInfo = createModelInfo(100000, 30000)
-		const totalTokens = 70001 // Above threshold
+		const totalTokens = 70001 // Above hard limit but only 70% of context window
 		const messagesWithSmallContent = [
 			...messages.slice(0, -1),
 			{ ...messages[messages.length - 1], content: "" },
 		]
 
+		// When truncating, always uses 0.5 fraction
+		// With 4 messages after the first, 0.5 fraction means remove 2 messages
+		const expectedMessages = [
+			messagesWithSmallContent[0],
+			messagesWithSmallContent[3],
+			messagesWithSmallContent[4],
+		]
+
 		const result = await truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
 			totalTokens,
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 			apiHandler: mockApiHandler,
 			autoCondenseContext: true,
-			autoCondenseContextPercent: 100,
+			autoCondenseContextPercent: 100, // 100% threshold, but tokens are only at 70%
 			systemPrompt: "System prompt",
 			taskId,
 			profileThresholds: {},
 			currentProfileId: "default",
 		})
 
-		// Verify summarizeConversation was called with the right parameters
-		expect(summarizeSpy).toHaveBeenCalledWith(
-			messagesWithSmallContent,
-			mockApiHandler,
-			"System prompt",
-			taskId,
-			70001,
-			true,
-			undefined, // customCondensingPrompt
-			undefined, // condensingApiHandler
-		)
+		// Verify summarizeConversation was NOT called (percentage threshold not met)
+		expect(summarizeSpy).not.toHaveBeenCalled()
 
-		// Verify the result contains the summary information
-		expect(result).toMatchObject({
-			messages: mockSummarizeResponse.messages,
-			summary: mockSummary,
-			cost: mockCost,
+		// Verify it used sliding window truncation instead
+		expect(result).toEqual({
+			messages: expectedMessages,
+			summary: "",
+			cost: 0,
 			prevContextTokens: totalTokens,
 		})
-		// newContextTokens might be present, but we don't need to verify its exact value
 
 		// Clean up
 		summarizeSpy.mockRestore()
 	})
 
-	it("should fall back to truncateConversation when autoCondenseContext is true but summarization fails", async () => {
+	it("should fall back to truncateConversation when autoCondenseContext is true, percentage threshold is met, but summarization fails", async () => {
 		// Mock the summarizeConversation function to return an error
 		const mockSummarizeResponse: condenseModule.SummarizeResponse = {
 			messages: messages, // Original messages unchanged
@@ -623,7 +607,8 @@ describe("Sliding Window", () => {
 			.mockResolvedValue(mockSummarizeResponse)
 
 		const modelInfo = createModelInfo(100000, 30000)
-		const totalTokens = 70001 // Above threshold
+		// Set tokens to meet percentage threshold (100% of context)
+		const totalTokens = 100001 // Above 100% threshold
 		const messagesWithSmallContent = [
 			...messages.slice(0, -1),
 			{ ...messages[messages.length - 1], content: "" },
@@ -644,17 +629,17 @@ describe("Sliding Window", () => {
 			maxTokens: modelInfo.maxTokens,
 			apiHandler: mockApiHandler,
 			autoCondenseContext: true,
-			autoCondenseContextPercent: 100,
+			autoCondenseContextPercent: 100, // 100% threshold, tokens are at 100%+
 			systemPrompt: "System prompt",
 			taskId,
 			profileThresholds: {},
 			currentProfileId: "default",
 		})
 
-		// Verify summarizeConversation was called
+		// Verify summarizeConversation was called (percentage threshold was met)
 		expect(summarizeSpy).toHaveBeenCalled()
 
-		// Verify it fell back to truncation
+		// Verify it fell back to truncation after summarization failed
 		expect(result.messages).toEqual(expectedMessages)
 		expect(result.summary).toBe("")
 		expect(result.prevContextTokens).toBe(totalTokens)

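The expectedMessages in the first test above follow from the truncation arithmetic its comments describe. Here is a small sketch of that arithmetic, under the assumption that truncation keeps the first message and removes an even number of the messages that follow it; sketchTruncate is illustrative and not the repository's truncateConversation.

// Illustrative sketch of the truncation arithmetic the test expects.
// Assumption: the first message is always kept and an even number of the
// remaining messages is removed, based on the 0.5 fraction.
function sketchTruncate<T>(messages: T[], fracToRemove: number): T[] {
	const removable = messages.length - 1 // everything after the first message
	const toRemove = Math.floor((removable * fracToRemove) / 2) * 2 // round down to an even count
	return [messages[0], ...messages.slice(1 + toRemove)]
}

// 5 messages, fraction 0.5: 4 removable, remove 2 -> keep indices 0, 3, 4,
// matching expectedMessages in the test above.
const kept = sketchTruncate(["m0", "m1", "m2", "m3", "m4"], 0.5)
console.log(kept) // ["m0", "m3", "m4"]
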
src/core/sliding-window/index.ts

Lines changed: 26 additions & 1 deletion
@@ -144,7 +144,17 @@ export async function truncateConversationIfNeeded({
 
 	if (autoCondenseContext) {
 		const contextPercent = (100 * prevContextTokens) / contextWindow
-		if (contextPercent >= effectiveThreshold || prevContextTokens > allowedTokens) {
+		// Check if we should trigger condensation based on percentage threshold
+		const shouldCondenseByPercentage = contextPercent >= effectiveThreshold
+		// Check if we're over the hard token limit
+		const isOverHardLimit = prevContextTokens > allowedTokens
+
+		// Only trigger automatic condensation when the percentage threshold is met
+		// If we're over the hard limit but below percentage, we'll use sliding window instead
+		if (shouldCondenseByPercentage) {
+			console.log(
+				`[Condensation] Triggering automatic condensation: ${contextPercent.toFixed(1)}% >= ${effectiveThreshold}% threshold`,
+			)
 			// Attempt to intelligently condense the context
 			const result = await summarizeConversation(
 				messages,
@@ -159,14 +169,29 @@ export async function truncateConversationIfNeeded({
 			if (result.error) {
 				error = result.error
 				cost = result.cost
+				console.log(`[Condensation] Condensation failed: ${result.error}`)
+				// If condensation fails and we're over the hard limit, fall through to sliding window
 			} else {
+				console.log(
+					`[Condensation] Successfully condensed context from ${prevContextTokens} to ${result.newContextTokens} tokens`,
+				)
 				return { ...result, prevContextTokens }
 			}
+		} else if (isOverHardLimit) {
+			// If we're over the hard limit but haven't reached the percentage threshold,
+			// log this condition - we'll use sliding window truncation below
+			console.log(
+				`[Condensation] Context tokens (${prevContextTokens}) exceed allowed (${allowedTokens}), but percentage (${contextPercent.toFixed(1)}%) < threshold (${effectiveThreshold}%). Will use sliding window truncation.`,
+			)
 		}
 	}
 
 	// Fall back to sliding window truncation if needed
+	// This happens when we're over the hard token limit regardless of percentage
 	if (prevContextTokens > allowedTokens) {
+		console.log(
+			`[Condensation] Using sliding window truncation: ${prevContextTokens} tokens > ${allowedTokens} allowed`,
+		)
 		const truncatedMessages = truncateConversation(messages, 0.5, taskId)
 		return { messages: truncatedMessages, prevContextTokens, summary: "", cost, error }
 	}

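For a quick sanity check of the two scenarios exercised by the updated tests, the numbers work out as in the sketch below. allowedTokens is not shown in this diff; a value of 70000 is assumed purely for illustration, consistent with the first test calling 70001 tokens "above hard limit but only 70% of context window".

// Worked numbers for the two updated tests (illustrative; allowedTokens = 70000 is an assumption).
const contextWindow = 100000
const effectiveThreshold = 100 // autoCondenseContextPercent in the tests
const allowedTokens = 70000

for (const prevContextTokens of [70001, 100001]) {
	const contextPercent = (100 * prevContextTokens) / contextWindow
	const shouldCondenseByPercentage = contextPercent >= effectiveThreshold
	const isOverHardLimit = prevContextTokens > allowedTokens
	// 70001 -> ~70.0% < 100%, over hard limit: sliding-window truncation, no summarizeConversation call
	// 100001 -> ~100.0% >= 100%: condensation attempted (falls back to truncation on error)
	console.log({ prevContextTokens, contextPercent, shouldCondenseByPercentage, isOverHardLimit })
}
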