Commit 64a90b9
fix: respect autoCondenseContext setting in sliding window truncation
- Fixed bug where context condensing was triggered even when autoCondenseContext was disabled
- Modified truncateConversationIfNeeded to use only sliding window truncation when auto-condense is disabled
- Updated handleContextWindowExceededError to respect the user's auto-condense preference
- Added comprehensive test coverage for the fix

Fixes #7953
1 parent: c4c4780

File tree: 3 files changed, +268 −8 lines

- src/core/sliding-window/__tests__/auto-condense-disabled.spec.ts (new)
- src/core/sliding-window/index.ts
- src/core/task/Task.ts
src/core/sliding-window/__tests__/auto-condense-disabled.spec.ts

Lines changed: 248 additions & 0 deletions
@@ -0,0 +1,248 @@
// npx vitest src/core/sliding-window/__tests__/auto-condense-disabled.spec.ts

import { Anthropic } from "@anthropic-ai/sdk"
import type { ModelInfo } from "@roo-code/types"
import { TelemetryService } from "@roo-code/telemetry"
import { BaseProvider } from "../../../api/providers/base-provider"
import { ApiMessage } from "../../task-persistence/apiMessages"
import * as condenseModule from "../../condense"
import { truncateConversationIfNeeded } from "../index"

// Create a mock ApiHandler for testing
class MockApiHandler extends BaseProvider {
	createMessage(): any {
		// Mock implementation for testing - returns an async iterable stream
		const mockStream = {
			async *[Symbol.asyncIterator]() {
				yield { type: "text", text: "Mock summary content" }
				yield { type: "usage", inputTokens: 100, outputTokens: 50 }
			},
		}
		return mockStream
	}

	getModel(): { id: string; info: ModelInfo } {
		return {
			id: "test-model",
			info: {
				contextWindow: 100000,
				maxTokens: 50000,
				supportsPromptCache: true,
				supportsImages: false,
				inputPrice: 0,
				outputPrice: 0,
				description: "Test model",
			},
		}
	}
}

// Create a singleton instance for tests
const mockApiHandler = new MockApiHandler()
const taskId = "test-task-id"

describe("Auto-condense disabled behavior", () => {
	beforeEach(() => {
		if (!TelemetryService.hasInstance()) {
			TelemetryService.createInstance([])
		}
		vi.clearAllMocks()
	})

	const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
		contextWindow,
		supportsPromptCache: true,
		maxTokens,
	})

	const messages: ApiMessage[] = [
		{ role: "user", content: "First message" },
		{ role: "assistant", content: "Second message" },
		{ role: "user", content: "Third message" },
		{ role: "assistant", content: "Fourth message" },
		{ role: "user", content: "Fifth message" },
	]

	it("should NOT condense when autoCondenseContext is false and tokens are below limit", async () => {
		const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
		const modelInfo = createModelInfo(100000, 30000)

		// Set tokens below the limit
		const totalTokens = 50000 // Below the 60000 limit (100000 * 0.9 - 30000)
		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

		const result = await truncateConversationIfNeeded({
			messages: messagesWithSmallContent,
			totalTokens,
			contextWindow: modelInfo.contextWindow,
			maxTokens: modelInfo.maxTokens,
			apiHandler: mockApiHandler,
			autoCondenseContext: false, // Disabled
			autoCondenseContextPercent: 50, // Should be ignored
			systemPrompt: "System prompt",
			taskId,
			profileThresholds: {},
			currentProfileId: "default",
		})

		// Should NOT call summarizeConversation
		expect(summarizeSpy).not.toHaveBeenCalled()

		// Should return original messages
		expect(result.messages).toEqual(messagesWithSmallContent)
		expect(result.summary).toBe("")
		expect(result.cost).toBe(0)

		summarizeSpy.mockRestore()
	})

	it("should use sliding window truncation when autoCondenseContext is false and tokens exceed limit", async () => {
		const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
		const modelInfo = createModelInfo(100000, 30000)

		// Set tokens above the limit
		const totalTokens = 70001 // Above the 60000 limit
		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

		const result = await truncateConversationIfNeeded({
			messages: messagesWithSmallContent,
			totalTokens,
			contextWindow: modelInfo.contextWindow,
			maxTokens: modelInfo.maxTokens,
			apiHandler: mockApiHandler,
			autoCondenseContext: false, // Disabled
			autoCondenseContextPercent: 50, // Should be ignored
			systemPrompt: "System prompt",
			taskId,
			profileThresholds: {},
			currentProfileId: "default",
		})

		// Should NOT call summarizeConversation
		expect(summarizeSpy).not.toHaveBeenCalled()

		// Should use sliding window truncation (removes 2 messages with 0.5 fraction)
		const expectedMessages = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]
		expect(result.messages).toEqual(expectedMessages)
		expect(result.summary).toBe("") // No summary when using sliding window
		expect(result.cost).toBe(0)

		summarizeSpy.mockRestore()
	})

	it("should NOT condense even when percentage threshold is exceeded if autoCondenseContext is false", async () => {
		const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
		const modelInfo = createModelInfo(100000, 30000)

		// Set tokens to 80% of context window (exceeds typical percentage thresholds)
		const totalTokens = 80000
		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

		const result = await truncateConversationIfNeeded({
			messages: messagesWithSmallContent,
			totalTokens,
			contextWindow: modelInfo.contextWindow,
			maxTokens: modelInfo.maxTokens,
			apiHandler: mockApiHandler,
			autoCondenseContext: false, // Disabled
			autoCondenseContextPercent: 50, // 80% exceeds this, but should be ignored
			systemPrompt: "System prompt",
			taskId,
			profileThresholds: {},
			currentProfileId: "default",
		})

		// Should NOT call summarizeConversation even though percentage is exceeded
		expect(summarizeSpy).not.toHaveBeenCalled()

		// Should use sliding window truncation since tokens exceed hard limit
		const expectedMessages = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]
		expect(result.messages).toEqual(expectedMessages)
		expect(result.summary).toBe("")
		expect(result.cost).toBe(0)

		summarizeSpy.mockRestore()
	})

	it("should respect autoCondenseContext setting in forced truncation scenarios", async () => {
		const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
		const modelInfo = createModelInfo(100000, 30000)

		// Simulate a forced truncation scenario (e.g., context window exceeded)
		// This would be called from handleContextWindowExceededError
		const totalTokens = 95000 // Way above limit, simulating context window error
		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

		// Test with autoCondenseContext = false (user preference)
		const result = await truncateConversationIfNeeded({
			messages: messagesWithSmallContent,
			totalTokens,
			contextWindow: modelInfo.contextWindow,
			maxTokens: modelInfo.maxTokens,
			apiHandler: mockApiHandler,
			autoCondenseContext: false, // User has disabled auto-condense
			autoCondenseContextPercent: 75, // FORCED_CONTEXT_REDUCTION_PERCENT
			systemPrompt: "System prompt",
			taskId,
			profileThresholds: {},
			currentProfileId: "default",
		})

		// Should NOT call summarizeConversation, respecting user preference
		expect(summarizeSpy).not.toHaveBeenCalled()

		// Should use sliding window truncation instead
		const expectedMessages = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]
		expect(result.messages).toEqual(expectedMessages)
		expect(result.summary).toBe("")
		expect(result.cost).toBe(0)

		summarizeSpy.mockRestore()
	})

	it("should use condensing when autoCondenseContext is true and tokens exceed limit", async () => {
		// This is a control test to ensure condensing still works when enabled
		const mockSummary = "This is a summary"
		const mockCost = 0.05
		const mockSummarizeResponse: condenseModule.SummarizeResponse = {
			messages: [
				{ role: "user", content: "First message" },
				{ role: "assistant", content: mockSummary, isSummary: true },
				{ role: "user", content: "Last message" },
			],
			summary: mockSummary,
			cost: mockCost,
			newContextTokens: 100,
		}

		const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation").mockResolvedValue(mockSummarizeResponse)

		const modelInfo = createModelInfo(100000, 30000)
		const totalTokens = 70001 // Above limit
		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

		const result = await truncateConversationIfNeeded({
			messages: messagesWithSmallContent,
			totalTokens,
			contextWindow: modelInfo.contextWindow,
			maxTokens: modelInfo.maxTokens,
			apiHandler: mockApiHandler,
			autoCondenseContext: true, // Enabled
			autoCondenseContextPercent: 100,
			systemPrompt: "System prompt",
			taskId,
			profileThresholds: {},
			currentProfileId: "default",
		})

		// Should call summarizeConversation when enabled
		expect(summarizeSpy).toHaveBeenCalled()

		// Should return condensed result
		expect(result.messages).toEqual(mockSummarizeResponse.messages)
		expect(result.summary).toBe(mockSummary)
		expect(result.cost).toBe(mockCost)

		summarizeSpy.mockRestore()
	})
})
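
Two numbers in these tests are worth spelling out. The 60000-token limit follows the arithmetic in the first test's comment (contextWindow * 0.9 - maxTokens), and the expected survivors [0, 3, 4] follow from sliding-window truncation removing an even number of messages after the first. Below is a minimal sketch consistent with those expectations; both helpers are illustrative stand-ins, not the module's actual exports.

// Illustrative helpers mirroring the arithmetic in the test comments;
// the real implementation lives in src/core/sliding-window and may differ.

// Hard limit per the first test's comment: reserve a 10% buffer of the
// context window, then subtract the model's reserved output tokens.
function allowedTokensSketch(contextWindow: number, maxTokens: number): number {
	return contextWindow * 0.9 - maxTokens // 100000 * 0.9 - 30000 = 60000
}

// Sliding-window truncation consistent with the assertions above: keep the
// first message, then drop a fraction of the rest, rounded down to an even
// count so user/assistant turns stay paired.
function slidingWindowSketch<T>(messages: T[], fracToRemove: number): T[] {
	const raw = Math.floor((messages.length - 1) * fracToRemove)
	const removeCount = raw - (raw % 2)
	return [messages[0], ...messages.slice(1 + removeCount)]
}

// With the five test messages and fracToRemove = 0.5, this keeps indices
// [0, 3, 4], exactly the expectedMessages asserted in the tests.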

src/core/sliding-window/index.ts

Lines changed: 15 additions & 5 deletions
@@ -159,17 +159,27 @@ export async function truncateConversationIfNeeded({
 			if (result.error) {
 				error = result.error
 				cost = result.cost
+				// If summarization failed but we still need to reduce context,
+				// fall back to sliding window truncation
+				if (prevContextTokens > allowedTokens) {
+					const truncatedMessages = truncateConversation(messages, 0.5, taskId)
+					return { messages: truncatedMessages, prevContextTokens, summary: "", cost, error }
+				}
 			} else {
 				return { ...result, prevContextTokens }
 			}
 		}
+	} else {
+		// When auto-condense is disabled, only perform sliding window truncation
+		// if we absolutely must (i.e., exceeding hard token limit)
+		// This is a forced truncation scenario (e.g., context window exceeded error)
+		if (prevContextTokens > allowedTokens) {
+			// Use sliding window truncation instead of condensing
+			const truncatedMessages = truncateConversation(messages, 0.5, taskId)
+			return { messages: truncatedMessages, prevContextTokens, summary: "", cost, error }
+		}
 	}
 
-	// Fall back to sliding window truncation if needed
-	if (prevContextTokens > allowedTokens) {
-		const truncatedMessages = truncateConversation(messages, 0.5, taskId)
-		return { messages: truncatedMessages, prevContextTokens, summary: "", cost, error }
-	}
 	// No truncation or condensation needed
 	return { messages, summary: "", cost, prevContextTokens, error }
 }
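
Read together, the two changes leave truncateConversationIfNeeded with a single decision tree: condensing (with a sliding-window fallback on failure) when auto-condense is on, sliding-window truncation only past the hard limit when it is off, and the old unconditional fallback removed. The sketch below is a condensed, hedged reconstruction of that flow; decideTruncation, condense, and slide are stand-ins invented for this sketch, and the threshold checks inside the auto-condense branch are elided.

// Condensed reconstruction of the post-change decision flow, assembled
// from the hunks above; only the branch structure is the point here.
type Msg = { role: string; content: unknown }
type Outcome = { messages: Msg[]; prevContextTokens: number; summary: string; cost: number; error?: string }

function decideTruncation(opts: {
	messages: Msg[]
	prevContextTokens: number
	allowedTokens: number
	autoCondenseContext: boolean
	taskId: string
	condense: () => { messages: Msg[]; summary: string; cost: number; error?: string }
	slide: (messages: Msg[], frac: number, taskId: string) => Msg[]
}): Outcome {
	const { messages, prevContextTokens, allowedTokens, autoCondenseContext, taskId } = opts
	let cost = 0
	let error: string | undefined

	if (autoCondenseContext) {
		// (threshold checks elided) attempt intelligent condensing
		const result = opts.condense()
		if (result.error) {
			error = result.error
			cost = result.cost
			// Summarization failed but context must still shrink: fall back
			if (prevContextTokens > allowedTokens) {
				return { messages: opts.slide(messages, 0.5, taskId), prevContextTokens, summary: "", cost, error }
			}
		} else {
			return { ...result, prevContextTokens }
		}
	} else if (prevContextTokens > allowedTokens) {
		// Auto-condense disabled: sliding-window truncation only, and only
		// when the hard token limit is actually exceeded
		return { messages: opts.slide(messages, 0.5, taskId), prevContextTokens, summary: "", cost, error }
	}

	// No truncation or condensation needed
	return { messages, summary: "", cost, prevContextTokens, error }
}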

src/core/task/Task.ts

Lines changed: 5 additions & 3 deletions
@@ -2438,7 +2438,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 
 	private async handleContextWindowExceededError(): Promise<void> {
 		const state = await this.providerRef.deref()?.getState()
-		const { profileThresholds = {} } = state ?? {}
+		const { profileThresholds = {}, autoCondenseContext = true } = state ?? {}
 
 		const { contextTokens } = this.getTokenUsage()
 		const modelInfo = this.api.getModel().info

@@ -2461,14 +2461,16 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 			`Forcing truncation to ${FORCED_CONTEXT_REDUCTION_PERCENT}% of current context.`,
 		)
 
-		// Force aggressive truncation by keeping only 75% of the conversation history
+		// When context window is exceeded, we must reduce context size
+		// If auto-condense is enabled, use intelligent condensing
+		// If auto-condense is disabled, use sliding window truncation
 		const truncateResult = await truncateConversationIfNeeded({
 			messages: this.apiConversationHistory,
 			totalTokens: contextTokens || 0,
 			maxTokens,
 			contextWindow,
 			apiHandler: this.api,
-			autoCondenseContext: true,
+			autoCondenseContext: autoCondenseContext, // Respect user's setting
 			autoCondenseContextPercent: FORCED_CONTEXT_REDUCTION_PERCENT,
 			systemPrompt: await this.getSystemPrompt(),
 			taskId: this.taskId,

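One detail in the first hunk deserves a note: the autoCondenseContext = true default in the destructuring means a state object that predates the setting behaves like the old hard-coded value, so only an explicit user opt-out changes behavior. A tiny sketch, with the state shape assumed for illustration:

// State shape assumed for illustration; only the default semantics matter.
interface ProviderStateSketch {
	profileThresholds?: Record<string, number>
	autoCondenseContext?: boolean
}

function readAutoCondense(state?: ProviderStateSketch): boolean {
	// Mirrors the destructuring in the hunk: absent -> true, explicit false -> false
	const { autoCondenseContext = true } = state ?? {}
	return autoCondenseContext
}

console.log(readAutoCondense(undefined)) // true  (legacy/unset state)
console.log(readAutoCondense({ autoCondenseContext: false })) // false (user opt-out)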