fix: respect autoCondenseContext setting in sliding window truncation #7956
src/core/sliding-window/__tests__/auto-condense-disabled.spec.ts
@@ -0,0 +1,248 @@
// npx vitest src/core/sliding-window/__tests__/auto-condense-disabled.spec.ts

import { Anthropic } from "@anthropic-ai/sdk"
import type { ModelInfo } from "@roo-code/types"
import { TelemetryService } from "@roo-code/telemetry"
import { BaseProvider } from "../../../api/providers/base-provider"
import { ApiMessage } from "../../task-persistence/apiMessages"
import * as condenseModule from "../../condense"
import { truncateConversationIfNeeded } from "../index"

// Create a mock ApiHandler for testing
class MockApiHandler extends BaseProvider {
	createMessage(): any {
		// Mock implementation for testing - returns an async iterable stream
		const mockStream = {
			async *[Symbol.asyncIterator]() {
				yield { type: "text", text: "Mock summary content" }
				yield { type: "usage", inputTokens: 100, outputTokens: 50 }
			},
		}
		return mockStream
	}

	getModel(): { id: string; info: ModelInfo } {
		return {
			id: "test-model",
			info: {
				contextWindow: 100000,
				maxTokens: 50000,
				supportsPromptCache: true,
				supportsImages: false,
				inputPrice: 0,
				outputPrice: 0,
				description: "Test model",
			},
		}
	}
}

// Create a singleton instance for tests
const mockApiHandler = new MockApiHandler()
const taskId = "test-task-id"

describe("Auto-condense disabled behavior", () => {
	beforeEach(() => {
		if (!TelemetryService.hasInstance()) {
			TelemetryService.createInstance([])
		}
		vi.clearAllMocks()
	})

	const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
		contextWindow,
		supportsPromptCache: true,
		maxTokens,
	})

	const messages: ApiMessage[] = [
		{ role: "user", content: "First message" },
		{ role: "assistant", content: "Second message" },
		{ role: "user", content: "Third message" },
		{ role: "assistant", content: "Fourth message" },
		{ role: "user", content: "Fifth message" },
	]

	it("should NOT condense when autoCondenseContext is false and tokens are below limit", async () => {
		const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
		const modelInfo = createModelInfo(100000, 30000)

		// Set tokens below the limit
		const totalTokens = 50000 // Below the 60000 limit (100000 * 0.9 - 30000)
		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

		const result = await truncateConversationIfNeeded({
			messages: messagesWithSmallContent,
			totalTokens,
			contextWindow: modelInfo.contextWindow,
			maxTokens: modelInfo.maxTokens,
			apiHandler: mockApiHandler,
			autoCondenseContext: false, // Disabled
			autoCondenseContextPercent: 50, // Should be ignored
			systemPrompt: "System prompt",
			taskId,
			profileThresholds: {},
			currentProfileId: "default",
		})

		// Should NOT call summarizeConversation
		expect(summarizeSpy).not.toHaveBeenCalled()

		// Should return original messages
		expect(result.messages).toEqual(messagesWithSmallContent)
		expect(result.summary).toBe("")
		expect(result.cost).toBe(0)

		summarizeSpy.mockRestore()
	})

	it("should use sliding window truncation when autoCondenseContext is false and tokens exceed limit", async () => {
		const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
		const modelInfo = createModelInfo(100000, 30000)

		// Set tokens above the limit
		const totalTokens = 70001 // Above the 60000 limit
		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

		const result = await truncateConversationIfNeeded({
			messages: messagesWithSmallContent,
			totalTokens,
			contextWindow: modelInfo.contextWindow,
			maxTokens: modelInfo.maxTokens,
			apiHandler: mockApiHandler,
			autoCondenseContext: false, // Disabled
			autoCondenseContextPercent: 50, // Should be ignored
			systemPrompt: "System prompt",
			taskId,
			profileThresholds: {},
			currentProfileId: "default",
		})

		// Should NOT call summarizeConversation
		expect(summarizeSpy).not.toHaveBeenCalled()

		// Should use sliding window truncation (removes 2 messages with 0.5 fraction)
		const expectedMessages = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]
		expect(result.messages).toEqual(expectedMessages)
		expect(result.summary).toBe("") // No summary when using sliding window
		expect(result.cost).toBe(0)

		summarizeSpy.mockRestore()
	})

	it("should NOT condense even when percentage threshold is exceeded if autoCondenseContext is false", async () => {
		const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
		const modelInfo = createModelInfo(100000, 30000)

		// Set tokens to 80% of context window (exceeds typical percentage thresholds)
		const totalTokens = 80000
		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

		const result = await truncateConversationIfNeeded({
			messages: messagesWithSmallContent,
			totalTokens,
			contextWindow: modelInfo.contextWindow,
			maxTokens: modelInfo.maxTokens,
			apiHandler: mockApiHandler,
			autoCondenseContext: false, // Disabled
			autoCondenseContextPercent: 50, // 80% exceeds this, but should be ignored
			systemPrompt: "System prompt",
			taskId,
			profileThresholds: {},
			currentProfileId: "default",
		})

		// Should NOT call summarizeConversation even though percentage is exceeded
		expect(summarizeSpy).not.toHaveBeenCalled()

		// Should use sliding window truncation since tokens exceed hard limit
		const expectedMessages = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]
		expect(result.messages).toEqual(expectedMessages)
		expect(result.summary).toBe("")
		expect(result.cost).toBe(0)

		summarizeSpy.mockRestore()
	})

	it("should respect autoCondenseContext setting in forced truncation scenarios", async () => {
		const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
		const modelInfo = createModelInfo(100000, 30000)

		// Simulate a forced truncation scenario (e.g., context window exceeded)
		// This would be called from handleContextWindowExceededError
		const totalTokens = 95000 // Way above limit, simulating context window error
		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

		// Test with autoCondenseContext = false (user preference)
		const result = await truncateConversationIfNeeded({
			messages: messagesWithSmallContent,
			totalTokens,
			contextWindow: modelInfo.contextWindow,
			maxTokens: modelInfo.maxTokens,
			apiHandler: mockApiHandler,
			autoCondenseContext: false, // User has disabled auto-condense
			autoCondenseContextPercent: 75, // FORCED_CONTEXT_REDUCTION_PERCENT
			systemPrompt: "System prompt",
			taskId,
			profileThresholds: {},
			currentProfileId: "default",
		})

		// Should NOT call summarizeConversation, respecting user preference
		expect(summarizeSpy).not.toHaveBeenCalled()

		// Should use sliding window truncation instead
		const expectedMessages = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]
		expect(result.messages).toEqual(expectedMessages)
		expect(result.summary).toBe("")
		expect(result.cost).toBe(0)

		summarizeSpy.mockRestore()
	})

	it("should use condensing when autoCondenseContext is true and tokens exceed limit", async () => {
		// This is a control test to ensure condensing still works when enabled
		const mockSummary = "This is a summary"
		const mockCost = 0.05
		const mockSummarizeResponse: condenseModule.SummarizeResponse = {
			messages: [
				{ role: "user", content: "First message" },
				{ role: "assistant", content: mockSummary, isSummary: true },
				{ role: "user", content: "Last message" },
			],
			summary: mockSummary,
			cost: mockCost,
			newContextTokens: 100,
		}

		const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation").mockResolvedValue(mockSummarizeResponse)

		const modelInfo = createModelInfo(100000, 30000)
		const totalTokens = 70001 // Above limit
		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

		const result = await truncateConversationIfNeeded({
			messages: messagesWithSmallContent,
			totalTokens,
			contextWindow: modelInfo.contextWindow,
			maxTokens: modelInfo.maxTokens,
			apiHandler: mockApiHandler,
			autoCondenseContext: true, // Enabled
			autoCondenseContextPercent: 100,
			systemPrompt: "System prompt",
			taskId,
			profileThresholds: {},
			currentProfileId: "default",
		})

		// Should call summarizeConversation when enabled
		expect(summarizeSpy).toHaveBeenCalled()

		// Should return condensed result
		expect(result.messages).toEqual(mockSummarizeResponse.messages)
		expect(result.summary).toBe(mockSummary)
		expect(result.cost).toBe(mockCost)

		summarizeSpy.mockRestore()
	})
})
src/core/sliding-window/index.ts
@@ -159,17 +159,27 @@ export async function truncateConversationIfNeeded({
			if (result.error) {
				error = result.error
				cost = result.cost
				// If summarization failed but we still need to reduce context,
				// fall back to sliding window truncation
				if (prevContextTokens > allowedTokens) {
Contributor (Author):

When condensing fails and we fall back to sliding window truncation, should we log this failure for debugging purposes? Currently it silently falls back, which might make troubleshooting harder. Consider adding a log statement here.
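The reviewer's suggested change is not shown; a minimal sketch of what such logging could look like, assuming a plain console.warn (the exact wording and logger are assumptions):

// Hypothetical sketch, not the reviewer's actual suggestion: surface the
// silent fallback so it can be traced when troubleshooting.
console.warn(
	`[sliding-window] Condensing failed for task ${taskId}: ${error}. ` +
		`Falling back to sliding window truncation (prevContextTokens=${prevContextTokens}, allowedTokens=${allowedTokens}).`,
)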
					const truncatedMessages = truncateConversation(messages, 0.5, taskId)
					return { messages: truncatedMessages, prevContextTokens, summary: "", cost, error }
				}
			} else {
				return { ...result, prevContextTokens }
			}
		}
	} else {
		// When auto-condense is disabled, only perform sliding window truncation
Contributor (Author):

This comment could be more specific about what 'absolutely must' means.
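The suggested rewording itself is not shown; one possible version (an assumption, not the reviewer's text) might read:

// Hypothetical rewording, not the reviewer's actual suggestion:
// When auto-condense is disabled, never summarize; only apply sliding window
// truncation when prevContextTokens exceeds the hard limit (allowedTokens),
// e.g. when recovering from a context-window-exceeded error.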
		// if we absolutely must (i.e., exceeding hard token limit)
		// This is a forced truncation scenario (e.g., context window exceeded error)
		if (prevContextTokens > allowedTokens) {
			// Use sliding window truncation instead of condensing
			const truncatedMessages = truncateConversation(messages, 0.5, taskId)
			return { messages: truncatedMessages, prevContextTokens, summary: "", cost, error }
		}
	}

	// Fall back to sliding window truncation if needed
	if (prevContextTokens > allowedTokens) {
		const truncatedMessages = truncateConversation(messages, 0.5, taskId)
		return { messages: truncatedMessages, prevContextTokens, summary: "", cost, error }
	}
	// No truncation or condensation needed
	return { messages, summary: "", cost, prevContextTokens, error }
}
src/core/task/Task.ts
@@ -2438,7 +2438,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
	private async handleContextWindowExceededError(): Promise<void> {
		const state = await this.providerRef.deref()?.getState()
-		const { profileThresholds = {} } = state ?? {}
+		const { profileThresholds = {}, autoCondenseContext = true } = state ?? {}
Contributor (Author):

For consistency with how it's done elsewhere in the codebase, consider destructuring with a default value. This matches the pattern used in line 2514 of the same file.
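The suggested change block is not shown; presumably it is the destructuring-with-default pattern visible on the added line above, along the lines of:

// Destructure with a default so undefined state or a missing key
// falls back safely (auto-condense defaults to on):
const { profileThresholds = {}, autoCondenseContext = true } = state ?? {}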

		const { contextTokens } = this.getTokenUsage()
		const modelInfo = this.api.getModel().info

@@ -2461,14 +2461,16 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
			`Forcing truncation to ${FORCED_CONTEXT_REDUCTION_PERCENT}% of current context.`,
		)

-		// Force aggressive truncation by keeping only 75% of the conversation history
+		// When context window is exceeded, we must reduce context size
+		// If auto-condense is enabled, use intelligent condensing
+		// If auto-condense is disabled, use sliding window truncation
		const truncateResult = await truncateConversationIfNeeded({
			messages: this.apiConversationHistory,
			totalTokens: contextTokens || 0,
			maxTokens,
			contextWindow,
			apiHandler: this.api,
-			autoCondenseContext: true,
+			autoCondenseContext: autoCondenseContext, // Respect user's setting
			autoCondenseContextPercent: FORCED_CONTEXT_REDUCTION_PERCENT,
			systemPrompt: await this.getSystemPrompt(),
			taskId: this.taskId,
Consider adding a test case that simulates condensing failure to ensure the fallback to sliding window truncation works correctly when autoCondenseContext is true but condensing fails. This would test the error handling path in lines 159-167 of index.ts.
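A sketch of what such a test could look like, following the conventions of the spec above; the SummarizeResponse error shape and the exact assertions are assumptions inferred from the diff, not part of this PR:

it("should fall back to sliding window truncation when condensing fails", async () => {
	// Simulate summarizeConversation reporting an error (shape assumed from the
	// diff, which reads result.error and result.cost on failure)
	const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation").mockResolvedValue({
		messages,
		summary: "",
		cost: 0.01,
		error: "Condensing failed",
	})

	const modelInfo = createModelInfo(100000, 30000)
	const totalTokens = 70001 // Above the 60000 limit, so context must shrink
	const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

	const result = await truncateConversationIfNeeded({
		messages: messagesWithSmallContent,
		totalTokens,
		contextWindow: modelInfo.contextWindow,
		maxTokens: modelInfo.maxTokens,
		apiHandler: mockApiHandler,
		autoCondenseContext: true, // Enabled, but condensing will fail
		autoCondenseContextPercent: 100,
		systemPrompt: "System prompt",
		taskId,
		profileThresholds: {},
		currentProfileId: "default",
	})

	// Condensing was attempted...
	expect(summarizeSpy).toHaveBeenCalled()

	// ...but the result should come from the sliding window fallback
	const expectedMessages = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]
	expect(result.messages).toEqual(expectedMessages)
	expect(result.summary).toBe("")
	expect(result.error).toBe("Condensing failed")

	summarizeSpy.mockRestore()
})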