248 changes: 248 additions & 0 deletions src/core/sliding-window/__tests__/auto-condense-disabled.spec.ts
@@ -0,0 +1,248 @@
// npx vitest src/core/sliding-window/__tests__/auto-condense-disabled.spec.ts

import { Anthropic } from "@anthropic-ai/sdk"
import type { ModelInfo } from "@roo-code/types"
import { TelemetryService } from "@roo-code/telemetry"
import { BaseProvider } from "../../../api/providers/base-provider"
import { ApiMessage } from "../../task-persistence/apiMessages"
import * as condenseModule from "../../condense"
import { truncateConversationIfNeeded } from "../index"

// Create a mock ApiHandler for testing
class MockApiHandler extends BaseProvider {
createMessage(): any {
// Mock implementation for testing - returns an async iterable stream
const mockStream = {
async *[Symbol.asyncIterator]() {
yield { type: "text", text: "Mock summary content" }
yield { type: "usage", inputTokens: 100, outputTokens: 50 }
},
}
return mockStream
}

getModel(): { id: string; info: ModelInfo } {
return {
id: "test-model",
info: {
contextWindow: 100000,
maxTokens: 50000,
supportsPromptCache: true,
supportsImages: false,
inputPrice: 0,
outputPrice: 0,
description: "Test model",
},
}
}
}

// Create a singleton instance for tests
const mockApiHandler = new MockApiHandler()
const taskId = "test-task-id"

describe("Auto-condense disabled behavior", () => {
beforeEach(() => {
if (!TelemetryService.hasInstance()) {
TelemetryService.createInstance([])
}
vi.clearAllMocks()
})

const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
contextWindow,
supportsPromptCache: true,
maxTokens,
})

const messages: ApiMessage[] = [
{ role: "user", content: "First message" },
{ role: "assistant", content: "Second message" },
{ role: "user", content: "Third message" },
{ role: "assistant", content: "Fourth message" },
{ role: "user", content: "Fifth message" },
]

it("should NOT condense when autoCondenseContext is false and tokens are below limit", async () => {
const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
const modelInfo = createModelInfo(100000, 30000)

// Set tokens below the limit
const totalTokens = 50000 // Below the 60000 limit (100000 * 0.9 - 30000)
const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

const result = await truncateConversationIfNeeded({
messages: messagesWithSmallContent,
totalTokens,
contextWindow: modelInfo.contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false, // Disabled
autoCondenseContextPercent: 50, // Should be ignored
systemPrompt: "System prompt",
taskId,
profileThresholds: {},
currentProfileId: "default",
})

// Should NOT call summarizeConversation
expect(summarizeSpy).not.toHaveBeenCalled()

// Should return original messages
expect(result.messages).toEqual(messagesWithSmallContent)
expect(result.summary).toBe("")
expect(result.cost).toBe(0)

summarizeSpy.mockRestore()
})

it("should use sliding window truncation when autoCondenseContext is false and tokens exceed limit", async () => {
const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
const modelInfo = createModelInfo(100000, 30000)

// Set tokens above the limit
const totalTokens = 70001 // Above the 60000 limit
const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

const result = await truncateConversationIfNeeded({
messages: messagesWithSmallContent,
totalTokens,
contextWindow: modelInfo.contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false, // Disabled
autoCondenseContextPercent: 50, // Should be ignored
systemPrompt: "System prompt",
taskId,
profileThresholds: {},
currentProfileId: "default",
})

// Should NOT call summarizeConversation
expect(summarizeSpy).not.toHaveBeenCalled()

// Should use sliding window truncation (removes 2 messages with 0.5 fraction)
const expectedMessages = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]
expect(result.messages).toEqual(expectedMessages)
expect(result.summary).toBe("") // No summary when using sliding window
expect(result.cost).toBe(0)

summarizeSpy.mockRestore()
})

it("should NOT condense even when percentage threshold is exceeded if autoCondenseContext is false", async () => {
const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
const modelInfo = createModelInfo(100000, 30000)

// Set tokens to 80% of context window (exceeds typical percentage thresholds)
const totalTokens = 80000
const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

const result = await truncateConversationIfNeeded({
messages: messagesWithSmallContent,
totalTokens,
contextWindow: modelInfo.contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false, // Disabled
autoCondenseContextPercent: 50, // 80% exceeds this, but should be ignored
systemPrompt: "System prompt",
taskId,
profileThresholds: {},
currentProfileId: "default",
})

// Should NOT call summarizeConversation even though percentage is exceeded
expect(summarizeSpy).not.toHaveBeenCalled()

// Should use sliding window truncation since tokens exceed hard limit
const expectedMessages = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]
expect(result.messages).toEqual(expectedMessages)
expect(result.summary).toBe("")
expect(result.cost).toBe(0)

summarizeSpy.mockRestore()
})

it("should respect autoCondenseContext setting in forced truncation scenarios", async () => {
const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
const modelInfo = createModelInfo(100000, 30000)

// Simulate a forced truncation scenario (e.g., context window exceeded)
// This would be called from handleContextWindowExceededError
const totalTokens = 95000 // Way above limit, simulating context window error
const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

// Test with autoCondenseContext = false (user preference)
const result = await truncateConversationIfNeeded({
messages: messagesWithSmallContent,
totalTokens,
contextWindow: modelInfo.contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false, // User has disabled auto-condense
autoCondenseContextPercent: 75, // FORCED_CONTEXT_REDUCTION_PERCENT
systemPrompt: "System prompt",
taskId,
profileThresholds: {},
currentProfileId: "default",
})

// Should NOT call summarizeConversation, respecting user preference
expect(summarizeSpy).not.toHaveBeenCalled()

// Should use sliding window truncation instead
const expectedMessages = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]
expect(result.messages).toEqual(expectedMessages)
expect(result.summary).toBe("")
expect(result.cost).toBe(0)

summarizeSpy.mockRestore()
})

it("should use condensing when autoCondenseContext is true and tokens exceed limit", async () => {
// This is a control test to ensure condensing still works when enabled
const mockSummary = "This is a summary"
const mockCost = 0.05
const mockSummarizeResponse: condenseModule.SummarizeResponse = {
messages: [
{ role: "user", content: "First message" },
{ role: "assistant", content: mockSummary, isSummary: true },
{ role: "user", content: "Last message" },
],
summary: mockSummary,
cost: mockCost,
newContextTokens: 100,
}

const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation").mockResolvedValue(mockSummarizeResponse)

const modelInfo = createModelInfo(100000, 30000)
const totalTokens = 70001 // Above limit
const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

const result = await truncateConversationIfNeeded({
messages: messagesWithSmallContent,
totalTokens,
contextWindow: modelInfo.contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: true, // Enabled
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
taskId,
profileThresholds: {},
currentProfileId: "default",
})

// Should call summarizeConversation when enabled
expect(summarizeSpy).toHaveBeenCalled()

// Should return condensed result
expect(result.messages).toEqual(mockSummarizeResponse.messages)
expect(result.summary).toBe(mockSummary)
expect(result.cost).toBe(mockCost)

summarizeSpy.mockRestore()
})
Contributor Author:
Consider adding a test case that simulates condensing failure to ensure the fallback to sliding window truncation works correctly when autoCondenseContext is true but condensing fails. This would test the error handling path in lines 159-167 of index.ts.
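
A minimal sketch of what that test could look like, reusing the helpers defined above; the exact shape of the failed `summarizeConversation` result (an `error` string returned alongside the original messages) is an assumption:

```typescript
it("should fall back to sliding window truncation when condensing fails", async () => {
	// Assumed failure shape: summarizeConversation resolves with an error and no usable summary
	const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation").mockResolvedValue({
		messages,
		summary: "",
		cost: 0.01,
		error: "Summarization failed",
	} as condenseModule.SummarizeResponse)

	const modelInfo = createModelInfo(100000, 30000)
	const totalTokens = 70001 // Above the 60000 limit, so context still has to shrink
	const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

	const result = await truncateConversationIfNeeded({
		messages: messagesWithSmallContent,
		totalTokens,
		contextWindow: modelInfo.contextWindow,
		maxTokens: modelInfo.maxTokens,
		apiHandler: mockApiHandler,
		autoCondenseContext: true, // Enabled, but condensing fails
		autoCondenseContextPercent: 100,
		systemPrompt: "System prompt",
		taskId,
		profileThresholds: {},
		currentProfileId: "default",
	})

	// Condensing was attempted...
	expect(summarizeSpy).toHaveBeenCalled()
	// ...but the returned messages should come from sliding window truncation, not a summary
	expect(result.messages).toEqual([
		messagesWithSmallContent[0],
		messagesWithSmallContent[3],
		messagesWithSmallContent[4],
	])
	expect(result.summary).toBe("")
	expect(result.error).toBe("Summarization failed")

	summarizeSpy.mockRestore()
})
```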

})
20 changes: 15 additions & 5 deletions src/core/sliding-window/index.ts
@@ -159,17 +159,27 @@ export async function truncateConversationIfNeeded({
if (result.error) {
error = result.error
cost = result.cost
// If summarization failed but we still need to reduce context,
// fall back to sliding window truncation
if (prevContextTokens > allowedTokens) {
Contributor Author:
When condensing fails and we fall back to sliding window truncation, should we log the failure for debugging purposes? Currently it silently falls back, which could make troubleshooting harder.

Consider adding:

Suggested change
-			if (prevContextTokens > allowedTokens) {
+			if (result.error) {
+				console.warn(`[Task#${taskId}] Condensing failed: ${result.error}. Falling back to sliding window truncation.`);
+				error = result.error
+				cost = result.cost

const truncatedMessages = truncateConversation(messages, 0.5, taskId)
return { messages: truncatedMessages, prevContextTokens, summary: "", cost, error }
}
} else {
return { ...result, prevContextTokens }
}
}
} else {
// When auto-condense is disabled, only perform sliding window truncation
Contributor Author:
This comment could be more specific about what 'absolutely must' means. Consider:

Suggested change
-		// When auto-condense is disabled, only perform sliding window truncation
+		// When auto-condense is disabled, use sliding window truncation only
+		// when tokens exceed the hard limit (context window minus buffer and reserved tokens)
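
For reference, the 'hard limit' referred to here appears to be the context window minus a ~10% buffer and the reserved output tokens, matching the `100000 * 0.9 - 30000 = 60000` arithmetic in the new tests. The constant and helper names below are illustrative assumptions, not the actual index.ts implementation:

```typescript
// Illustrative sketch only; names are assumptions.
const TOKEN_BUFFER_PERCENTAGE = 0.1 // matches the 0.9 factor used in the tests

function allowedTokensFor(contextWindow: number, maxTokens?: number): number {
	// Reserve the model's max output tokens plus a safety buffer of the context window.
	return contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - (maxTokens ?? 0)
}

// With the test fixture: allowedTokensFor(100000, 30000) === 60000
```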

// if we absolutely must (i.e., exceeding hard token limit)
// This is a forced truncation scenario (e.g., context window exceeded error)
if (prevContextTokens > allowedTokens) {
// Use sliding window truncation instead of condensing
const truncatedMessages = truncateConversation(messages, 0.5, taskId)
return { messages: truncatedMessages, prevContextTokens, summary: "", cost, error }
}
}

// Fall back to sliding window truncation if needed
if (prevContextTokens > allowedTokens) {
const truncatedMessages = truncateConversation(messages, 0.5, taskId)
return { messages: truncatedMessages, prevContextTokens, summary: "", cost, error }
}
// No truncation or condensation needed
return { messages, summary: "", cost, prevContextTokens, error }
}
8 changes: 5 additions & 3 deletions src/core/task/Task.ts
@@ -2438,7 +2438,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {

private async handleContextWindowExceededError(): Promise<void> {
const state = await this.providerRef.deref()?.getState()
const { profileThresholds = {} } = state ?? {}
const { profileThresholds = {}, autoCondenseContext = true } = state ?? {}
Contributor Author:
For consistency with how it's done elsewhere in the codebase, consider destructuring with a default value:

Suggested change
-	const { profileThresholds = {}, autoCondenseContext = true } = state ?? {}
+	const { profileThresholds = {}, autoCondenseContext = true } = state ?? {}

This matches the pattern used in line 2514 of the same file.


const { contextTokens } = this.getTokenUsage()
const modelInfo = this.api.getModel().info
@@ -2461,14 +2461,16 @@
`Forcing truncation to ${FORCED_CONTEXT_REDUCTION_PERCENT}% of current context.`,
)

// Force aggressive truncation by keeping only 75% of the conversation history
// When context window is exceeded, we must reduce context size
// If auto-condense is enabled, use intelligent condensing
// If auto-condense is disabled, use sliding window truncation
const truncateResult = await truncateConversationIfNeeded({
messages: this.apiConversationHistory,
totalTokens: contextTokens || 0,
maxTokens,
contextWindow,
apiHandler: this.api,
autoCondenseContext: true,
autoCondenseContext: autoCondenseContext, // Respect user's setting
autoCondenseContextPercent: FORCED_CONTEXT_REDUCTION_PERCENT,
systemPrompt: await this.getSystemPrompt(),
taskId: this.taskId,