From 0f4cfec82a29ead0b2a05a167e6b9fa9b5a758e8 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Tue, 13 May 2025 16:35:14 -0700 Subject: [PATCH 01/25] [Condense] Condense messages with an LLM rather than truncating --- src/core/condense/index.ts | 116 ++++ .../__tests__/sliding-window.test.ts | 553 ------------------ src/core/sliding-window/index.ts | 100 ---- src/core/task-persistence/apiMessages.ts | 2 +- src/core/task/Task.ts | 50 +- 5 files changed, 142 insertions(+), 679 deletions(-) create mode 100644 src/core/condense/index.ts delete mode 100644 src/core/sliding-window/__tests__/sliding-window.test.ts delete mode 100644 src/core/sliding-window/index.ts diff --git a/src/core/condense/index.ts b/src/core/condense/index.ts new file mode 100644 index 0000000000..f194a20152 --- /dev/null +++ b/src/core/condense/index.ts @@ -0,0 +1,116 @@ +import { ApiHandler } from "../../api" +import { ApiMessage } from "../task-persistence/apiMessages" + +const CONTEXT_FRAC_FOR_SUMMARY = 0.5 // TODO(canyon): make this configurable + +const SUMMARY_PROMPT = `\ +Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions. +This summary should be thorough in capturing technical details, code patterns, and architectural decisions that would be essential for continuing with the conversation and supporting any continuing tasks. + +Your summary should be structure as follows: +Context: The context to continue the conversation with. If applicable based on the current task, this should include: + 1. Previous Conversation: High level details about what was discussed throughout the entire conversation with the user. This should be written to allow someone to be able to follow the general overarching conversation flow. + 2. Current Work: Describe in detail what was being worked on prior to this request to summarize the conversation. Pay special attention to the more recent messages in the conversation. + 3. Key Technical Concepts: List all important technical concepts, technologies, coding conventions, and frameworks discussed, which might be relevant for continuing with this work. + 4. Relevant Files and Code: If applicable, enumerate specific files and code sections examined, modified, or created for the task continuation. Pay special attention to the most recent messages and changes. + 5. Problem Solving: Document problems solved thus far and any ongoing troubleshooting efforts. + 6. Pending Tasks and Next Steps: Outline all pending tasks that you have explicitly been asked to work on, as well as list the next steps you will take for all outstanding work, if applicable. Include code snippets where they add clarity. For any next steps, include direct quotes from the most recent conversation showing exactly what task you were working on and where you left off. This should be verbatim to ensure there's no information loss in context between tasks. + +Example summary structure: +1. Previous Conversation: + [Detailed description] +2. Current Work: + [Detailed description] +3. Key Technical Concepts: + - [Concept 1] + - [Concept 2] + - [...] +4. Relevant Files and Code: + - [File Name 1] + - [Summary of why this file is important] + - [Summary of the changes made to this file, if any] + - [Important Code Snippet] + - [File Name 2] + - [Important Code Snippet] + - [...] +5. Problem Solving: + [Detailed description] +6. Pending Tasks and Next Steps: + - [Task 1 details & next steps] + - [Task 2 details & next steps] + - [...] 
+
+The conversation history which you should summarize is included below. Output only the summary, without any additional commentary or explanation.
+### BEGIN CONVERSATION HISTORY
+{messages}
+### END CONVERSATION HISTORY
+`
+
+/**
+ * Conditionally summarizes the conversation messages if the total token count
+ * exceeds a set fraction of the context window.
+ *
+ * @param {ApiMessage[]} messages - The conversation messages
+ * @param {number} totalTokens - The total number of tokens in the conversation, excluding the last user message.
+ * @param {number} contextWindow - The context window size.
+ * @param {ApiHandler} apiHandler - The API handler to use for token counting.
+ * @returns {ApiMessage[]} - The input messages, potentially including a new summary message before the last message.
+ */
+export async function summarizeConversationIfNeeded(
+	messages: ApiMessage[],
+	totalTokens: number,
+	contextWindow: number,
+	apiHandler: ApiHandler,
+): Promise<ApiMessage[]> {
+	if (totalTokens < contextWindow * CONTEXT_FRAC_FOR_SUMMARY) {
+		return messages
+	}
+	return await summarizeConversation(messages, apiHandler)
+}
+
+async function summarizeConversation(messages: ApiMessage[], apiHandler: ApiHandler): Promise<ApiMessage[]> {
+	if (messages.length < 2) {
+		return messages
+	}
+	const messagesToSummarize = messages.slice(0, -1)
+	const summaryPrompt = getSummaryPrompt(messagesToSummarize)
+	const stream = apiHandler.createMessage(summaryPrompt, [])
+	let summary = ""
+	for await (const chunk of stream) {
+		if (chunk.type === "text") {
+			summary += chunk.text
+		}
+	}
+	summary = summary.trim()
+	if (summary.length === 0) {
+		console.warn("Received empty summary from API")
+		return messages
+	}
+	const summaryMessage: ApiMessage = {
+		role: "assistant",
+		content: summary,
+		ts: Date.now(),
+		isSummary: true,
+	}
+
+	return [...messagesToSummarize, summaryMessage, messages[messages.length - 1]]
+}
+
+function getSummaryPrompt(messages: ApiMessage[]): string {
+	return SUMMARY_PROMPT.replace("{messages}", getMessageStr(messages))
+}
+
+function getMessageStr(messages: ApiMessage[]): string {
+	let messageStr = ""
+	for (const message of messages) {
+		if (message.role === "user") {
+			messageStr += "# User:"
+		} else if (message.role === "assistant") {
+			messageStr += "# Assistant:"
+		} else {
+			continue
+		}
+		messageStr += `\n${message.content}\n\n`
+	}
+	return messageStr
+}
diff --git a/src/core/sliding-window/__tests__/sliding-window.test.ts b/src/core/sliding-window/__tests__/sliding-window.test.ts
deleted file mode 100644
index 16af2d4630..0000000000
--- a/src/core/sliding-window/__tests__/sliding-window.test.ts
+++ /dev/null
@@ -1,553 +0,0 @@
-// npx jest src/core/sliding-window/__tests__/sliding-window.test.ts
-
-import { Anthropic } from "@anthropic-ai/sdk"
-
-import { ModelInfo } from "../../../shared/api"
-import { BaseProvider } from "../../../api/providers/base-provider"
-import {
-	TOKEN_BUFFER_PERCENTAGE,
-	estimateTokenCount,
-	truncateConversation,
-	truncateConversationIfNeeded,
-} from "../index"
-
-// Create a mock ApiHandler for testing
-class MockApiHandler extends BaseProvider {
-	createMessage(): any {
-		throw new Error("Method not implemented.")
-	}
-
-	getModel(): { id: string; info: ModelInfo } {
-		return {
-			id: "test-model",
-			info: {
-				contextWindow: 100000,
-				maxTokens: 50000,
-				supportsPromptCache: true,
-				supportsImages: false,
-				inputPrice: 0,
-				outputPrice: 0,
-				description: "Test model",
-			},
-		}
-	}
-}
-
-// Create a singleton instance for tests
-const mockApiHandler = new 
MockApiHandler() - -/** - * Tests for the truncateConversation function - */ -describe("truncateConversation", () => { - it("should retain the first message", () => { - const messages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "First message" }, - { role: "assistant", content: "Second message" }, - { role: "user", content: "Third message" }, - ] - - const result = truncateConversation(messages, 0.5) - - // With 2 messages after the first, 0.5 fraction means remove 1 message - // But 1 is odd, so it rounds down to 0 (to make it even) - expect(result.length).toBe(3) // First message + 2 remaining messages - expect(result[0]).toEqual(messages[0]) - expect(result[1]).toEqual(messages[1]) - expect(result[2]).toEqual(messages[2]) - }) - - it("should remove the specified fraction of messages (rounded to even number)", () => { - const messages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "First message" }, - { role: "assistant", content: "Second message" }, - { role: "user", content: "Third message" }, - { role: "assistant", content: "Fourth message" }, - { role: "user", content: "Fifth message" }, - ] - - // 4 messages excluding first, 0.5 fraction = 2 messages to remove - // 2 is already even, so no rounding needed - const result = truncateConversation(messages, 0.5) - - expect(result.length).toBe(3) - expect(result[0]).toEqual(messages[0]) - expect(result[1]).toEqual(messages[3]) - expect(result[2]).toEqual(messages[4]) - }) - - it("should round to an even number of messages to remove", () => { - const messages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "First message" }, - { role: "assistant", content: "Second message" }, - { role: "user", content: "Third message" }, - { role: "assistant", content: "Fourth message" }, - { role: "user", content: "Fifth message" }, - { role: "assistant", content: "Sixth message" }, - { role: "user", content: "Seventh message" }, - ] - - // 6 messages excluding first, 0.3 fraction = 1.8 messages to remove - // 1.8 rounds down to 1, then to 0 to make it even - const result = truncateConversation(messages, 0.3) - - expect(result.length).toBe(7) // No messages removed - expect(result).toEqual(messages) - }) - - it("should handle edge case with fracToRemove = 0", () => { - const messages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "First message" }, - { role: "assistant", content: "Second message" }, - { role: "user", content: "Third message" }, - ] - - const result = truncateConversation(messages, 0) - - expect(result).toEqual(messages) - }) - - it("should handle edge case with fracToRemove = 1", () => { - const messages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "First message" }, - { role: "assistant", content: "Second message" }, - { role: "user", content: "Third message" }, - { role: "assistant", content: "Fourth message" }, - ] - - // 3 messages excluding first, 1.0 fraction = 3 messages to remove - // But 3 is odd, so it rounds down to 2 to make it even - const result = truncateConversation(messages, 1) - - expect(result.length).toBe(2) - expect(result[0]).toEqual(messages[0]) - expect(result[1]).toEqual(messages[3]) - }) -}) - -/** - * Tests for the estimateTokenCount function - */ -describe("estimateTokenCount", () => { - it("should return 0 for empty or undefined content", async () => { - expect(await estimateTokenCount([], mockApiHandler)).toBe(0) - // @ts-ignore - Testing with undefined - expect(await estimateTokenCount(undefined, 
mockApiHandler)).toBe(0)
-	})
-
-	it("should estimate tokens for text blocks", async () => {
-		const content: Array<Anthropic.Messages.ContentBlockParam> = [
-			{ type: "text", text: "This is a text block with 36 characters" },
-		]
-
-		// With tiktoken, the exact token count may differ from character-based estimation
-		// Instead of expecting an exact number, we verify it's a reasonable positive number
-		const result = await estimateTokenCount(content, mockApiHandler)
-		expect(result).toBeGreaterThan(0)
-
-		// We can also verify that longer text results in more tokens
-		const longerContent: Array<Anthropic.Messages.ContentBlockParam> = [
-			{
-				type: "text",
-				text: "This is a longer text block with significantly more characters to encode into tokens",
-			},
-		]
-		const longerResult = await estimateTokenCount(longerContent, mockApiHandler)
-		expect(longerResult).toBeGreaterThan(result)
-	})
-
-	it("should estimate tokens for image blocks based on data size", async () => {
-		// Small image
-		const smallImage: Array<Anthropic.Messages.ContentBlockParam> = [
-			{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "small_dummy_data" } },
-		]
-		// Larger image with more data
-		const largerImage: Array<Anthropic.Messages.ContentBlockParam> = [
-			{ type: "image", source: { type: "base64", media_type: "image/png", data: "X".repeat(1000) } },
-		]
-
-		// Verify the token count scales with the size of the image data
-		const smallImageTokens = await estimateTokenCount(smallImage, mockApiHandler)
-		const largerImageTokens = await estimateTokenCount(largerImage, mockApiHandler)
-
-		// Small image should have some tokens
-		expect(smallImageTokens).toBeGreaterThan(0)
-
-		// Larger image should have proportionally more tokens
-		expect(largerImageTokens).toBeGreaterThan(smallImageTokens)
-
-		// Verify the larger image calculation matches our formula including the 50% fudge factor
-		expect(largerImageTokens).toBe(48)
-	})
-
-	it("should estimate tokens for mixed content blocks", async () => {
-		const content: Array<Anthropic.Messages.ContentBlockParam> = [
-			{ type: "text", text: "A text block with 30 characters" },
-			{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "dummy_data" } },
-			{ type: "text", text: "Another text with 24 chars" },
-		]
-
-		// We know image tokens calculation should be consistent
-		const imageTokens = Math.ceil(Math.sqrt("dummy_data".length)) * 1.5
-
-		// With tiktoken, we can't predict exact text token counts,
-		// but we can verify the total is greater than just the image tokens
-		const result = await estimateTokenCount(content, mockApiHandler)
-		expect(result).toBeGreaterThan(imageTokens)
-
-		// Also test against a version with only the image to verify text adds tokens
-		const imageOnlyContent: Array<Anthropic.Messages.ContentBlockParam> = [
-			{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "dummy_data" } },
-		]
-		const imageOnlyResult = await estimateTokenCount(imageOnlyContent, mockApiHandler)
-		expect(result).toBeGreaterThan(imageOnlyResult)
-	})
-
-	it("should handle empty text blocks", async () => {
-		const content: Array<Anthropic.Messages.ContentBlockParam> = [{ type: "text", text: "" }]
-		expect(await estimateTokenCount(content, mockApiHandler)).toBe(0)
-	})
-
-	it("should handle plain string messages", async () => {
-		const content = "This is a plain text message"
-		expect(await estimateTokenCount([{ type: "text", text: content }], mockApiHandler)).toBeGreaterThan(0)
-	})
-})
-
-/**
- * Tests for the truncateConversationIfNeeded function
- */
-describe("truncateConversationIfNeeded", () => {
-	const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
-		contextWindow,
-		supportsPromptCache: true,
-		maxTokens,
-	})
-
-	const messages: 
Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "First message" }, - { role: "assistant", content: "Second message" }, - { role: "user", content: "Third message" }, - { role: "assistant", content: "Fourth message" }, - { role: "user", content: "Fifth message" }, - ] - - it("should not truncate if tokens are below max tokens threshold", async () => { - const modelInfo = createModelInfo(100000, 30000) - const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE // 10000 - const totalTokens = 70000 - dynamicBuffer - 1 // Just below threshold - buffer - - // Create messages with very small content in the last one to avoid token overflow - const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }] - - const result = await truncateConversationIfNeeded({ - messages: messagesWithSmallContent, - totalTokens, - contextWindow: modelInfo.contextWindow, - maxTokens: modelInfo.maxTokens, - apiHandler: mockApiHandler, - }) - expect(result).toEqual(messagesWithSmallContent) // No truncation occurs - }) - - it("should truncate if tokens are above max tokens threshold", async () => { - const modelInfo = createModelInfo(100000, 30000) - const totalTokens = 70001 // Above threshold - - // Create messages with very small content in the last one to avoid token overflow - const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }] - - // When truncating, always uses 0.5 fraction - // With 4 messages after the first, 0.5 fraction means remove 2 messages - const expectedResult = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]] - - const result = await truncateConversationIfNeeded({ - messages: messagesWithSmallContent, - totalTokens, - contextWindow: modelInfo.contextWindow, - maxTokens: modelInfo.maxTokens, - apiHandler: mockApiHandler, - }) - expect(result).toEqual(expectedResult) - }) - - it("should work with non-prompt caching models the same as prompt caching models", async () => { - // The implementation no longer differentiates between prompt caching and non-prompt caching models - const modelInfo1 = createModelInfo(100000, 30000) - const modelInfo2 = createModelInfo(100000, 30000) - - // Create messages with very small content in the last one to avoid token overflow - const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }] - - // Test below threshold - const belowThreshold = 69999 - const result1 = await truncateConversationIfNeeded({ - messages: messagesWithSmallContent, - totalTokens: belowThreshold, - contextWindow: modelInfo1.contextWindow, - maxTokens: modelInfo1.maxTokens, - apiHandler: mockApiHandler, - }) - - const result2 = await truncateConversationIfNeeded({ - messages: messagesWithSmallContent, - totalTokens: belowThreshold, - contextWindow: modelInfo2.contextWindow, - maxTokens: modelInfo2.maxTokens, - apiHandler: mockApiHandler, - }) - - expect(result1).toEqual(result2) - - // Test above threshold - const aboveThreshold = 70001 - const result3 = await truncateConversationIfNeeded({ - messages: messagesWithSmallContent, - totalTokens: aboveThreshold, - contextWindow: modelInfo1.contextWindow, - maxTokens: modelInfo1.maxTokens, - apiHandler: mockApiHandler, - }) - - const result4 = await truncateConversationIfNeeded({ - messages: messagesWithSmallContent, - totalTokens: aboveThreshold, - contextWindow: modelInfo2.contextWindow, - maxTokens: modelInfo2.maxTokens, - 
apiHandler: mockApiHandler, - }) - - expect(result3).toEqual(result4) - }) - - it("should consider incoming content when deciding to truncate", async () => { - const modelInfo = createModelInfo(100000, 30000) - const maxTokens = 30000 - const availableTokens = modelInfo.contextWindow - maxTokens - - // Test case 1: Small content that won't push us over the threshold - const smallContent = [{ type: "text" as const, text: "Small content" }] - const smallContentTokens = await estimateTokenCount(smallContent, mockApiHandler) - const messagesWithSmallContent: Anthropic.Messages.MessageParam[] = [ - ...messages.slice(0, -1), - { role: messages[messages.length - 1].role, content: smallContent }, - ] - - // Set base tokens so total is well below threshold + buffer even with small content added - const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE - const baseTokensForSmall = availableTokens - smallContentTokens - dynamicBuffer - 10 - const resultWithSmall = await truncateConversationIfNeeded({ - messages: messagesWithSmallContent, - totalTokens: baseTokensForSmall, - contextWindow: modelInfo.contextWindow, - maxTokens, - apiHandler: mockApiHandler, - }) - expect(resultWithSmall).toEqual(messagesWithSmallContent) // No truncation - - // Test case 2: Large content that will push us over the threshold - const largeContent = [ - { - type: "text" as const, - text: "A very large incoming message that would consume a significant number of tokens and push us over the threshold", - }, - ] - const largeContentTokens = await estimateTokenCount(largeContent, mockApiHandler) - const messagesWithLargeContent: Anthropic.Messages.MessageParam[] = [ - ...messages.slice(0, -1), - { role: messages[messages.length - 1].role, content: largeContent }, - ] - - // Set base tokens so we're just below threshold without content, but over with content - const baseTokensForLarge = availableTokens - Math.floor(largeContentTokens / 2) - const resultWithLarge = await truncateConversationIfNeeded({ - messages: messagesWithLargeContent, - totalTokens: baseTokensForLarge, - contextWindow: modelInfo.contextWindow, - maxTokens, - apiHandler: mockApiHandler, - }) - expect(resultWithLarge).not.toEqual(messagesWithLargeContent) // Should truncate - - // Test case 3: Very large content that will definitely exceed threshold - const veryLargeContent = [{ type: "text" as const, text: "X".repeat(1000) }] - const veryLargeContentTokens = await estimateTokenCount(veryLargeContent, mockApiHandler) - const messagesWithVeryLargeContent: Anthropic.Messages.MessageParam[] = [ - ...messages.slice(0, -1), - { role: messages[messages.length - 1].role, content: veryLargeContent }, - ] - - // Set base tokens so we're just below threshold without content - const baseTokensForVeryLarge = availableTokens - Math.floor(veryLargeContentTokens / 2) - const resultWithVeryLarge = await truncateConversationIfNeeded({ - messages: messagesWithVeryLargeContent, - totalTokens: baseTokensForVeryLarge, - contextWindow: modelInfo.contextWindow, - maxTokens, - apiHandler: mockApiHandler, - }) - expect(resultWithVeryLarge).not.toEqual(messagesWithVeryLargeContent) // Should truncate - }) - - it("should truncate if tokens are within TOKEN_BUFFER_PERCENTAGE of the threshold", async () => { - const modelInfo = createModelInfo(100000, 30000) - const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE // 10% of 100000 = 10000 - const totalTokens = 70000 - dynamicBuffer + 1 // Just within the dynamic buffer of threshold (70000) - - // Create 
messages with very small content in the last one to avoid token overflow - const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }] - - // When truncating, always uses 0.5 fraction - // With 4 messages after the first, 0.5 fraction means remove 2 messages - const expectedResult = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]] - - const result = await truncateConversationIfNeeded({ - messages: messagesWithSmallContent, - totalTokens, - contextWindow: modelInfo.contextWindow, - maxTokens: modelInfo.maxTokens, - apiHandler: mockApiHandler, - }) - expect(result).toEqual(expectedResult) - }) -}) - -/** - * Tests for the getMaxTokens function (private but tested through truncateConversationIfNeeded) - */ -describe("getMaxTokens", () => { - // We'll test this indirectly through truncateConversationIfNeeded - const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({ - contextWindow, - supportsPromptCache: true, // Not relevant for getMaxTokens - maxTokens, - }) - - // Reuse across tests for consistency - const messages: Anthropic.Messages.MessageParam[] = [ - { role: "user", content: "First message" }, - { role: "assistant", content: "Second message" }, - { role: "user", content: "Third message" }, - { role: "assistant", content: "Fourth message" }, - { role: "user", content: "Fifth message" }, - ] - - it("should use maxTokens as buffer when specified", async () => { - const modelInfo = createModelInfo(100000, 50000) - // Max tokens = 100000 - 50000 = 50000 - - // Create messages with very small content in the last one to avoid token overflow - const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }] - - // Account for the dynamic buffer which is 10% of context window (10,000 tokens) - // Below max tokens and buffer - no truncation - const result1 = await truncateConversationIfNeeded({ - messages: messagesWithSmallContent, - totalTokens: 39999, // Well below threshold + dynamic buffer - contextWindow: modelInfo.contextWindow, - maxTokens: modelInfo.maxTokens, - apiHandler: mockApiHandler, - }) - expect(result1).toEqual(messagesWithSmallContent) - - // Above max tokens - truncate - const result2 = await truncateConversationIfNeeded({ - messages: messagesWithSmallContent, - totalTokens: 50001, // Above threshold - contextWindow: modelInfo.contextWindow, - maxTokens: modelInfo.maxTokens, - apiHandler: mockApiHandler, - }) - expect(result2).not.toEqual(messagesWithSmallContent) - expect(result2.length).toBe(3) // Truncated with 0.5 fraction - }) - - it("should use 20% of context window as buffer when maxTokens is undefined", async () => { - const modelInfo = createModelInfo(100000, undefined) - // Max tokens = 100000 - (100000 * 0.2) = 80000 - - // Create messages with very small content in the last one to avoid token overflow - const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }] - - // Account for the dynamic buffer which is 10% of context window (10,000 tokens) - // Below max tokens and buffer - no truncation - const result1 = await truncateConversationIfNeeded({ - messages: messagesWithSmallContent, - totalTokens: 69999, // Well below threshold + dynamic buffer - contextWindow: modelInfo.contextWindow, - maxTokens: modelInfo.maxTokens, - apiHandler: mockApiHandler, - }) - expect(result1).toEqual(messagesWithSmallContent) - - // Above max tokens - truncate - const 
result2 = await truncateConversationIfNeeded({ - messages: messagesWithSmallContent, - totalTokens: 80001, // Above threshold - contextWindow: modelInfo.contextWindow, - maxTokens: modelInfo.maxTokens, - apiHandler: mockApiHandler, - }) - expect(result2).not.toEqual(messagesWithSmallContent) - expect(result2.length).toBe(3) // Truncated with 0.5 fraction - }) - - it("should handle small context windows appropriately", async () => { - const modelInfo = createModelInfo(50000, 10000) - // Max tokens = 50000 - 10000 = 40000 - - // Create messages with very small content in the last one to avoid token overflow - const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }] - - // Below max tokens and buffer - no truncation - const result1 = await truncateConversationIfNeeded({ - messages: messagesWithSmallContent, - totalTokens: 34999, // Well below threshold + buffer - contextWindow: modelInfo.contextWindow, - maxTokens: modelInfo.maxTokens, - apiHandler: mockApiHandler, - }) - expect(result1).toEqual(messagesWithSmallContent) - - // Above max tokens - truncate - const result2 = await truncateConversationIfNeeded({ - messages: messagesWithSmallContent, - totalTokens: 40001, // Above threshold - contextWindow: modelInfo.contextWindow, - maxTokens: modelInfo.maxTokens, - apiHandler: mockApiHandler, - }) - expect(result2).not.toEqual(messagesWithSmallContent) - expect(result2.length).toBe(3) // Truncated with 0.5 fraction - }) - - it("should handle large context windows appropriately", async () => { - const modelInfo = createModelInfo(200000, 30000) - // Max tokens = 200000 - 30000 = 170000 - - // Create messages with very small content in the last one to avoid token overflow - const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }] - - // Account for the dynamic buffer which is 10% of context window (20,000 tokens for this test) - // Below max tokens and buffer - no truncation - const result1 = await truncateConversationIfNeeded({ - messages: messagesWithSmallContent, - totalTokens: 149999, // Well below threshold + dynamic buffer - contextWindow: modelInfo.contextWindow, - maxTokens: modelInfo.maxTokens, - apiHandler: mockApiHandler, - }) - expect(result1).toEqual(messagesWithSmallContent) - - // Above max tokens - truncate - const result2 = await truncateConversationIfNeeded({ - messages: messagesWithSmallContent, - totalTokens: 170001, // Above threshold - contextWindow: modelInfo.contextWindow, - maxTokens: modelInfo.maxTokens, - apiHandler: mockApiHandler, - }) - expect(result2).not.toEqual(messagesWithSmallContent) - expect(result2.length).toBe(3) // Truncated with 0.5 fraction - }) -}) diff --git a/src/core/sliding-window/index.ts b/src/core/sliding-window/index.ts deleted file mode 100644 index 75395ecd75..0000000000 --- a/src/core/sliding-window/index.ts +++ /dev/null @@ -1,100 +0,0 @@ -import { Anthropic } from "@anthropic-ai/sdk" -import { ApiHandler } from "../../api" - -/** - * Default percentage of the context window to use as a buffer when deciding when to truncate - */ -export const TOKEN_BUFFER_PERCENTAGE = 0.1 - -/** - * Counts tokens for user content using the provider's token counting implementation. 
- *
- * @param {Array<Anthropic.Messages.ContentBlockParam>} content - The content to count tokens for
- * @param {ApiHandler} apiHandler - The API handler to use for token counting
- * @returns {Promise<number>} A promise resolving to the token count
- */
-export async function estimateTokenCount(
-	content: Array<Anthropic.Messages.ContentBlockParam>,
-	apiHandler: ApiHandler,
-): Promise<number> {
-	if (!content || content.length === 0) return 0
-	return apiHandler.countTokens(content)
-}
-
-/**
- * Truncates a conversation by removing a fraction of the messages.
- *
- * The first message is always retained, and a specified fraction (rounded to an even number)
- * of messages from the beginning (excluding the first) is removed.
- *
- * @param {Anthropic.Messages.MessageParam[]} messages - The conversation messages.
- * @param {number} fracToRemove - The fraction (between 0 and 1) of messages (excluding the first) to remove.
- * @returns {Anthropic.Messages.MessageParam[]} The truncated conversation messages.
- */
-export function truncateConversation(
-	messages: Anthropic.Messages.MessageParam[],
-	fracToRemove: number,
-): Anthropic.Messages.MessageParam[] {
-	const truncatedMessages = [messages[0]]
-	const rawMessagesToRemove = Math.floor((messages.length - 1) * fracToRemove)
-	const messagesToRemove = rawMessagesToRemove - (rawMessagesToRemove % 2)
-	const remainingMessages = messages.slice(messagesToRemove + 1)
-	truncatedMessages.push(...remainingMessages)
-
-	return truncatedMessages
-}
-
-/**
- * Conditionally truncates the conversation messages if the total token count
- * exceeds the model's limit, considering the size of incoming content.
- *
- * @param {Anthropic.Messages.MessageParam[]} messages - The conversation messages.
- * @param {number} totalTokens - The total number of tokens in the conversation (excluding the last user message).
- * @param {number} contextWindow - The context window size.
- * @param {number} maxTokens - The maximum number of tokens allowed.
- * @param {ApiHandler} apiHandler - The API handler to use for token counting.
- * @returns {Anthropic.Messages.MessageParam[]} The original or truncated conversation messages.
- */
-
-type TruncateOptions = {
-	messages: Anthropic.Messages.MessageParam[]
-	totalTokens: number
-	contextWindow: number
-	maxTokens?: number | null
-	apiHandler: ApiHandler
-}
-
-/**
- * Conditionally truncates the conversation messages if the total token count
- * exceeds the model's limit, considering the size of incoming content.
- *
- * @param {TruncateOptions} options - The options for truncation
- * @returns {Promise<Anthropic.Messages.MessageParam[]>} The original or truncated conversation messages.
- */
-export async function truncateConversationIfNeeded({
-	messages,
-	totalTokens,
-	contextWindow,
-	maxTokens,
-	apiHandler,
-}: TruncateOptions): Promise<Anthropic.Messages.MessageParam[]> {
-	// Calculate the maximum tokens reserved for response
-	const reservedTokens = maxTokens || contextWindow * 0.2
-
-	// Estimate tokens for the last message (which is always a user message)
-	const lastMessage = messages[messages.length - 1]
-	const lastMessageContent = lastMessage.content
-	const lastMessageTokens = Array.isArray(lastMessageContent)
-		? 
await estimateTokenCount(lastMessageContent, apiHandler)
-		: await estimateTokenCount([{ type: "text", text: lastMessageContent as string }], apiHandler)
-
-	// Calculate total effective tokens (totalTokens never includes the last message)
-	const effectiveTokens = totalTokens + lastMessageTokens
-
-	// Calculate available tokens for conversation history
-	// Truncate if we're within TOKEN_BUFFER_PERCENTAGE of the context window
-	const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
-
-	// Determine if truncation is needed and apply if necessary
-	return effectiveTokens > allowedTokens ? truncateConversation(messages, 0.5) : messages
-}
diff --git a/src/core/task-persistence/apiMessages.ts b/src/core/task-persistence/apiMessages.ts
index b361016345..6ac36ed08f 100644
--- a/src/core/task-persistence/apiMessages.ts
+++ b/src/core/task-persistence/apiMessages.ts
@@ -8,7 +8,7 @@ import { fileExistsAtPath } from "../../utils/fs"
 import { GlobalFileNames } from "../../shared/globalFileNames"
 import { getTaskDirectoryPath } from "../../shared/storagePathManager"
 
-export type ApiMessage = Anthropic.MessageParam & { ts?: number }
+export type ApiMessage = Anthropic.MessageParam & { ts?: number; isSummary?: boolean }
 
 export async function readApiMessages({
 	taskId,
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index 2d2180e391..b6025d1842 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -65,7 +65,6 @@ import {
 	parseAssistantMessageV2 as parseAssistantMessage,
 	presentAssistantMessage,
 } from "../assistant-message"
-import { truncateConversationIfNeeded } from "../sliding-window"
 import { ClineProvider } from "../webview/ClineProvider"
 import { MultiSearchReplaceDiffStrategy } from "../diff/strategies/multi-search-replace"
 import { readApiMessages, saveApiMessages, readTaskMessages, saveTaskMessages, taskMetadata } from "../task-persistence"
@@ -79,6 +78,8 @@ import {
 	checkpointDiff,
 } from "../checkpoints"
 import { processUserContentMentions } from "../mentions/processUserContentMentions"
+import { ApiMessage } from "../task-persistence/apiMessages"
+import { summarizeConversationIfNeeded } from "../condense"
 
 export type ClineEvents = {
 	message: [{ action: "created" | "updated"; message: ClineMessage }]
@@ -155,7 +156,7 @@ export class Task extends EventEmitter<ClineEvents> {
 	didEditFile: boolean = false
 
 	// LLM Messages & Chat Messages
-	apiConversationHistory: (Anthropic.MessageParam & { ts?: number })[] = []
+	apiConversationHistory: ApiMessage[] = []
 	clineMessages: ClineMessage[] = []
 
 	// Ask
@@ -284,7 +285,7 @@
 	// API Messages
 
-	private async getSavedApiConversationHistory(): Promise<(Anthropic.MessageParam & { ts?: number })[]> {
+	private async getSavedApiConversationHistory(): Promise<ApiMessage[]> {
 		return readApiMessages({ taskId: this.taskId, globalStoragePath: this.globalStoragePath })
 	}
 
@@ -294,7 +295,7 @@
 		await this.saveApiConversationHistory()
 	}
 
-	async overwriteApiConversationHistory(newHistory: Anthropic.MessageParam[]) {
+	async overwriteApiConversationHistory(newHistory: ApiMessage[]) {
 		this.apiConversationHistory = newHistory
 		await this.saveApiConversationHistory()
 	}
@@ -697,8 +698,7 @@
 		// Make sure that the api conversation history can be resumed by the API,
 		// even if it goes out of sync with cline messages.
- let existingApiConversationHistory: Anthropic.Messages.MessageParam[] = - await this.getSavedApiConversationHistory() + let existingApiConversationHistory: ApiMessage[] = await this.getSavedApiConversationHistory() // v2.0 xml tags refactor caveat: since we don't use tools anymore, we need to replace all tool use blocks with a text block since the API disallows conversations with tool uses and no tool schema const conversationWithoutToolBlocks = existingApiConversationHistory.map((message) => { @@ -742,7 +742,7 @@ export class Task extends EventEmitter { // if the last message is a user message, we can need to get the assistant message before it to see if it made tool calls, and if so, fill in the remaining tool responses with 'interrupted' let modifiedOldUserContent: Anthropic.Messages.ContentBlockParam[] // either the last message if its user message, or the user message before the last (assistant) message - let modifiedApiConversationHistory: Anthropic.Messages.MessageParam[] // need to remove the last user message to replace with new modified user message + let modifiedApiConversationHistory: ApiMessage[] // need to remove the last user message to replace with new modified user message if (existingApiConversationHistory.length > 0) { const lastMessage = existingApiConversationHistory[existingApiConversationHistory.length - 1] @@ -768,7 +768,7 @@ export class Task extends EventEmitter { modifiedOldUserContent = [] } } else if (lastMessage.role === "user") { - const previousAssistantMessage: Anthropic.Messages.MessageParam | undefined = + const previousAssistantMessage: ApiMessage | undefined = existingApiConversationHistory[existingApiConversationHistory.length - 2] const existingUserContent: Anthropic.Messages.ContentBlockParam[] = Array.isArray(lastMessage.content) @@ -1468,35 +1468,35 @@ export class Task extends EventEmitter { const totalTokens = tokensIn + tokensOut + cacheWrites + cacheReads - // Default max tokens value for thinking models when no specific - // value is set. - const DEFAULT_THINKING_MODEL_MAX_TOKENS = 16_384 - const modelInfo = this.api.getModel().info - - const maxTokens = modelInfo.thinking - ? this.apiConfiguration.modelMaxTokens || DEFAULT_THINKING_MODEL_MAX_TOKENS - : modelInfo.maxTokens - const contextWindow = modelInfo.contextWindow - const trimmedMessages = await truncateConversationIfNeeded({ - messages: this.apiConversationHistory, + const messagesWithSummary = await summarizeConversationIfNeeded( + this.apiConversationHistory, totalTokens, - maxTokens, contextWindow, - apiHandler: this.api, - }) + this.api, + ) - if (trimmedMessages !== this.apiConversationHistory) { - await this.overwriteApiConversationHistory(trimmedMessages) + if (messagesWithSummary !== this.apiConversationHistory) { + await this.overwriteApiConversationHistory(messagesWithSummary) } } + // Only include messages since the last summary message. + let messagesSinceLastSummary = this.apiConversationHistory + let lastSummaryIndexReverse = [...this.apiConversationHistory] + .reverse() + .findIndex((message) => message.isSummary) + if (lastSummaryIndexReverse !== -1) { + const lastSummaryIndex = this.apiConversationHistory.length - lastSummaryIndexReverse - 1 + messagesSinceLastSummary = this.apiConversationHistory.slice(lastSummaryIndex) + } + // Clean conversation history by: // 1. Converting to Anthropic.MessageParam by spreading only the API-required properties. // 2. Converting image blocks to text descriptions if model doesn't support images. 
-		const cleanConversationHistory = this.apiConversationHistory.map(({ role, content }) => {
+		const cleanConversationHistory = messagesSinceLastSummary.map(({ role, content }) => {
 			// Handle array content (could contain image blocks).
 			if (Array.isArray(content)) {
 				if (!this.api.getModel().info.supportsImages) {

From 8d6b71fdcc59795b4e1787aab8b165f921ce5728 Mon Sep 17 00:00:00 2001
From: Canyon Robins
Date: Tue, 13 May 2025 17:03:58 -0700
Subject: [PATCH 02/25] use actual messages rather than injecting them in the
 prompt

---
 src/core/condense/index.ts | 42 +++++++++++++-------------------------
 1 file changed, 14 insertions(+), 28 deletions(-)

diff --git a/src/core/condense/index.ts b/src/core/condense/index.ts
index f194a20152..2bd2df34b5 100644
--- a/src/core/condense/index.ts
+++ b/src/core/condense/index.ts
@@ -1,3 +1,4 @@
+import Anthropic from "@anthropic-ai/sdk"
 import { ApiHandler } from "../../api"
 import { ApiMessage } from "../task-persistence/apiMessages"
 
@@ -40,10 +41,7 @@ Example summary structure:
   - [Task 2 details & next steps]
   - [...]
 
-The conversation history which you should summarize is included below. Output only the summary, without any additional commentary or explanation.
-### BEGIN CONVERSATION HISTORY
-{messages}
-### END CONVERSATION HISTORY
+Output only the summary of the conversation so far, without any additional commentary or explanation.
 `
 
 /**
@@ -69,12 +67,19 @@ export async function summarizeConversationIfNeeded(
 }
 
 async function summarizeConversation(messages: ApiMessage[], apiHandler: ApiHandler): Promise<ApiMessage[]> {
-	if (messages.length < 2) {
+	if (messages.length <= 2) {
 		return messages
 	}
-	const messagesToSummarize = messages.slice(0, -1)
-	const summaryPrompt = getSummaryPrompt(messagesToSummarize)
-	const stream = apiHandler.createMessage(summaryPrompt, [])
+	if (messages[messages.length - 2].isSummary || messages[messages.length - 1].isSummary) {
+		return messages
+	}
+	const finalRequestMessage: Anthropic.MessageParam = {
+		role: "user",
+		content: "Summarize the conversation so far, as described in the prompt instructions.",
+	}
+	const messagesToSummarize = [...messages.slice(0, -1), finalRequestMessage]
+
+	const stream = apiHandler.createMessage(SUMMARY_PROMPT, messagesToSummarize)
 	let summary = ""
 	for await (const chunk of stream) {
 		if (chunk.type === "text") {
@@ -93,24 +98,5 @@ async function summarizeConversation(messages: ApiMessage[], apiHandler: ApiHand
 		isSummary: true,
 	}
 
-	return [...messagesToSummarize, summaryMessage, messages[messages.length - 1]]
-}
-
-function getSummaryPrompt(messages: ApiMessage[]): string {
-	return SUMMARY_PROMPT.replace("{messages}", getMessageStr(messages))
-}
-
-function getMessageStr(messages: ApiMessage[]): string {
-	let messageStr = ""
-	for (const message of messages) {
-		if (message.role === "user") {
-			messageStr += "# User:"
-		} else if (message.role === "assistant") {
-			messageStr += "# Assistant:"
-		} else {
-			continue
-		}
-		messageStr += `\n${message.content}\n\n`
-	}
-	return messageStr
+	return [...messages.slice(0, -1), summaryMessage, messages[messages.length - 1]]
 }

From f7a9cd1428a5830b7c2d2c5ad1e4278b9c5a398b Mon Sep 17 00:00:00 2001
From: Canyon Robins
Date: Tue, 13 May 2025 18:09:21 -0700
Subject: [PATCH 03/25] keep multiple messages

---
 src/core/condense/index.ts | 29 +++++++++++++++++++++--------
 src/core/task/Task.ts      | 12 ++----------
 2 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/src/core/condense/index.ts b/src/core/condense/index.ts
index 2bd2df34b5..c9c9e0b520 100644
--- 
a/src/core/condense/index.ts
+++ b/src/core/condense/index.ts
@@ -3,6 +3,7 @@
 import { ApiMessage } from "../task-persistence/apiMessages"
 
 const CONTEXT_FRAC_FOR_SUMMARY = 0.5 // TODO(canyon): make this configurable
+const N_MESSAGES_TO_KEEP = 3
 
 const SUMMARY_PROMPT = `\
 Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions.
@@ -67,19 +68,21 @@ export async function summarizeConversationIfNeeded(
 }
 
 async function summarizeConversation(messages: ApiMessage[], apiHandler: ApiHandler): Promise<ApiMessage[]> {
-	if (messages.length <= 2) {
-		return messages
+	const messagesToSummarize = getMessagesSinceLastSummary(messages.slice(0, -N_MESSAGES_TO_KEEP))
+	if (messagesToSummarize.length <= 1) {
+		return messages // Not enough messages to warrant a summary
 	}
-	if (messages[messages.length - 2].isSummary || messages[messages.length - 1].isSummary) {
-		return messages
+	const keepMessages = messages.slice(-N_MESSAGES_TO_KEEP)
+	for (const message of keepMessages) {
+		if (message.isSummary) {
+			return messages // We recently summarized these messages; it's too soon to summarize again.
+		}
 	}
 	const finalRequestMessage: Anthropic.MessageParam = {
 		role: "user",
 		content: "Summarize the conversation so far, as described in the prompt instructions.",
 	}
-	const messagesToSummarize = [...messages.slice(0, -1), finalRequestMessage]
-
-	const stream = apiHandler.createMessage(SUMMARY_PROMPT, messagesToSummarize)
+	const stream = apiHandler.createMessage(SUMMARY_PROMPT, [...messagesToSummarize, finalRequestMessage])
 	let summary = ""
 	for await (const chunk of stream) {
 		if (chunk.type === "text") {
@@ -98,5 +101,15 @@ async function summarizeConversation(messages: ApiMessage[], apiHand
 		isSummary: true,
 	}
 
-	return [...messages.slice(0, -1), summaryMessage, messages[messages.length - 1]]
+	return [...messages.slice(0, -N_MESSAGES_TO_KEEP), summaryMessage, ...keepMessages]
+}
+
+/* Returns the list of all messages since the last summary message, including the summary. Returns all messages if there is no summary. */
+export function getMessagesSinceLastSummary(messages: ApiMessage[]): ApiMessage[] {
+	let lastSummaryIndexReverse = [...messages].reverse().findIndex((message) => message.isSummary)
+	if (lastSummaryIndexReverse === -1) {
+		return messages
+	}
+	const lastSummaryIndex = messages.length - lastSummaryIndexReverse - 1
+	return messages.slice(lastSummaryIndex)
 }
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index b6025d1842..5fa38753bf 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -79,7 +79,7 @@ import {
 } from "../checkpoints"
 import { processUserContentMentions } from "../mentions/processUserContentMentions"
 import { ApiMessage } from "../task-persistence/apiMessages"
-import { summarizeConversationIfNeeded } from "../condense"
+import { getMessagesSinceLastSummary, summarizeConversationIfNeeded } from "../condense"
 
 export type ClineEvents = {
 	message: [{ action: "created" | "updated"; message: ClineMessage }]
@@ -1483,15 +1483,7 @@ export class Task extends EventEmitter<ClineEvents> {
 		}
 	}
 
-	// Only include messages since the last summary message.
- let messagesSinceLastSummary = this.apiConversationHistory - let lastSummaryIndexReverse = [...this.apiConversationHistory] - .reverse() - .findIndex((message) => message.isSummary) - if (lastSummaryIndexReverse !== -1) { - const lastSummaryIndex = this.apiConversationHistory.length - lastSummaryIndexReverse - 1 - messagesSinceLastSummary = this.apiConversationHistory.slice(lastSummaryIndex) - } + const messagesSinceLastSummary = getMessagesSinceLastSummary(this.apiConversationHistory) // Clean conversation history by: // 1. Converting to Anthropic.MessageParam by spreading only the API-required properties. From e43c50be042dd5d10bd0f8a5dece030c72bbba73 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Tue, 13 May 2025 18:28:05 -0700 Subject: [PATCH 04/25] add image handling code --- src/api/transform/image-cleaning.ts | 28 +++++++++++++++++++++++++++ src/core/condense/index.ts | 6 +++++- src/core/task/Task.ts | 30 ++++------------------------- 3 files changed, 37 insertions(+), 27 deletions(-) create mode 100644 src/api/transform/image-cleaning.ts diff --git a/src/api/transform/image-cleaning.ts b/src/api/transform/image-cleaning.ts new file mode 100644 index 0000000000..e5987bb59e --- /dev/null +++ b/src/api/transform/image-cleaning.ts @@ -0,0 +1,28 @@ +import { ApiHandler } from ".." +import { ApiMessage } from "../../core/task-persistence/apiMessages" + +/* Removes image blocks from messages if they are not supported by the Api Handler */ +export function maybeRemoveImageBlocks(messages: ApiMessage[], apiHandler: ApiHandler): ApiMessage[] { + return messages.map((message) => { + // Handle array content (could contain image blocks). + let { content } = message + if (Array.isArray(content)) { + if (!apiHandler.getModel().info.supportsImages) { + // Convert image blocks to text descriptions. + content = content.map((block) => { + if (block.type === "image") { + // Convert image blocks to text descriptions. + // Note: We can't access the actual image content/url due to API limitations, + // but we can indicate that an image was present in the conversation. 
+ return { + type: "text", + text: "[Referenced image in conversation]", + } + } + return block + }) + } + } + return { ...message, content } + }) +} diff --git a/src/core/condense/index.ts b/src/core/condense/index.ts index c9c9e0b520..8c4cf91d18 100644 --- a/src/core/condense/index.ts +++ b/src/core/condense/index.ts @@ -1,6 +1,7 @@ import Anthropic from "@anthropic-ai/sdk" import { ApiHandler } from "../../api" import { ApiMessage } from "../task-persistence/apiMessages" +import { maybeRemoveImageBlocks } from "../../api/transform/image-cleaning" const CONTEXT_FRAC_FOR_SUMMARY = 0.5 // TODO(canyon): make this configurable const N_MESSAGES_TO_KEEP = 3 @@ -82,7 +83,10 @@ async function summarizeConversation(messages: ApiMessage[], apiHandler: ApiHand role: "user", content: "Summarize the conversation so far, as described in the prompt instructions.", } - const stream = apiHandler.createMessage(SUMMARY_PROMPT, [...messagesToSummarize, finalRequestMessage]) + const requestMessages = maybeRemoveImageBlocks([...messagesToSummarize, finalRequestMessage], apiHandler).map( + ({ role, content }) => ({ role, content }), + ) + const stream = apiHandler.createMessage(SUMMARY_PROMPT, requestMessages) let summary = "" for await (const chunk of stream) { if (chunk.type === "text") { diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 5fa38753bf..6035e0745b 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -80,6 +80,7 @@ import { import { processUserContentMentions } from "../mentions/processUserContentMentions" import { ApiMessage } from "../task-persistence/apiMessages" import { getMessagesSinceLastSummary, summarizeConversationIfNeeded } from "../condense" +import { maybeRemoveImageBlocks } from "../../api/transform/image-cleaning" export type ClineEvents = { message: [{ action: "created" | "updated"; message: ClineMessage }] @@ -1484,32 +1485,9 @@ export class Task extends EventEmitter { } const messagesSinceLastSummary = getMessagesSinceLastSummary(this.apiConversationHistory) - - // Clean conversation history by: - // 1. Converting to Anthropic.MessageParam by spreading only the API-required properties. - // 2. Converting image blocks to text descriptions if model doesn't support images. - const cleanConversationHistory = messagesSinceLastSummary.map(({ role, content }) => { - // Handle array content (could contain image blocks). - if (Array.isArray(content)) { - if (!this.api.getModel().info.supportsImages) { - // Convert image blocks to text descriptions. - content = content.map((block) => { - if (block.type === "image") { - // Convert image blocks to text descriptions. - // Note: We can't access the actual image content/url due to API limitations, - // but we can indicate that an image was present in the conversation. 
- return { - type: "text", - text: "[Referenced image in conversation]", - } - } - return block - }) - } - } - - return { role, content } - }) + const cleanConversationHistory = maybeRemoveImageBlocks(messagesSinceLastSummary, this.api).map( + ({ role, content }) => ({ role, content }), + ) const stream = this.api.createMessage(systemPrompt, cleanConversationHistory) const iterator = stream[Symbol.asyncIterator]() From aa8e1d39d6a249fb8ece5b75d3ee4c8ca3c51295 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Tue, 13 May 2025 18:54:43 -0700 Subject: [PATCH 05/25] Update src/core/condense/index.ts Co-authored-by: Matt Rubens --- src/core/condense/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/condense/index.ts b/src/core/condense/index.ts index 8c4cf91d18..af476f519c 100644 --- a/src/core/condense/index.ts +++ b/src/core/condense/index.ts @@ -10,7 +10,7 @@ const SUMMARY_PROMPT = `\ Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions. This summary should be thorough in capturing technical details, code patterns, and architectural decisions that would be essential for continuing with the conversation and supporting any continuing tasks. -Your summary should be structure as follows: +Your summary should be structured as follows: Context: The context to continue the conversation with. If applicable based on the current task, this should include: 1. Previous Conversation: High level details about what was discussed throughout the entire conversation with the user. This should be written to allow someone to be able to follow the general overarching conversation flow. 2. Current Work: Describe in detail what was being worked on prior to this request to summarize the conversation. Pay special attention to the more recent messages in the conversation. 
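With patch 05 applied, the condense path is complete: summarization replaces truncation, the most recent messages are kept verbatim, and image blocks are stripped for models that cannot accept them. As an illustration of how the two exported entry points compose, here is a minimal TypeScript sketch. It assumes the module paths used in the diffs above, and maybeCondense is an invented wrapper for illustration only, not a function introduced by this series:

import { ApiHandler } from "../../api"
import { ApiMessage } from "../task-persistence/apiMessages"
import { getMessagesSinceLastSummary, summarizeConversationIfNeeded } from "../condense"

// Invented wrapper: condense the history when it outgrows the context window,
// then build the next request payload starting from the latest summary.
async function maybeCondense(history: ApiMessage[], totalTokens: number, api: ApiHandler): Promise<ApiMessage[]> {
	const { contextWindow } = api.getModel().info
	// Below CONTEXT_FRAC_FOR_SUMMARY (0.5) of the context window this returns
	// `history` unchanged; above it, an assistant message with isSummary: true
	// is inserted ahead of the last N_MESSAGES_TO_KEEP (3) messages.
	const withSummary = await summarizeConversationIfNeeded(history, totalTokens, contextWindow, api)
	// Everything before the latest summary stays persisted on disk but is
	// omitted from what gets sent to the model on the next request.
	return getMessagesSinceLastSummary(withSummary)
}
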
From a97114a62141b3bbc1803edf570a3c3d76037303 Mon Sep 17 00:00:00 2001
From: Canyon Robins
Date: Tue, 13 May 2025 19:05:06 -0700
Subject: [PATCH 06/25] add back sliding window as default implementation
 behind a flag

---
 .../__tests__/sliding-window.test.ts          | 553 ++++++++++++++++++
 src/core/sliding-window/index.ts              | 100 ++++
 src/core/task/Task.ts                         |  39 +-
 3 files changed, 684 insertions(+), 8 deletions(-)
 create mode 100644 src/core/sliding-window/__tests__/sliding-window.test.ts
 create mode 100644 src/core/sliding-window/index.ts

diff --git a/src/core/sliding-window/__tests__/sliding-window.test.ts b/src/core/sliding-window/__tests__/sliding-window.test.ts
new file mode 100644
index 0000000000..16af2d4630
--- /dev/null
+++ b/src/core/sliding-window/__tests__/sliding-window.test.ts
@@ -0,0 +1,553 @@
+// npx jest src/core/sliding-window/__tests__/sliding-window.test.ts
+
+import { Anthropic } from "@anthropic-ai/sdk"
+
+import { ModelInfo } from "../../../shared/api"
+import { BaseProvider } from "../../../api/providers/base-provider"
+import {
+	TOKEN_BUFFER_PERCENTAGE,
+	estimateTokenCount,
+	truncateConversation,
+	truncateConversationIfNeeded,
+} from "../index"
+
+// Create a mock ApiHandler for testing
+class MockApiHandler extends BaseProvider {
+	createMessage(): any {
+		throw new Error("Method not implemented.")
+	}
+
+	getModel(): { id: string; info: ModelInfo } {
+		return {
+			id: "test-model",
+			info: {
+				contextWindow: 100000,
+				maxTokens: 50000,
+				supportsPromptCache: true,
+				supportsImages: false,
+				inputPrice: 0,
+				outputPrice: 0,
+				description: "Test model",
+			},
+		}
+	}
+}
+
+// Create a singleton instance for tests
+const mockApiHandler = new MockApiHandler()
+
+/**
+ * Tests for the truncateConversation function
+ */
+describe("truncateConversation", () => {
+	it("should retain the first message", () => {
+		const messages: Anthropic.Messages.MessageParam[] = [
+			{ role: "user", content: "First message" },
+			{ role: "assistant", content: "Second message" },
+			{ role: "user", content: "Third message" },
+		]
+
+		const result = truncateConversation(messages, 0.5)
+
+		// With 2 messages after the first, 0.5 fraction means remove 1 message
+		// But 1 is odd, so it rounds down to 0 (to make it even)
+		expect(result.length).toBe(3) // First message + 2 remaining messages
+		expect(result[0]).toEqual(messages[0])
+		expect(result[1]).toEqual(messages[1])
+		expect(result[2]).toEqual(messages[2])
+	})
+
+	it("should remove the specified fraction of messages (rounded to even number)", () => {
+		const messages: Anthropic.Messages.MessageParam[] = [
+			{ role: "user", content: "First message" },
+			{ role: "assistant", content: "Second message" },
+			{ role: "user", content: "Third message" },
+			{ role: "assistant", content: "Fourth message" },
+			{ role: "user", content: "Fifth message" },
+		]
+
+		// 4 messages excluding first, 0.5 fraction = 2 messages to remove
+		// 2 is already even, so no rounding needed
+		const result = truncateConversation(messages, 0.5)
+
+		expect(result.length).toBe(3)
+		expect(result[0]).toEqual(messages[0])
+		expect(result[1]).toEqual(messages[3])
+		expect(result[2]).toEqual(messages[4])
+	})
+
+	it("should round to an even number of messages to remove", () => {
+		const messages: Anthropic.Messages.MessageParam[] = [
+			{ role: "user", content: "First message" },
+			{ role: "assistant", content: "Second message" },
+			{ role: "user", content: "Third message" },
+			{ role: "assistant", content: "Fourth message" },
+			{ role: "user", content: "Fifth message" },
+			{ role: "assistant", content: "Sixth message" },
+			{ role: "user", content: "Seventh message" },
+		]
+
+		// 6 messages excluding first, 0.3 fraction = 1.8 messages to remove
+		// 1.8 rounds down to 1, then to 0 to make it even
+		const result = truncateConversation(messages, 0.3)
+
+		expect(result.length).toBe(7) // No messages removed
+		expect(result).toEqual(messages)
+	})
+
+	it("should handle edge case with fracToRemove = 0", () => {
+		const messages: Anthropic.Messages.MessageParam[] = [
+			{ role: "user", content: "First message" },
+			{ role: "assistant", content: "Second message" },
+			{ role: "user", content: "Third message" },
+		]
+
+		const result = truncateConversation(messages, 0)
+
+		expect(result).toEqual(messages)
+	})
+
+	it("should handle edge case with fracToRemove = 1", () => {
+		const messages: Anthropic.Messages.MessageParam[] = [
+			{ role: "user", content: "First message" },
+			{ role: "assistant", content: "Second message" },
+			{ role: "user", content: "Third message" },
+			{ role: "assistant", content: "Fourth message" },
+		]
+
+		// 3 messages excluding first, 1.0 fraction = 3 messages to remove
+		// But 3 is odd, so it rounds down to 2 to make it even
+		const result = truncateConversation(messages, 1)
+
+		expect(result.length).toBe(2)
+		expect(result[0]).toEqual(messages[0])
+		expect(result[1]).toEqual(messages[3])
+	})
+})
+
+/**
+ * Tests for the estimateTokenCount function
+ */
+describe("estimateTokenCount", () => {
+	it("should return 0 for empty or undefined content", async () => {
+		expect(await estimateTokenCount([], mockApiHandler)).toBe(0)
+		// @ts-ignore - Testing with undefined
+		expect(await estimateTokenCount(undefined, mockApiHandler)).toBe(0)
+	})
+
+	it("should estimate tokens for text blocks", async () => {
+		const content: Array<Anthropic.Messages.ContentBlockParam> = [
+			{ type: "text", text: "This is a text block with 36 characters" },
+		]
+
+		// With tiktoken, the exact token count may differ from character-based estimation
+		// Instead of expecting an exact number, we verify it's a reasonable positive number
+		const result = await estimateTokenCount(content, mockApiHandler)
+		expect(result).toBeGreaterThan(0)
+
+		// We can also verify that longer text results in more tokens
+		const longerContent: Array<Anthropic.Messages.ContentBlockParam> = [
+			{
+				type: "text",
+				text: "This is a longer text block with significantly more characters to encode into tokens",
+			},
+		]
+		const longerResult = await estimateTokenCount(longerContent, mockApiHandler)
+		expect(longerResult).toBeGreaterThan(result)
+	})
+
+	it("should estimate tokens for image blocks based on data size", async () => {
+		// Small image
+		const smallImage: Array<Anthropic.Messages.ContentBlockParam> = [
+			{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "small_dummy_data" } },
+		]
+		// Larger image with more data
+		const largerImage: Array<Anthropic.Messages.ContentBlockParam> = [
+			{ type: "image", source: { type: "base64", media_type: "image/png", data: "X".repeat(1000) } },
+		]
+
+		// Verify the token count scales with the size of the image data
+		const smallImageTokens = await estimateTokenCount(smallImage, mockApiHandler)
+		const largerImageTokens = await estimateTokenCount(largerImage, mockApiHandler)
+
+		// Small image should have some tokens
+		expect(smallImageTokens).toBeGreaterThan(0)
+
+		// Larger image should have proportionally more tokens
+		expect(largerImageTokens).toBeGreaterThan(smallImageTokens)
+
+		// Verify the larger image calculation matches our formula including the 50% fudge factor
+		expect(largerImageTokens).toBe(48)
+	})
+
+	it("should estimate tokens for mixed content blocks", async () => {
+		const content: Array<Anthropic.Messages.ContentBlockParam> = [
+			{ type: "text", text: "A text block with 30 characters" },
+			{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "dummy_data" } },
+			{ type: "text", text: "Another text with 24 chars" },
+		]
+
+		// We know image tokens calculation should be consistent
+		const imageTokens = Math.ceil(Math.sqrt("dummy_data".length)) * 1.5
+
+		// With tiktoken, we can't predict exact text token counts,
+		// but we can verify the total is greater than just the image tokens
+		const result = await estimateTokenCount(content, mockApiHandler)
+		expect(result).toBeGreaterThan(imageTokens)
+
+		// Also test against a version with only the image to verify text adds tokens
+		const imageOnlyContent: Array<Anthropic.Messages.ContentBlockParam> = [
+			{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "dummy_data" } },
+		]
+		const imageOnlyResult = await estimateTokenCount(imageOnlyContent, mockApiHandler)
+		expect(result).toBeGreaterThan(imageOnlyResult)
+	})
+
+	it("should handle empty text blocks", async () => {
+		const content: Array<Anthropic.Messages.ContentBlockParam> = [{ type: "text", text: "" }]
+		expect(await estimateTokenCount(content, mockApiHandler)).toBe(0)
+	})
+
+	it("should handle plain string messages", async () => {
+		const content = "This is a plain text message"
+		expect(await estimateTokenCount([{ type: "text", text: content }], mockApiHandler)).toBeGreaterThan(0)
+	})
+})
+
+/**
+ * Tests for the truncateConversationIfNeeded function
+ */
+describe("truncateConversationIfNeeded", () => {
+	const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
+		contextWindow,
+		supportsPromptCache: true,
+		maxTokens,
+	})
+
+	const messages: Anthropic.Messages.MessageParam[] = [
+		{ role: "user", content: "First message" },
+		{ role: "assistant", content: "Second message" },
+		{ role: "user", content: "Third message" },
+		{ role: "assistant", content: "Fourth message" },
+		{ role: "user", content: "Fifth message" },
+	]
+
+	it("should not truncate if tokens are below max tokens threshold", async () => {
+		const modelInfo = createModelInfo(100000, 30000)
+		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE // 10000
+		const totalTokens = 70000 - dynamicBuffer - 1 // Just below threshold - buffer
+
+		// Create messages with very small content in the last one to avoid token overflow
+		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
+
+		const result = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens,
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result).toEqual(messagesWithSmallContent) // No truncation occurs
+	})
+
+	it("should truncate if tokens are above max tokens threshold", async () => {
+		const modelInfo = createModelInfo(100000, 30000)
+		const totalTokens = 70001 // Above threshold
+
+		// Create messages with very small content in the last one to avoid token overflow
+		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
+
+		// When truncating, always uses 0.5 fraction
+		// With 4 messages after the first, 0.5 fraction means remove 2 messages
+		const expectedResult = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]
+
+		const result = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens,
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: 
modelInfo.maxTokens, + apiHandler: mockApiHandler, + }) + expect(result).toEqual(expectedResult) + }) + + it("should work with non-prompt caching models the same as prompt caching models", async () => { + // The implementation no longer differentiates between prompt caching and non-prompt caching models + const modelInfo1 = createModelInfo(100000, 30000) + const modelInfo2 = createModelInfo(100000, 30000) + + // Create messages with very small content in the last one to avoid token overflow + const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }] + + // Test below threshold + const belowThreshold = 69999 + const result1 = await truncateConversationIfNeeded({ + messages: messagesWithSmallContent, + totalTokens: belowThreshold, + contextWindow: modelInfo1.contextWindow, + maxTokens: modelInfo1.maxTokens, + apiHandler: mockApiHandler, + }) + + const result2 = await truncateConversationIfNeeded({ + messages: messagesWithSmallContent, + totalTokens: belowThreshold, + contextWindow: modelInfo2.contextWindow, + maxTokens: modelInfo2.maxTokens, + apiHandler: mockApiHandler, + }) + + expect(result1).toEqual(result2) + + // Test above threshold + const aboveThreshold = 70001 + const result3 = await truncateConversationIfNeeded({ + messages: messagesWithSmallContent, + totalTokens: aboveThreshold, + contextWindow: modelInfo1.contextWindow, + maxTokens: modelInfo1.maxTokens, + apiHandler: mockApiHandler, + }) + + const result4 = await truncateConversationIfNeeded({ + messages: messagesWithSmallContent, + totalTokens: aboveThreshold, + contextWindow: modelInfo2.contextWindow, + maxTokens: modelInfo2.maxTokens, + apiHandler: mockApiHandler, + }) + + expect(result3).toEqual(result4) + }) + + it("should consider incoming content when deciding to truncate", async () => { + const modelInfo = createModelInfo(100000, 30000) + const maxTokens = 30000 + const availableTokens = modelInfo.contextWindow - maxTokens + + // Test case 1: Small content that won't push us over the threshold + const smallContent = [{ type: "text" as const, text: "Small content" }] + const smallContentTokens = await estimateTokenCount(smallContent, mockApiHandler) + const messagesWithSmallContent: Anthropic.Messages.MessageParam[] = [ + ...messages.slice(0, -1), + { role: messages[messages.length - 1].role, content: smallContent }, + ] + + // Set base tokens so total is well below threshold + buffer even with small content added + const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE + const baseTokensForSmall = availableTokens - smallContentTokens - dynamicBuffer - 10 + const resultWithSmall = await truncateConversationIfNeeded({ + messages: messagesWithSmallContent, + totalTokens: baseTokensForSmall, + contextWindow: modelInfo.contextWindow, + maxTokens, + apiHandler: mockApiHandler, + }) + expect(resultWithSmall).toEqual(messagesWithSmallContent) // No truncation + + // Test case 2: Large content that will push us over the threshold + const largeContent = [ + { + type: "text" as const, + text: "A very large incoming message that would consume a significant number of tokens and push us over the threshold", + }, + ] + const largeContentTokens = await estimateTokenCount(largeContent, mockApiHandler) + const messagesWithLargeContent: Anthropic.Messages.MessageParam[] = [ + ...messages.slice(0, -1), + { role: messages[messages.length - 1].role, content: largeContent }, + ] + + // Set base tokens so we're just below threshold without content, but over with content + 
const baseTokensForLarge = availableTokens - Math.floor(largeContentTokens / 2) + const resultWithLarge = await truncateConversationIfNeeded({ + messages: messagesWithLargeContent, + totalTokens: baseTokensForLarge, + contextWindow: modelInfo.contextWindow, + maxTokens, + apiHandler: mockApiHandler, + }) + expect(resultWithLarge).not.toEqual(messagesWithLargeContent) // Should truncate + + // Test case 3: Very large content that will definitely exceed threshold + const veryLargeContent = [{ type: "text" as const, text: "X".repeat(1000) }] + const veryLargeContentTokens = await estimateTokenCount(veryLargeContent, mockApiHandler) + const messagesWithVeryLargeContent: Anthropic.Messages.MessageParam[] = [ + ...messages.slice(0, -1), + { role: messages[messages.length - 1].role, content: veryLargeContent }, + ] + + // Set base tokens so we're just below threshold without content + const baseTokensForVeryLarge = availableTokens - Math.floor(veryLargeContentTokens / 2) + const resultWithVeryLarge = await truncateConversationIfNeeded({ + messages: messagesWithVeryLargeContent, + totalTokens: baseTokensForVeryLarge, + contextWindow: modelInfo.contextWindow, + maxTokens, + apiHandler: mockApiHandler, + }) + expect(resultWithVeryLarge).not.toEqual(messagesWithVeryLargeContent) // Should truncate + }) + + it("should truncate if tokens are within TOKEN_BUFFER_PERCENTAGE of the threshold", async () => { + const modelInfo = createModelInfo(100000, 30000) + const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE // 10% of 100000 = 10000 + const totalTokens = 70000 - dynamicBuffer + 1 // Just within the dynamic buffer of threshold (70000) + + // Create messages with very small content in the last one to avoid token overflow + const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }] + + // When truncating, always uses 0.5 fraction + // With 4 messages after the first, 0.5 fraction means remove 2 messages + const expectedResult = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]] + + const result = await truncateConversationIfNeeded({ + messages: messagesWithSmallContent, + totalTokens, + contextWindow: modelInfo.contextWindow, + maxTokens: modelInfo.maxTokens, + apiHandler: mockApiHandler, + }) + expect(result).toEqual(expectedResult) + }) +}) + +/** + * Tests for the getMaxTokens function (private but tested through truncateConversationIfNeeded) + */ +describe("getMaxTokens", () => { + // We'll test this indirectly through truncateConversationIfNeeded + const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({ + contextWindow, + supportsPromptCache: true, // Not relevant for getMaxTokens + maxTokens, + }) + + // Reuse across tests for consistency + const messages: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: "First message" }, + { role: "assistant", content: "Second message" }, + { role: "user", content: "Third message" }, + { role: "assistant", content: "Fourth message" }, + { role: "user", content: "Fifth message" }, + ] + + it("should use maxTokens as buffer when specified", async () => { + const modelInfo = createModelInfo(100000, 50000) + // Max tokens = 100000 - 50000 = 50000 + + // Create messages with very small content in the last one to avoid token overflow + const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }] + + // Account for the dynamic buffer which is 10% of context window (10,000 
tokens) + // Below max tokens and buffer - no truncation + const result1 = await truncateConversationIfNeeded({ + messages: messagesWithSmallContent, + totalTokens: 39999, // Well below threshold + dynamic buffer + contextWindow: modelInfo.contextWindow, + maxTokens: modelInfo.maxTokens, + apiHandler: mockApiHandler, + }) + expect(result1).toEqual(messagesWithSmallContent) + + // Above max tokens - truncate + const result2 = await truncateConversationIfNeeded({ + messages: messagesWithSmallContent, + totalTokens: 50001, // Above threshold + contextWindow: modelInfo.contextWindow, + maxTokens: modelInfo.maxTokens, + apiHandler: mockApiHandler, + }) + expect(result2).not.toEqual(messagesWithSmallContent) + expect(result2.length).toBe(3) // Truncated with 0.5 fraction + }) + + it("should use 20% of context window as buffer when maxTokens is undefined", async () => { + const modelInfo = createModelInfo(100000, undefined) + // Max tokens = 100000 - (100000 * 0.2) = 80000 + + // Create messages with very small content in the last one to avoid token overflow + const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }] + + // Account for the dynamic buffer which is 10% of context window (10,000 tokens) + // Below max tokens and buffer - no truncation + const result1 = await truncateConversationIfNeeded({ + messages: messagesWithSmallContent, + totalTokens: 69999, // Well below threshold + dynamic buffer + contextWindow: modelInfo.contextWindow, + maxTokens: modelInfo.maxTokens, + apiHandler: mockApiHandler, + }) + expect(result1).toEqual(messagesWithSmallContent) + + // Above max tokens - truncate + const result2 = await truncateConversationIfNeeded({ + messages: messagesWithSmallContent, + totalTokens: 80001, // Above threshold + contextWindow: modelInfo.contextWindow, + maxTokens: modelInfo.maxTokens, + apiHandler: mockApiHandler, + }) + expect(result2).not.toEqual(messagesWithSmallContent) + expect(result2.length).toBe(3) // Truncated with 0.5 fraction + }) + + it("should handle small context windows appropriately", async () => { + const modelInfo = createModelInfo(50000, 10000) + // Max tokens = 50000 - 10000 = 40000 + + // Create messages with very small content in the last one to avoid token overflow + const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }] + + // Below max tokens and buffer - no truncation + const result1 = await truncateConversationIfNeeded({ + messages: messagesWithSmallContent, + totalTokens: 34999, // Well below threshold + buffer + contextWindow: modelInfo.contextWindow, + maxTokens: modelInfo.maxTokens, + apiHandler: mockApiHandler, + }) + expect(result1).toEqual(messagesWithSmallContent) + + // Above max tokens - truncate + const result2 = await truncateConversationIfNeeded({ + messages: messagesWithSmallContent, + totalTokens: 40001, // Above threshold + contextWindow: modelInfo.contextWindow, + maxTokens: modelInfo.maxTokens, + apiHandler: mockApiHandler, + }) + expect(result2).not.toEqual(messagesWithSmallContent) + expect(result2.length).toBe(3) // Truncated with 0.5 fraction + }) + + it("should handle large context windows appropriately", async () => { + const modelInfo = createModelInfo(200000, 30000) + // Max tokens = 200000 - 30000 = 170000 + + // Create messages with very small content in the last one to avoid token overflow + const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }] + + // Account 
for the dynamic buffer which is 10% of context window (20,000 tokens for this test)
+		// Below max tokens and buffer - no truncation
+		const result1 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: 149999, // Well below threshold + dynamic buffer
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result1).toEqual(messagesWithSmallContent)
+
+		// Above max tokens - truncate
+		const result2 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: 170001, // Above threshold
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result2).not.toEqual(messagesWithSmallContent)
+		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
+	})
+})
diff --git a/src/core/sliding-window/index.ts b/src/core/sliding-window/index.ts
new file mode 100644
index 0000000000..75395ecd75
--- /dev/null
+++ b/src/core/sliding-window/index.ts
@@ -0,0 +1,100 @@
+import { Anthropic } from "@anthropic-ai/sdk"
+import { ApiHandler } from "../../api"
+
+/**
+ * Default percentage of the context window to use as a buffer when deciding when to truncate
+ */
+export const TOKEN_BUFFER_PERCENTAGE = 0.1
+
+/**
+ * Counts tokens for user content using the provider's token counting implementation.
+ *
+ * @param {Array<Anthropic.Messages.ContentBlockParam>} content - The content to count tokens for
+ * @param {ApiHandler} apiHandler - The API handler to use for token counting
+ * @returns {Promise<number>} A promise resolving to the token count
+ */
+export async function estimateTokenCount(
+	content: Array<Anthropic.Messages.ContentBlockParam>,
+	apiHandler: ApiHandler,
+): Promise<number> {
+	if (!content || content.length === 0) return 0
+	return apiHandler.countTokens(content)
+}
+
+/**
+ * Truncates a conversation by removing a fraction of the messages.
+ *
+ * The first message is always retained, and a specified fraction (rounded to an even number)
+ * of messages from the beginning (excluding the first) is removed.
+ *
+ * @param {Anthropic.Messages.MessageParam[]} messages - The conversation messages.
+ * @param {number} fracToRemove - The fraction (between 0 and 1) of messages (excluding the first) to remove.
+ * @returns {Anthropic.Messages.MessageParam[]} The truncated conversation messages.
+ */
+export function truncateConversation(
+	messages: Anthropic.Messages.MessageParam[],
+	fracToRemove: number,
+): Anthropic.Messages.MessageParam[] {
+	const truncatedMessages = [messages[0]]
+	const rawMessagesToRemove = Math.floor((messages.length - 1) * fracToRemove)
+	const messagesToRemove = rawMessagesToRemove - (rawMessagesToRemove % 2)
+	const remainingMessages = messages.slice(messagesToRemove + 1)
+	truncatedMessages.push(...remainingMessages)
+
+	return truncatedMessages
+}
+
+type TruncateOptions = {
+	messages: Anthropic.Messages.MessageParam[]
+	totalTokens: number
+	contextWindow: number
+	maxTokens?: number | null
+	apiHandler: ApiHandler
+}
+
+/**
+ * Conditionally truncates the conversation messages if the total token count
+ * exceeds the model's limit, considering the size of incoming content.
+ *
+ * @param {TruncateOptions} options - The options for truncation
+ * @returns {Promise<Anthropic.Messages.MessageParam[]>} The original or truncated conversation messages.
+ */
+export async function truncateConversationIfNeeded({
+	messages,
+	totalTokens,
+	contextWindow,
+	maxTokens,
+	apiHandler,
+}: TruncateOptions): Promise<Anthropic.Messages.MessageParam[]> {
+	// Calculate the maximum tokens reserved for response
+	const reservedTokens = maxTokens || contextWindow * 0.2
+
+	// Estimate tokens for the last message (which is always a user message)
+	const lastMessage = messages[messages.length - 1]
+	const lastMessageContent = lastMessage.content
+	const lastMessageTokens = Array.isArray(lastMessageContent)
+		? await estimateTokenCount(lastMessageContent, apiHandler)
+		: await estimateTokenCount([{ type: "text", text: lastMessageContent as string }], apiHandler)
+
+	// Calculate total effective tokens (totalTokens never includes the last message)
+	const effectiveTokens = totalTokens + lastMessageTokens
+
+	// Calculate available tokens for conversation history
+	// Truncate if we're within TOKEN_BUFFER_PERCENTAGE of the context window
+	const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
+
+	// Determine if truncation is needed and apply if necessary
+	return effectiveTokens > allowedTokens ? truncateConversation(messages, 0.5) : messages
+}
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index 6035e0745b..332aa9c3b3 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -65,6 +65,7 @@ import {
 	parseAssistantMessageV2 as parseAssistantMessage,
 	presentAssistantMessage,
 } from "../assistant-message"
+import { truncateConversationIfNeeded } from "../sliding-window"
 import { ClineProvider } from "../webview/ClineProvider"
 import { MultiSearchReplaceDiffStrategy } from "../diff/strategies/multi-search-replace"
 import { readApiMessages, saveApiMessages, readTaskMessages, saveTaskMessages, taskMetadata } from "../task-persistence"
@@ -82,6 +83,8 @@ import { ApiMessage } from "../task-persistence/apiMessages"
 import { getMessagesSinceLastSummary, summarizeConversationIfNeeded } from "../condense"
 import { maybeRemoveImageBlocks } from "../../api/transform/image-cleaning"
 
+const enableSummaries = false // TODO(canyon): Replace with a config option
+
 export type ClineEvents = {
 	message: [{ action: "created" | "updated"; message: ClineMessage }]
 	taskStarted: []
@@ -1469,18 +1472,38 @@ export class Task extends EventEmitter<ClineEvents> {
 		const totalTokens = tokensIn + tokensOut + cacheWrites + cacheReads
 
+		// Default max tokens value for thinking models when no specific
+		// value is set.
+		const DEFAULT_THINKING_MODEL_MAX_TOKENS = 16_384
+		const modelInfo = this.api.getModel().info
+
+		const maxTokens = modelInfo.thinking
+			?
this.apiConfiguration.modelMaxTokens || DEFAULT_THINKING_MODEL_MAX_TOKENS + : modelInfo.maxTokens + const contextWindow = modelInfo.contextWindow - const messagesWithSummary = await summarizeConversationIfNeeded( - this.apiConversationHistory, - totalTokens, - contextWindow, - this.api, - ) + let condensedMessages + if (!enableSummaries) { + condensedMessages = await truncateConversationIfNeeded({ + messages: this.apiConversationHistory, + totalTokens, + maxTokens, + contextWindow, + apiHandler: this.api, + }) + } else { + condensedMessages = await summarizeConversationIfNeeded( + this.apiConversationHistory, + totalTokens, + contextWindow, + this.api, + ) + } - if (messagesWithSummary !== this.apiConversationHistory) { - await this.overwriteApiConversationHistory(messagesWithSummary) + if (condensedMessages !== this.apiConversationHistory) { + await this.overwriteApiConversationHistory(condensedMessages) } } From 1248a93c71a6ffc1acf1cbb5090e81d67667fa7d Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Tue, 13 May 2025 19:11:52 -0700 Subject: [PATCH 07/25] use same timestamp as next message --- src/core/condense/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/condense/index.ts b/src/core/condense/index.ts index af476f519c..9e9b60596e 100644 --- a/src/core/condense/index.ts +++ b/src/core/condense/index.ts @@ -101,7 +101,7 @@ async function summarizeConversation(messages: ApiMessage[], apiHandler: ApiHand const summaryMessage: ApiMessage = { role: "assistant", content: summary, - ts: Date.now(), + ts: keepMessages[0].ts, isSummary: true, } From 11353c5b29ed48b20b216f32cf8126bb6262cbf5 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Tue, 13 May 2025 20:05:04 -0700 Subject: [PATCH 08/25] reverse logic --- src/core/task/Task.ts | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 332aa9c3b3..2220daa156 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -83,8 +83,6 @@ import { ApiMessage } from "../task-persistence/apiMessages" import { getMessagesSinceLastSummary, summarizeConversationIfNeeded } from "../condense" import { maybeRemoveImageBlocks } from "../../api/transform/image-cleaning" -const enableSummaries = false // TODO(canyon): Replace with a config option - export type ClineEvents = { message: [{ action: "created" | "updated"; message: ClineMessage }] taskStarted: [] @@ -1423,6 +1421,7 @@ export class Task extends EventEmitter { enableMcpServerCreation, browserToolEnabled, language, + enableAutoContextCondensing, } = (await this.providerRef.deref()?.getState()) ?? {} const { customModes } = (await this.providerRef.deref()?.getState()) ?? 
{} @@ -1485,7 +1484,14 @@ export class Task extends EventEmitter { const contextWindow = modelInfo.contextWindow let condensedMessages - if (!enableSummaries) { + if (enableAutoContextCondensing) { + condensedMessages = await summarizeConversationIfNeeded( + this.apiConversationHistory, + totalTokens, + contextWindow, + this.api, + ) + } else { condensedMessages = await truncateConversationIfNeeded({ messages: this.apiConversationHistory, totalTokens, @@ -1493,13 +1499,6 @@ export class Task extends EventEmitter { contextWindow, apiHandler: this.api, }) - } else { - condensedMessages = await summarizeConversationIfNeeded( - this.apiConversationHistory, - totalTokens, - contextWindow, - this.api, - ) } if (condensedMessages !== this.apiConversationHistory) { From 9d40c3b506596c641fa4343f570782bfa4a040b7 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Wed, 14 May 2025 09:37:22 -0700 Subject: [PATCH 09/25] wip on checkbox setting --- src/core/task/Task.ts | 4 ++-- src/core/webview/ClineProvider.ts | 1 + src/core/webview/__tests__/ClineProvider.test.ts | 11 +++++++++++ src/exports/roo-code.d.ts | 3 +++ src/exports/types.ts | 3 +++ src/schemas/index.ts | 2 ++ src/shared/ExtensionMessage.ts | 1 + src/shared/WebviewMessage.ts | 2 ++ webview-ui/src/context/ExtensionStateContext.tsx | 3 +++ 9 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 2220daa156..167f0cf2c0 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -1421,7 +1421,7 @@ export class Task extends EventEmitter { enableMcpServerCreation, browserToolEnabled, language, - enableAutoContextCondensing, + autoCondenseContext, } = (await this.providerRef.deref()?.getState()) ?? {} const { customModes } = (await this.providerRef.deref()?.getState()) ?? {} @@ -1484,7 +1484,7 @@ export class Task extends EventEmitter { const contextWindow = modelInfo.contextWindow let condensedMessages - if (enableAutoContextCondensing) { + if (autoCondenseContext) { condensedMessages = await summarizeConversationIfNeeded( this.apiConversationHistory, totalTokens, diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index e9953052f6..c2802d81dc 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -1387,6 +1387,7 @@ export class ClineProvider extends EventEmitter implements alwaysAllowBrowser: stateValues.alwaysAllowBrowser ?? false, alwaysAllowMcp: stateValues.alwaysAllowMcp ?? false, alwaysAllowModeSwitch: stateValues.alwaysAllowModeSwitch ?? false, + autoCondenseContext: stateValues.autoCondenseContext ?? false, alwaysAllowSubtasks: stateValues.alwaysAllowSubtasks ?? 
false, taskHistory: stateValues.taskHistory, allowedCommands: stateValues.allowedCommands, diff --git a/src/core/webview/__tests__/ClineProvider.test.ts b/src/core/webview/__tests__/ClineProvider.test.ts index 3bd3a11db4..04068a06d7 100644 --- a/src/core/webview/__tests__/ClineProvider.test.ts +++ b/src/core/webview/__tests__/ClineProvider.test.ts @@ -385,6 +385,7 @@ describe("ClineProvider", () => { clineMessages: [], taskHistory: [], shouldShowAnnouncement: false, + autoCondenseContext: false, apiConfiguration: { apiProvider: "openrouter", }, @@ -482,6 +483,16 @@ describe("ClineProvider", () => { expect(provider.getCurrentCline()).toBe(mockCline2) }) + test("autoCondenseContext setting persists and updates state", async () => { + const initialState = await provider.getState() + expect(initialState).toHaveProperty("autoCondenseContext", false) + + // Update the setting + await provider.contextProxy.updateGlobalState("autoCondenseContext", true) + const updatedState = await provider.getState() + expect(updatedState).toHaveProperty("autoCondenseContext", true) + }) + test("getState returns correct initial state", async () => { const state = await provider.getState() diff --git a/src/exports/roo-code.d.ts b/src/exports/roo-code.d.ts index 98290336ce..2f7407264b 100644 --- a/src/exports/roo-code.d.ts +++ b/src/exports/roo-code.d.ts @@ -70,6 +70,7 @@ type GlobalSettings = { alwaysAllowSubtasks?: boolean | undefined alwaysAllowExecute?: boolean | undefined allowedCommands?: string[] | undefined + autoCondenseContext?: boolean | undefined browserToolEnabled?: boolean | undefined browserViewportSize?: string | undefined screenshotQuality?: number | undefined @@ -759,6 +760,7 @@ type IpcMessage = alwaysAllowSubtasks?: boolean | undefined alwaysAllowExecute?: boolean | undefined allowedCommands?: string[] | undefined + autoCondenseContext?: boolean | undefined browserToolEnabled?: boolean | undefined browserViewportSize?: string | undefined screenshotQuality?: number | undefined @@ -1221,6 +1223,7 @@ type TaskCommand = alwaysAllowSubtasks?: boolean | undefined alwaysAllowExecute?: boolean | undefined allowedCommands?: string[] | undefined + autoCondenseContext?: boolean | undefined browserToolEnabled?: boolean | undefined browserViewportSize?: string | undefined screenshotQuality?: number | undefined diff --git a/src/exports/types.ts b/src/exports/types.ts index 4b71ad5d0b..bff26c68dd 100644 --- a/src/exports/types.ts +++ b/src/exports/types.ts @@ -70,6 +70,7 @@ type GlobalSettings = { alwaysAllowSubtasks?: boolean | undefined alwaysAllowExecute?: boolean | undefined allowedCommands?: string[] | undefined + autoCondenseContext?: boolean | undefined browserToolEnabled?: boolean | undefined browserViewportSize?: string | undefined screenshotQuality?: number | undefined @@ -771,6 +772,7 @@ type IpcMessage = alwaysAllowSubtasks?: boolean | undefined alwaysAllowExecute?: boolean | undefined allowedCommands?: string[] | undefined + autoCondenseContext?: boolean | undefined browserToolEnabled?: boolean | undefined browserViewportSize?: string | undefined screenshotQuality?: number | undefined @@ -1235,6 +1237,7 @@ type TaskCommand = alwaysAllowSubtasks?: boolean | undefined alwaysAllowExecute?: boolean | undefined allowedCommands?: string[] | undefined + autoCondenseContext?: boolean | undefined browserToolEnabled?: boolean | undefined browserViewportSize?: string | undefined screenshotQuality?: number | undefined diff --git a/src/schemas/index.ts b/src/schemas/index.ts index 70ea39f624..c9925d251c 100644 
--- a/src/schemas/index.ts +++ b/src/schemas/index.ts @@ -676,6 +676,7 @@ export const globalSettingsSchema = z.object({ alwaysAllowSubtasks: z.boolean().optional(), alwaysAllowExecute: z.boolean().optional(), allowedCommands: z.array(z.string()).optional(), + autoCondenseContext: z.boolean().optional(), browserToolEnabled: z.boolean().optional(), browserViewportSize: z.string().optional(), @@ -755,6 +756,7 @@ const globalSettingsRecord: GlobalSettingsRecord = { alwaysAllowSubtasks: undefined, alwaysAllowExecute: undefined, allowedCommands: undefined, + autoCondenseContext: undefined, browserToolEnabled: undefined, browserViewportSize: undefined, diff --git a/src/shared/ExtensionMessage.ts b/src/shared/ExtensionMessage.ts index 6330556024..9f9365e71a 100644 --- a/src/shared/ExtensionMessage.ts +++ b/src/shared/ExtensionMessage.ts @@ -168,6 +168,7 @@ export type ExtensionState = Pick< | "customModePrompts" | "customSupportPrompts" | "enhancementApiConfigId" + | "autoCondenseContext" > & { version: string clineMessages: ClineMessage[] diff --git a/src/shared/WebviewMessage.ts b/src/shared/WebviewMessage.ts index 22fe5c7d3e..2dc2e4038d 100644 --- a/src/shared/WebviewMessage.ts +++ b/src/shared/WebviewMessage.ts @@ -129,6 +129,7 @@ export interface WebviewMessage { | "maxReadFileLine" | "searchFiles" | "toggleApiConfigPin" + | "autoCondenseContext" | "setHistoryPreviewCollapsed" text?: string disabled?: boolean @@ -159,6 +160,7 @@ export interface WebviewMessage { hasSystemPromptOverride?: boolean terminalOperation?: "continue" | "abort" historyPreviewCollapsed?: boolean + autoCondenseContext?: boolean } export const checkoutDiffPayloadSchema = z.object({ diff --git a/webview-ui/src/context/ExtensionStateContext.tsx b/webview-ui/src/context/ExtensionStateContext.tsx index 63c895f034..99efa2cf14 100644 --- a/webview-ui/src/context/ExtensionStateContext.tsx +++ b/webview-ui/src/context/ExtensionStateContext.tsx @@ -35,6 +35,7 @@ export interface ExtensionStateContextType extends ExtensionState { setAlwaysAllowMcp: (value: boolean) => void setAlwaysAllowModeSwitch: (value: boolean) => void setAlwaysAllowSubtasks: (value: boolean) => void + setAutoCondenseContext: (value: boolean) => void setBrowserToolEnabled: (value: boolean) => void setShowRooIgnoredFiles: (value: boolean) => void setShowAnnouncement: (value: boolean) => void @@ -173,6 +174,7 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode terminalZdotdir: false, // Default ZDOTDIR handling setting terminalCompressProgressBar: true, // Default to compress progress bar output historyPreviewCollapsed: false, // Initialize the new state (default to expanded) + autoCondenseContext: false, }) const [didHydrateState, setDidHydrateState] = useState(false) @@ -288,6 +290,7 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode setAlwaysAllowSubtasks: (value) => setState((prevState) => ({ ...prevState, alwaysAllowSubtasks: value })), setShowAnnouncement: (value) => setState((prevState) => ({ ...prevState, shouldShowAnnouncement: value })), setAllowedCommands: (value) => setState((prevState) => ({ ...prevState, allowedCommands: value })), + setAutoCondenseContext: (value) => setState((prevState) => ({ ...prevState, autoCondenseContext: value })), setSoundEnabled: (value) => setState((prevState) => ({ ...prevState, soundEnabled: value })), setSoundVolume: (value) => setState((prevState) => ({ ...prevState, soundVolume: value })), setTtsEnabled: (value) => setState((prevState) => ({ 
...prevState, ttsEnabled: value })), From cdf69e523d7e2a7381f7c1ccc4222e5cf3e25fc6 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Wed, 14 May 2025 09:55:38 -0700 Subject: [PATCH 10/25] wip on checkbox --- src/core/webview/ClineProvider.ts | 2 ++ src/core/webview/webviewMessageHandler.ts | 4 ++++ .../src/components/settings/ExperimentalSettings.tsx | 8 ++++++++ 3 files changed, 14 insertions(+) diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index c2802d81dc..c23e760118 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -1215,6 +1215,7 @@ export class ClineProvider extends EventEmitter implements alwaysAllowMcp, alwaysAllowModeSwitch, alwaysAllowSubtasks, + autoCondenseContext, soundEnabled, ttsEnabled, ttsSpeed, @@ -1285,6 +1286,7 @@ export class ClineProvider extends EventEmitter implements alwaysAllowMcp: alwaysAllowMcp ?? false, alwaysAllowModeSwitch: alwaysAllowModeSwitch ?? false, alwaysAllowSubtasks: alwaysAllowSubtasks ?? false, + autoCondenseContext: autoCondenseContext ?? false, uriScheme: vscode.env.uriScheme, currentTaskItem: this.getCurrentCline()?.taskId ? (taskHistory || []).find((item: HistoryItem) => item.id === this.getCurrentCline()?.taskId) diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index 1f17d221a6..fdf4b91d92 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -165,6 +165,10 @@ export const webviewMessageHandler = async (provider: ClineProvider, message: We await updateGlobalState("alwaysAllowSubtasks", message.bool) await provider.postStateToWebview() break + case "autoCondenseContext": + await updateGlobalState("autoCondenseContext", message.bool) + await provider.postStateToWebview() + break case "askResponse": provider.getCurrentCline()?.handleWebviewAskResponse(message.askResponse!, message.text, message.images) break diff --git a/webview-ui/src/components/settings/ExperimentalSettings.tsx b/webview-ui/src/components/settings/ExperimentalSettings.tsx index 6270ded6e8..1c34fcb920 100644 --- a/webview-ui/src/components/settings/ExperimentalSettings.tsx +++ b/webview-ui/src/components/settings/ExperimentalSettings.tsx @@ -46,6 +46,14 @@ export const ExperimentalSettings = ({ } /> ))} + + setExperimentEnabled(EXPERIMENT_IDS[config[0] as keyof typeof EXPERIMENT_IDS], enabled) + } + /> ) From 1765c6e24630db638032d6c8316578753a4ac54f Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Wed, 14 May 2025 09:56:13 -0700 Subject: [PATCH 11/25] revert checkbox --- src/core/task/Task.ts | 4 ++-- src/core/webview/ClineProvider.ts | 3 --- src/core/webview/__tests__/ClineProvider.test.ts | 11 ----------- src/core/webview/webviewMessageHandler.ts | 4 ---- src/exports/roo-code.d.ts | 3 --- src/exports/types.ts | 3 --- src/schemas/index.ts | 2 -- src/shared/ExtensionMessage.ts | 1 - src/shared/WebviewMessage.ts | 2 -- .../src/components/settings/ExperimentalSettings.tsx | 8 -------- webview-ui/src/context/ExtensionStateContext.tsx | 3 --- 11 files changed, 2 insertions(+), 42 deletions(-) diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 167f0cf2c0..2220daa156 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -1421,7 +1421,7 @@ export class Task extends EventEmitter { enableMcpServerCreation, browserToolEnabled, language, - autoCondenseContext, + enableAutoContextCondensing, } = (await this.providerRef.deref()?.getState()) ?? 
{} const { customModes } = (await this.providerRef.deref()?.getState()) ?? {} @@ -1484,7 +1484,7 @@ export class Task extends EventEmitter { const contextWindow = modelInfo.contextWindow let condensedMessages - if (autoCondenseContext) { + if (enableAutoContextCondensing) { condensedMessages = await summarizeConversationIfNeeded( this.apiConversationHistory, totalTokens, diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index c23e760118..e9953052f6 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -1215,7 +1215,6 @@ export class ClineProvider extends EventEmitter implements alwaysAllowMcp, alwaysAllowModeSwitch, alwaysAllowSubtasks, - autoCondenseContext, soundEnabled, ttsEnabled, ttsSpeed, @@ -1286,7 +1285,6 @@ export class ClineProvider extends EventEmitter implements alwaysAllowMcp: alwaysAllowMcp ?? false, alwaysAllowModeSwitch: alwaysAllowModeSwitch ?? false, alwaysAllowSubtasks: alwaysAllowSubtasks ?? false, - autoCondenseContext: autoCondenseContext ?? false, uriScheme: vscode.env.uriScheme, currentTaskItem: this.getCurrentCline()?.taskId ? (taskHistory || []).find((item: HistoryItem) => item.id === this.getCurrentCline()?.taskId) @@ -1389,7 +1387,6 @@ export class ClineProvider extends EventEmitter implements alwaysAllowBrowser: stateValues.alwaysAllowBrowser ?? false, alwaysAllowMcp: stateValues.alwaysAllowMcp ?? false, alwaysAllowModeSwitch: stateValues.alwaysAllowModeSwitch ?? false, - autoCondenseContext: stateValues.autoCondenseContext ?? false, alwaysAllowSubtasks: stateValues.alwaysAllowSubtasks ?? false, taskHistory: stateValues.taskHistory, allowedCommands: stateValues.allowedCommands, diff --git a/src/core/webview/__tests__/ClineProvider.test.ts b/src/core/webview/__tests__/ClineProvider.test.ts index 04068a06d7..3bd3a11db4 100644 --- a/src/core/webview/__tests__/ClineProvider.test.ts +++ b/src/core/webview/__tests__/ClineProvider.test.ts @@ -385,7 +385,6 @@ describe("ClineProvider", () => { clineMessages: [], taskHistory: [], shouldShowAnnouncement: false, - autoCondenseContext: false, apiConfiguration: { apiProvider: "openrouter", }, @@ -483,16 +482,6 @@ describe("ClineProvider", () => { expect(provider.getCurrentCline()).toBe(mockCline2) }) - test("autoCondenseContext setting persists and updates state", async () => { - const initialState = await provider.getState() - expect(initialState).toHaveProperty("autoCondenseContext", false) - - // Update the setting - await provider.contextProxy.updateGlobalState("autoCondenseContext", true) - const updatedState = await provider.getState() - expect(updatedState).toHaveProperty("autoCondenseContext", true) - }) - test("getState returns correct initial state", async () => { const state = await provider.getState() diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index fdf4b91d92..1f17d221a6 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -165,10 +165,6 @@ export const webviewMessageHandler = async (provider: ClineProvider, message: We await updateGlobalState("alwaysAllowSubtasks", message.bool) await provider.postStateToWebview() break - case "autoCondenseContext": - await updateGlobalState("autoCondenseContext", message.bool) - await provider.postStateToWebview() - break case "askResponse": provider.getCurrentCline()?.handleWebviewAskResponse(message.askResponse!, message.text, message.images) break diff --git a/src/exports/roo-code.d.ts 
b/src/exports/roo-code.d.ts index 2f7407264b..98290336ce 100644 --- a/src/exports/roo-code.d.ts +++ b/src/exports/roo-code.d.ts @@ -70,7 +70,6 @@ type GlobalSettings = { alwaysAllowSubtasks?: boolean | undefined alwaysAllowExecute?: boolean | undefined allowedCommands?: string[] | undefined - autoCondenseContext?: boolean | undefined browserToolEnabled?: boolean | undefined browserViewportSize?: string | undefined screenshotQuality?: number | undefined @@ -760,7 +759,6 @@ type IpcMessage = alwaysAllowSubtasks?: boolean | undefined alwaysAllowExecute?: boolean | undefined allowedCommands?: string[] | undefined - autoCondenseContext?: boolean | undefined browserToolEnabled?: boolean | undefined browserViewportSize?: string | undefined screenshotQuality?: number | undefined @@ -1223,7 +1221,6 @@ type TaskCommand = alwaysAllowSubtasks?: boolean | undefined alwaysAllowExecute?: boolean | undefined allowedCommands?: string[] | undefined - autoCondenseContext?: boolean | undefined browserToolEnabled?: boolean | undefined browserViewportSize?: string | undefined screenshotQuality?: number | undefined diff --git a/src/exports/types.ts b/src/exports/types.ts index bff26c68dd..4b71ad5d0b 100644 --- a/src/exports/types.ts +++ b/src/exports/types.ts @@ -70,7 +70,6 @@ type GlobalSettings = { alwaysAllowSubtasks?: boolean | undefined alwaysAllowExecute?: boolean | undefined allowedCommands?: string[] | undefined - autoCondenseContext?: boolean | undefined browserToolEnabled?: boolean | undefined browserViewportSize?: string | undefined screenshotQuality?: number | undefined @@ -772,7 +771,6 @@ type IpcMessage = alwaysAllowSubtasks?: boolean | undefined alwaysAllowExecute?: boolean | undefined allowedCommands?: string[] | undefined - autoCondenseContext?: boolean | undefined browserToolEnabled?: boolean | undefined browserViewportSize?: string | undefined screenshotQuality?: number | undefined @@ -1237,7 +1235,6 @@ type TaskCommand = alwaysAllowSubtasks?: boolean | undefined alwaysAllowExecute?: boolean | undefined allowedCommands?: string[] | undefined - autoCondenseContext?: boolean | undefined browserToolEnabled?: boolean | undefined browserViewportSize?: string | undefined screenshotQuality?: number | undefined diff --git a/src/schemas/index.ts b/src/schemas/index.ts index c9925d251c..70ea39f624 100644 --- a/src/schemas/index.ts +++ b/src/schemas/index.ts @@ -676,7 +676,6 @@ export const globalSettingsSchema = z.object({ alwaysAllowSubtasks: z.boolean().optional(), alwaysAllowExecute: z.boolean().optional(), allowedCommands: z.array(z.string()).optional(), - autoCondenseContext: z.boolean().optional(), browserToolEnabled: z.boolean().optional(), browserViewportSize: z.string().optional(), @@ -756,7 +755,6 @@ const globalSettingsRecord: GlobalSettingsRecord = { alwaysAllowSubtasks: undefined, alwaysAllowExecute: undefined, allowedCommands: undefined, - autoCondenseContext: undefined, browserToolEnabled: undefined, browserViewportSize: undefined, diff --git a/src/shared/ExtensionMessage.ts b/src/shared/ExtensionMessage.ts index 9f9365e71a..6330556024 100644 --- a/src/shared/ExtensionMessage.ts +++ b/src/shared/ExtensionMessage.ts @@ -168,7 +168,6 @@ export type ExtensionState = Pick< | "customModePrompts" | "customSupportPrompts" | "enhancementApiConfigId" - | "autoCondenseContext" > & { version: string clineMessages: ClineMessage[] diff --git a/src/shared/WebviewMessage.ts b/src/shared/WebviewMessage.ts index 2dc2e4038d..22fe5c7d3e 100644 --- a/src/shared/WebviewMessage.ts +++ 
b/src/shared/WebviewMessage.ts @@ -129,7 +129,6 @@ export interface WebviewMessage { | "maxReadFileLine" | "searchFiles" | "toggleApiConfigPin" - | "autoCondenseContext" | "setHistoryPreviewCollapsed" text?: string disabled?: boolean @@ -160,7 +159,6 @@ export interface WebviewMessage { hasSystemPromptOverride?: boolean terminalOperation?: "continue" | "abort" historyPreviewCollapsed?: boolean - autoCondenseContext?: boolean } export const checkoutDiffPayloadSchema = z.object({ diff --git a/webview-ui/src/components/settings/ExperimentalSettings.tsx b/webview-ui/src/components/settings/ExperimentalSettings.tsx index 1c34fcb920..6270ded6e8 100644 --- a/webview-ui/src/components/settings/ExperimentalSettings.tsx +++ b/webview-ui/src/components/settings/ExperimentalSettings.tsx @@ -46,14 +46,6 @@ export const ExperimentalSettings = ({ } /> ))} - - setExperimentEnabled(EXPERIMENT_IDS[config[0] as keyof typeof EXPERIMENT_IDS], enabled) - } - /> ) diff --git a/webview-ui/src/context/ExtensionStateContext.tsx b/webview-ui/src/context/ExtensionStateContext.tsx index 99efa2cf14..63c895f034 100644 --- a/webview-ui/src/context/ExtensionStateContext.tsx +++ b/webview-ui/src/context/ExtensionStateContext.tsx @@ -35,7 +35,6 @@ export interface ExtensionStateContextType extends ExtensionState { setAlwaysAllowMcp: (value: boolean) => void setAlwaysAllowModeSwitch: (value: boolean) => void setAlwaysAllowSubtasks: (value: boolean) => void - setAutoCondenseContext: (value: boolean) => void setBrowserToolEnabled: (value: boolean) => void setShowRooIgnoredFiles: (value: boolean) => void setShowAnnouncement: (value: boolean) => void @@ -174,7 +173,6 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode terminalZdotdir: false, // Default ZDOTDIR handling setting terminalCompressProgressBar: true, // Default to compress progress bar output historyPreviewCollapsed: false, // Initialize the new state (default to expanded) - autoCondenseContext: false, }) const [didHydrateState, setDidHydrateState] = useState(false) @@ -290,7 +288,6 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode setAlwaysAllowSubtasks: (value) => setState((prevState) => ({ ...prevState, alwaysAllowSubtasks: value })), setShowAnnouncement: (value) => setState((prevState) => ({ ...prevState, shouldShowAnnouncement: value })), setAllowedCommands: (value) => setState((prevState) => ({ ...prevState, allowedCommands: value })), - setAutoCondenseContext: (value) => setState((prevState) => ({ ...prevState, autoCondenseContext: value })), setSoundEnabled: (value) => setState((prevState) => ({ ...prevState, soundEnabled: value })), setSoundVolume: (value) => setState((prevState) => ({ ...prevState, soundVolume: value })), setTtsEnabled: (value) => setState((prevState) => ({ ...prevState, ttsEnabled: value })), From 63a82bfcf4989bae27411bf24ffd893b20623766 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Wed, 14 May 2025 10:11:41 -0700 Subject: [PATCH 12/25] use experimental setting instead --- evals/packages/types/src/roo-code.ts | 3 +- src/core/task/Task.ts | 3 +- src/exports/roo-code.d.ts | 3 ++ src/exports/types.ts | 3 ++ src/schemas/index.ts | 3 +- src/shared/__tests__/experiments.test.ts | 32 +++++++++++++++++-- src/shared/experiments.ts | 2 ++ .../__tests__/ExtensionStateContext.test.tsx | 2 ++ webview-ui/src/i18n/locales/en/settings.json | 4 +++ 9 files changed, 49 insertions(+), 6 deletions(-) diff --git a/evals/packages/types/src/roo-code.ts b/evals/packages/types/src/roo-code.ts 
index cef4056ce7..0e1675b1de 100644 --- a/evals/packages/types/src/roo-code.ts +++ b/evals/packages/types/src/roo-code.ts @@ -297,7 +297,7 @@ export type CommandExecutionStatus = z.infer */ const experimentsSchema = z.object({ + autoCondenseContext: z.boolean(), powerSteering: z.boolean(), }) diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 2220daa156..697f07a22b 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -1421,7 +1421,6 @@ export class Task extends EventEmitter { enableMcpServerCreation, browserToolEnabled, language, - enableAutoContextCondensing, } = (await this.providerRef.deref()?.getState()) ?? {} const { customModes } = (await this.providerRef.deref()?.getState()) ?? {} @@ -1484,7 +1483,7 @@ export class Task extends EventEmitter { const contextWindow = modelInfo.contextWindow let condensedMessages - if (enableAutoContextCondensing) { + if (experiments?.autoCondenseContext) { condensedMessages = await summarizeConversationIfNeeded( this.apiConversationHistory, totalTokens, diff --git a/src/exports/roo-code.d.ts b/src/exports/roo-code.d.ts index 98290336ce..d8ee8edd34 100644 --- a/src/exports/roo-code.d.ts +++ b/src/exports/roo-code.d.ts @@ -100,6 +100,7 @@ type GlobalSettings = { fuzzyMatchThreshold?: number | undefined experiments?: | { + autoCondenseContext: boolean powerSteering: boolean } | undefined @@ -786,6 +787,7 @@ type IpcMessage = terminalCompressProgressBar?: boolean | undefined experiments?: | { + autoCondenseContext: boolean powerSteering: boolean } | undefined @@ -1248,6 +1250,7 @@ type TaskCommand = terminalCompressProgressBar?: boolean | undefined experiments?: | { + autoCondenseContext: boolean powerSteering: boolean } | undefined diff --git a/src/exports/types.ts b/src/exports/types.ts index 4b71ad5d0b..87e9af0603 100644 --- a/src/exports/types.ts +++ b/src/exports/types.ts @@ -100,6 +100,7 @@ type GlobalSettings = { fuzzyMatchThreshold?: number | undefined experiments?: | { + autoCondenseContext: boolean powerSteering: boolean } | undefined @@ -798,6 +799,7 @@ type IpcMessage = terminalCompressProgressBar?: boolean | undefined experiments?: | { + autoCondenseContext: boolean powerSteering: boolean } | undefined @@ -1262,6 +1264,7 @@ type TaskCommand = terminalCompressProgressBar?: boolean | undefined experiments?: | { + autoCondenseContext: boolean powerSteering: boolean } | undefined diff --git a/src/schemas/index.ts b/src/schemas/index.ts index 70ea39f624..4ac5c8806b 100644 --- a/src/schemas/index.ts +++ b/src/schemas/index.ts @@ -312,7 +312,7 @@ export type CommandExecutionStatus = z.infer */ const experimentsSchema = z.object({ + autoCondenseContext: z.boolean(), powerSteering: z.boolean(), }) diff --git a/src/shared/__tests__/experiments.test.ts b/src/shared/__tests__/experiments.test.ts index b68de1ced1..9d0e6dab9c 100644 --- a/src/shared/__tests__/experiments.test.ts +++ b/src/shared/__tests__/experiments.test.ts @@ -10,17 +10,28 @@ describe("experiments", () => { }) }) + describe("AUTO_CONDENSE_CONTEXT", () => { + it("is configured correctly", () => { + expect(EXPERIMENT_IDS.AUTO_CONDENSE_CONTEXT).toBe("autoCondenseContext") + expect(experimentConfigsMap.AUTO_CONDENSE_CONTEXT).toMatchObject({ + enabled: false, + }) + }) + }) + describe("isEnabled", () => { - it("returns false when experiment is not enabled", () => { + it("returns false when POWER_STEERING experiment is not enabled", () => { const experiments: Record = { powerSteering: false, + autoCondenseContext: false, } expect(Experiments.isEnabled(experiments, 
EXPERIMENT_IDS.POWER_STEERING)).toBe(false) }) - it("returns true when experiment is enabled", () => { + it("returns true when experiment POWER_STEERING is enabled", () => { const experiments: Record = { powerSteering: true, + autoCondenseContext: false, } expect(Experiments.isEnabled(experiments, EXPERIMENT_IDS.POWER_STEERING)).toBe(true) }) @@ -28,8 +39,25 @@ describe("experiments", () => { it("returns false when experiment is not present", () => { const experiments: Record = { powerSteering: false, + autoCondenseContext: false, } expect(Experiments.isEnabled(experiments, EXPERIMENT_IDS.POWER_STEERING)).toBe(false) }) + + it("returns false when AUTO_CONDENSE_CONTEXT experiment is not enabled", () => { + const experiments: Record = { + powerSteering: false, + autoCondenseContext: false, + } + expect(Experiments.isEnabled(experiments, EXPERIMENT_IDS.AUTO_CONDENSE_CONTEXT)).toBe(false) + }) + + it("returns true when AUTO_CONDENSE_CONTEXT experiment is enabled", () => { + const experiments: Record = { + powerSteering: false, + autoCondenseContext: true, + } + expect(Experiments.isEnabled(experiments, EXPERIMENT_IDS.AUTO_CONDENSE_CONTEXT)).toBe(true) + }) }) }) diff --git a/src/shared/experiments.ts b/src/shared/experiments.ts index b8917d8b10..0c9cba9c67 100644 --- a/src/shared/experiments.ts +++ b/src/shared/experiments.ts @@ -4,6 +4,7 @@ import { AssertEqual, Equals, Keys, Values } from "../utils/type-fu" export type { ExperimentId } export const EXPERIMENT_IDS = { + AUTO_CONDENSE_CONTEXT: "autoCondenseContext", POWER_STEERING: "powerSteering", } as const satisfies Record @@ -16,6 +17,7 @@ interface ExperimentConfig { } export const experimentConfigsMap: Record = { + AUTO_CONDENSE_CONTEXT: { enabled: false }, POWER_STEERING: { enabled: false }, } diff --git a/webview-ui/src/context/__tests__/ExtensionStateContext.test.tsx b/webview-ui/src/context/__tests__/ExtensionStateContext.test.tsx index 02e57b5e35..dbac052944 100644 --- a/webview-ui/src/context/__tests__/ExtensionStateContext.test.tsx +++ b/webview-ui/src/context/__tests__/ExtensionStateContext.test.tsx @@ -215,6 +215,7 @@ describe("mergeExtensionState", () => { apiConfiguration: { modelMaxThinkingTokens: 456, modelTemperature: 0.3 }, experiments: { powerSteering: true, + autoCondenseContext: true, } as Record, } @@ -227,6 +228,7 @@ describe("mergeExtensionState", () => { expect(result.experiments).toEqual({ powerSteering: true, + autoCondenseContext: true, }) }) }) diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json index 12d94b4337..e01714bad9 100644 --- a/webview-ui/src/i18n/locales/en/settings.json +++ b/webview-ui/src/i18n/locales/en/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Automatically condense the context window", + "description": "Uses an LLM call to summarize the past conversation when half of the task's context window is used." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Use experimental unified diff strategy", "description": "Enable the experimental unified diff strategy. This strategy might reduce the number of retries caused by model errors but may cause unexpected behavior or incorrect edits. Only enable if you understand the risks and are willing to carefully review all changes." 
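
For reference, the gating that this patch wires into Task.ts reduces to the sketch below. This is an illustrative sketch only, not part of the patch series: it assumes the `experiments` record returned from the provider's getState() plus the exports introduced above (EXPERIMENT_IDS, summarizeConversationIfNeeded, and truncateConversationIfNeeded), and it abbreviates the surrounding Task.ts method.

	// Sketch: pick the condensing strategy from the experiment flag.
	// EXPERIMENT_IDS.AUTO_CONDENSE_CONTEXT === "autoCondenseContext", so this is
	// equivalent to the `experiments?.autoCondenseContext` check in the hunk above.
	// When the flag is absent or false, sliding-window truncation remains the default.
	const condensedMessages = experiments?.[EXPERIMENT_IDS.AUTO_CONDENSE_CONTEXT]
		? await summarizeConversationIfNeeded(this.apiConversationHistory, totalTokens, contextWindow, this.api)
		: await truncateConversationIfNeeded({
				messages: this.apiConversationHistory,
				totalTokens,
				maxTokens,
				contextWindow,
				apiHandler: this.api,
			})

Routing the toggle through the experiments system, rather than the plain settings checkbox attempted and reverted in patches 09-11, lets the LLM-based condensing ship disabled by default while the truncation path stays the well-tested fallback.
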
From 547d4d7618bdadc97d39e1fa70b35fb97210b45f Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Wed, 14 May 2025 10:24:43 -0700 Subject: [PATCH 13/25] add image-cleaning.test.ts --- .../__tests__/image-cleaning.test.ts | 336 ++++++++++++++++++ 1 file changed, 336 insertions(+) create mode 100644 src/api/transform/__tests__/image-cleaning.test.ts diff --git a/src/api/transform/__tests__/image-cleaning.test.ts b/src/api/transform/__tests__/image-cleaning.test.ts new file mode 100644 index 0000000000..cbb318531a --- /dev/null +++ b/src/api/transform/__tests__/image-cleaning.test.ts @@ -0,0 +1,336 @@ +import { ApiHandler } from "../.." +import { ApiMessage } from "../../../core/task-persistence/apiMessages" +import { maybeRemoveImageBlocks } from "../image-cleaning" +import { ModelInfo } from "../../../shared/api" + +describe("maybeRemoveImageBlocks", () => { + // Mock ApiHandler factory function + const createMockApiHandler = (supportsImages: boolean): ApiHandler => { + return { + getModel: jest.fn().mockReturnValue({ + id: "test-model", + info: { + supportsImages, + } as ModelInfo, + }), + createMessage: jest.fn(), + countTokens: jest.fn(), + } + } + + it("should handle empty messages array", () => { + const apiHandler = createMockApiHandler(true) + const messages: ApiMessage[] = [] + + const result = maybeRemoveImageBlocks(messages, apiHandler) + + expect(result).toEqual([]) + // No need to check if getModel was called since there are no messages to process + }) + + it("should not modify messages with no image blocks", () => { + const apiHandler = createMockApiHandler(true) + const messages: ApiMessage[] = [ + { + role: "user", + content: "Hello, world!", + }, + { + role: "assistant", + content: "Hi there!", + }, + ] + + const result = maybeRemoveImageBlocks(messages, apiHandler) + + expect(result).toEqual(messages) + // getModel is only called when content is an array, which is not the case here + }) + + it("should not modify messages with array content but no image blocks", () => { + const apiHandler = createMockApiHandler(true) + const messages: ApiMessage[] = [ + { + role: "user", + content: [ + { + type: "text", + text: "Hello, world!", + }, + { + type: "text", + text: "How are you?", + }, + ], + }, + ] + + const result = maybeRemoveImageBlocks(messages, apiHandler) + + expect(result).toEqual(messages) + expect(apiHandler.getModel).toHaveBeenCalled() + }) + + it("should not modify image blocks when API handler supports images", () => { + const apiHandler = createMockApiHandler(true) + const messages: ApiMessage[] = [ + { + role: "user", + content: [ + { + type: "text", + text: "Check out this image:", + }, + { + type: "image", + source: { + type: "base64", + media_type: "image/jpeg", + data: "base64-encoded-image-data", + }, + }, + ], + }, + ] + + const result = maybeRemoveImageBlocks(messages, apiHandler) + + // Should not modify the messages since the API handler supports images + expect(result).toEqual(messages) + expect(apiHandler.getModel).toHaveBeenCalled() + }) + + it("should convert image blocks to text descriptions when API handler doesn't support images", () => { + const apiHandler = createMockApiHandler(false) + const messages: ApiMessage[] = [ + { + role: "user", + content: [ + { + type: "text", + text: "Check out this image:", + }, + { + type: "image", + source: { + type: "base64", + media_type: "image/jpeg", + data: "base64-encoded-image-data", + }, + }, + ], + }, + ] + + const result = maybeRemoveImageBlocks(messages, apiHandler) + + // Should convert image blocks to 
text descriptions + expect(result).toEqual([ + { + role: "user", + content: [ + { + type: "text", + text: "Check out this image:", + }, + { + type: "text", + text: "[Referenced image in conversation]", + }, + ], + }, + ]) + expect(apiHandler.getModel).toHaveBeenCalled() + }) + + it("should handle mixed content messages with multiple text and image blocks", () => { + const apiHandler = createMockApiHandler(false) + const messages: ApiMessage[] = [ + { + role: "user", + content: [ + { + type: "text", + text: "Here are some images:", + }, + { + type: "image", + source: { + type: "base64", + media_type: "image/jpeg", + data: "image-data-1", + }, + }, + { + type: "text", + text: "And another one:", + }, + { + type: "image", + source: { + type: "base64", + media_type: "image/png", + data: "image-data-2", + }, + }, + ], + }, + ] + + const result = maybeRemoveImageBlocks(messages, apiHandler) + + // Should convert all image blocks to text descriptions + expect(result).toEqual([ + { + role: "user", + content: [ + { + type: "text", + text: "Here are some images:", + }, + { + type: "text", + text: "[Referenced image in conversation]", + }, + { + type: "text", + text: "And another one:", + }, + { + type: "text", + text: "[Referenced image in conversation]", + }, + ], + }, + ]) + expect(apiHandler.getModel).toHaveBeenCalled() + }) + + it("should handle multiple messages with image blocks", () => { + const apiHandler = createMockApiHandler(false) + const messages: ApiMessage[] = [ + { + role: "user", + content: [ + { + type: "text", + text: "Here's an image:", + }, + { + type: "image", + source: { + type: "base64", + media_type: "image/jpeg", + data: "image-data-1", + }, + }, + ], + }, + { + role: "assistant", + content: "I see the image!", + }, + { + role: "user", + content: [ + { + type: "text", + text: "Here's another image:", + }, + { + type: "image", + source: { + type: "base64", + media_type: "image/png", + data: "image-data-2", + }, + }, + ], + }, + ] + + const result = maybeRemoveImageBlocks(messages, apiHandler) + + // Should convert all image blocks to text descriptions + expect(result).toEqual([ + { + role: "user", + content: [ + { + type: "text", + text: "Here's an image:", + }, + { + type: "text", + text: "[Referenced image in conversation]", + }, + ], + }, + { + role: "assistant", + content: "I see the image!", + }, + { + role: "user", + content: [ + { + type: "text", + text: "Here's another image:", + }, + { + type: "text", + text: "[Referenced image in conversation]", + }, + ], + }, + ]) + expect(apiHandler.getModel).toHaveBeenCalled() + }) + + it("should preserve additional message properties", () => { + const apiHandler = createMockApiHandler(false) + const messages: ApiMessage[] = [ + { + role: "user", + content: [ + { + type: "text", + text: "Here's an image:", + }, + { + type: "image", + source: { + type: "base64", + media_type: "image/jpeg", + data: "image-data", + }, + }, + ], + ts: 1620000000000, + isSummary: true, + }, + ] + + const result = maybeRemoveImageBlocks(messages, apiHandler) + + // Should convert image blocks to text descriptions while preserving additional properties + expect(result).toEqual([ + { + role: "user", + content: [ + { + type: "text", + text: "Here's an image:", + }, + { + type: "text", + text: "[Referenced image in conversation]", + }, + ], + ts: 1620000000000, + isSummary: true, + }, + ]) + expect(apiHandler.getModel).toHaveBeenCalled() + }) +}) From 2b453ee74bcf1787e6fbfe2a5690a192767c2232 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Wed, 14 May 
2025 10:57:49 -0700 Subject: [PATCH 14/25] add a test for condensing --- src/core/condense/__tests__/index.test.ts | 276 ++++++++++++++++++++++ src/core/condense/index.ts | 4 +- 2 files changed, 278 insertions(+), 2 deletions(-) create mode 100644 src/core/condense/__tests__/index.test.ts diff --git a/src/core/condense/__tests__/index.test.ts b/src/core/condense/__tests__/index.test.ts new file mode 100644 index 0000000000..966f35ec0d --- /dev/null +++ b/src/core/condense/__tests__/index.test.ts @@ -0,0 +1,276 @@ +import { jest } from "@jest/globals" +import { ApiHandler } from "../../../api" +import { ApiMessage } from "../../task-persistence/apiMessages" +import { maybeRemoveImageBlocks } from "../../../api/transform/image-cleaning" +import { summarizeConversationIfNeeded, getMessagesSinceLastSummary } from "../index" +import { ApiStream, ApiStreamChunk } from "../../../api/transform/stream" +import { CONTEXT_FRAC_FOR_SUMMARY, N_MESSAGES_TO_KEEP } from "../index" + +const CONTEXT_WINDOW_SIZE = 1000 +const OVER_THRESHOLD_TOTAL_TOKENS = Math.ceil(CONTEXT_WINDOW_SIZE * CONTEXT_FRAC_FOR_SUMMARY) + 1 + +// Mock dependencies +jest.mock("../../../api/transform/image-cleaning", () => ({ + maybeRemoveImageBlocks: jest.fn((messages) => messages), +})) + +// Mock Anthropic SDK +jest.mock("@anthropic-ai/sdk", () => { + return { + default: jest.fn().mockImplementation(() => ({ + messages: { + create: jest.fn(), + }, + })), + } +}) + +describe("Conversation Condensing", () => { + // Mock API handler + const mockApiHandler: jest.Mocked<ApiHandler> = { + createMessage: jest.fn(), + } as unknown as jest.Mocked<ApiHandler> + + // Reset mocks before each test + beforeEach(() => { + jest.clearAllMocks() + + // Setup default mock for createMessage + mockApiHandler.createMessage.mockImplementation((): ApiStream => { + return (async function* (): AsyncGenerator<ApiStreamChunk> { + yield { type: "text", text: "This is a summary of the conversation."
} + })() + }) + }) + + describe("getMessagesSinceLastSummary", () => { + it("should return all messages if there is no summary", () => { + const messages: ApiMessage[] = [ + { role: "user", content: "Hello", ts: 1 }, + { role: "assistant", content: "Hi there", ts: 2 }, + ] + + const result = getMessagesSinceLastSummary(messages) + expect(result).toEqual(messages) + }) + + it("should return messages since the last summary", () => { + const messages: ApiMessage[] = [ + { role: "user", content: "First message", ts: 1 }, + { role: "assistant", content: "Summary of conversation", ts: 2, isSummary: true }, + { role: "user", content: "New message", ts: 3 }, + { role: "assistant", content: "Response", ts: 4 }, + ] + + const result = getMessagesSinceLastSummary(messages) + expect(result).toEqual([ + { role: "assistant", content: "Summary of conversation", ts: 2, isSummary: true }, + { role: "user", content: "New message", ts: 3 }, + { role: "assistant", content: "Response", ts: 4 }, + ]) + }) + + it("should handle multiple summary messages and return since the last one", () => { + const messages: ApiMessage[] = [ + { role: "user", content: "First message", ts: 1 }, + { role: "assistant", content: "First summary", ts: 2, isSummary: true }, + { role: "user", content: "Second message", ts: 3 }, + { role: "assistant", content: "Second summary", ts: 4, isSummary: true }, + { role: "user", content: "New message", ts: 5 }, + ] + + const result = getMessagesSinceLastSummary(messages) + expect(result).toEqual([ + { role: "assistant", content: "Second summary", ts: 4, isSummary: true }, + { role: "user", content: "New message", ts: 5 }, + ]) + }) + + it("should handle empty message array", () => { + const messages: ApiMessage[] = [] + const result = getMessagesSinceLastSummary(messages) + expect(result).toEqual([]) + }) + }) + + describe("summarizeConversationIfNeeded", () => { + it("should not summarize when below token threshold", async () => { + const messages: ApiMessage[] = [ + { role: "user", content: "Hello", ts: 1 }, + { role: "assistant", content: "Hi there", ts: 2 }, + ] + + const totalTokens = 100 + const result = await summarizeConversationIfNeeded( + messages, + totalTokens, + CONTEXT_WINDOW_SIZE, + mockApiHandler, + ) + + expect(result).toBe(messages) + expect(mockApiHandler.createMessage).not.toHaveBeenCalled() + }) + + it("should summarize when above token threshold", async () => { + const messages: ApiMessage[] = [ + { role: "user", content: "Message 1", ts: 1 }, + { role: "assistant", content: "Response 1", ts: 2 }, + { role: "user", content: "Message 2", ts: 3 }, + { role: "assistant", content: "Response 2", ts: 4 }, + { role: "user", content: "Message 3", ts: 5 }, + { role: "assistant", content: "Response 3", ts: 6 }, + { role: "user", content: "Message 4", ts: 7 }, + ] + + const result = await summarizeConversationIfNeeded( + messages, + OVER_THRESHOLD_TOTAL_TOKENS, + CONTEXT_WINDOW_SIZE, + mockApiHandler, + ) + + // Should have called createMessage + expect(mockApiHandler.createMessage).toHaveBeenCalled() + + // Should have a summary message inserted + expect(result.some((msg) => msg.isSummary)).toBe(true) + + // Should preserve the last N_MESSAGES_TO_KEEP messages + for (let i = 1; i <= N_MESSAGES_TO_KEEP; i++) { + expect(result).toContainEqual(messages[messages.length - i]) + } + }) + + it("should not summarize if there are not enough messages", async () => { + const messages: ApiMessage[] = [{ role: "user", content: "Hello", ts: 1 }] + + const result = await 
summarizeConversationIfNeeded( + messages, + OVER_THRESHOLD_TOTAL_TOKENS, + CONTEXT_WINDOW_SIZE, + mockApiHandler, + ) + + expect(result).toBe(messages) + expect(mockApiHandler.createMessage).not.toHaveBeenCalled() + }) + + it("should not summarize if we recently summarized", async () => { + const messages: ApiMessage[] = [ + { role: "user", content: "Message 1", ts: 1 }, + { role: "assistant", content: "Response 1", ts: 2 }, + { role: "user", content: "Message 2", ts: 3 }, + { role: "assistant", content: "Summary", ts: 4, isSummary: true }, + { role: "user", content: "Message 3", ts: 5 }, + ] + + const result = await summarizeConversationIfNeeded( + messages, + OVER_THRESHOLD_TOTAL_TOKENS, + CONTEXT_WINDOW_SIZE, + mockApiHandler, + ) + + // Should not have called createMessage because one of the last 3 messages is already a summary + expect(mockApiHandler.createMessage).not.toHaveBeenCalled() + expect(result).toBe(messages) + }) + + it("should handle empty API response", async () => { + // Setup mock to return empty summary + mockApiHandler.createMessage.mockImplementation((): ApiStream => { + return (async function* (): AsyncGenerator<ApiStreamChunk> { + yield { type: "text", text: "" } + })() + }) + + const messages: ApiMessage[] = [ + { role: "user", content: "Message 1", ts: 1 }, + { role: "assistant", content: "Response 1", ts: 2 }, + { role: "user", content: "Message 2", ts: 3 }, + { role: "assistant", content: "Response 2", ts: 4 }, + { role: "user", content: "Message 3", ts: 5 }, + { role: "assistant", content: "Response 3", ts: 6 }, + { role: "user", content: "Message 4", ts: 7 }, + ] + + const consoleSpy = jest.spyOn(console, "warn").mockImplementation(() => {}) + + const result = await summarizeConversationIfNeeded( + messages, + OVER_THRESHOLD_TOTAL_TOKENS, + CONTEXT_WINDOW_SIZE, + mockApiHandler, + ) + + // Should have called createMessage + expect(mockApiHandler.createMessage).toHaveBeenCalled() + + // Should have logged a warning + expect(consoleSpy).toHaveBeenCalledWith("Received empty summary from API") + + // Should return original messages + expect(result).toBe(messages) + + consoleSpy.mockRestore() + }) + + it("should correctly handle non-text chunks in API response", async () => { + // Setup mock to return mixed chunks + mockApiHandler.createMessage.mockImplementation((): ApiStream => { + return (async function* (): AsyncGenerator<ApiStreamChunk> { + yield { type: "text", text: "This is " } as ApiStreamChunk + yield { type: "text", text: "a summary."
} as ApiStreamChunk + })() + }) + + const messages: ApiMessage[] = [ + { role: "user", content: "Message 1", ts: 1 }, + { role: "assistant", content: "Response 1", ts: 2 }, + { role: "user", content: "Message 2", ts: 3 }, + { role: "assistant", content: "Response 2", ts: 4 }, + { role: "user", content: "Message 3", ts: 5 }, + { role: "assistant", content: "Response 3", ts: 6 }, + { role: "user", content: "Message 4", ts: 7 }, + ] + + const result = await summarizeConversationIfNeeded( + messages, + OVER_THRESHOLD_TOTAL_TOKENS, + CONTEXT_WINDOW_SIZE, + mockApiHandler, + ) + + // Should have called createMessage + expect(mockApiHandler.createMessage).toHaveBeenCalled() + + // Should have a summary message with the correct content + const summaryMessage = result.find((msg) => msg.isSummary) + expect(summaryMessage).toBeDefined() + expect(summaryMessage?.content).toBe("This is a summary.") + }) + + it("should use maybeRemoveImageBlocks when preparing messages for summarization", async () => { + const messages: ApiMessage[] = [ + { role: "user", content: "Message 1", ts: 1 }, + { role: "assistant", content: "Response 1", ts: 2 }, + { role: "user", content: "Message 2", ts: 3 }, + { role: "assistant", content: "Response 2", ts: 4 }, + { role: "user", content: "Message 3", ts: 5 }, + { role: "assistant", content: "Response 3", ts: 6 }, + { role: "user", content: "Message 4", ts: 7 }, + ] + + await summarizeConversationIfNeeded( + messages, + OVER_THRESHOLD_TOTAL_TOKENS, + CONTEXT_WINDOW_SIZE, + mockApiHandler, + ) + + // Should have called maybeRemoveImageBlocks + expect(maybeRemoveImageBlocks).toHaveBeenCalled() + }) + }) +}) diff --git a/src/core/condense/index.ts b/src/core/condense/index.ts index 9e9b60596e..c4e20bb65d 100644 --- a/src/core/condense/index.ts +++ b/src/core/condense/index.ts @@ -3,8 +3,8 @@ import { ApiHandler } from "../../api" import { ApiMessage } from "../task-persistence/apiMessages" import { maybeRemoveImageBlocks } from "../../api/transform/image-cleaning" -const CONTEXT_FRAC_FOR_SUMMARY = 0.5 // TODO(canyon): make this configurable -const N_MESSAGES_TO_KEEP = 3 +export const CONTEXT_FRAC_FOR_SUMMARY = 0.5 +export const N_MESSAGES_TO_KEEP = 3 const SUMMARY_PROMPT = `\ Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions. 
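The tests in this patch pin down getMessagesSinceLastSummary precisely: return the most recent summary message plus everything after it, or the whole array when no summary exists. One shape that satisfies that contract is sketched below; the implementation actually shipped in src/core/condense/index.ts may differ in details:

import { ApiMessage } from "../task-persistence/apiMessages"

// Sketch only: walk backwards to the latest summary marker, keeping the
// summary itself so downstream requests retain its content; with no summary
// present (or an empty array), return the input unchanged.
export function getMessagesSinceLastSummary(messages: ApiMessage[]): ApiMessage[] {
	for (let i = messages.length - 1; i >= 0; i--) {
		if (messages[i].isSummary) {
			return messages.slice(i)
		}
	}
	return messages
}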
From 7679ad351ef10214edc152bd3c651f324a374d62 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Wed, 14 May 2025 11:43:52 -0700 Subject: [PATCH 15/25] update translations, thanks Roo --- webview-ui/src/i18n/locales/ca/settings.json | 4 ++++ webview-ui/src/i18n/locales/de/settings.json | 4 ++++ webview-ui/src/i18n/locales/es/settings.json | 4 ++++ webview-ui/src/i18n/locales/fr/settings.json | 4 ++++ webview-ui/src/i18n/locales/hi/settings.json | 4 ++++ webview-ui/src/i18n/locales/it/settings.json | 4 ++++ webview-ui/src/i18n/locales/ja/settings.json | 4 ++++ webview-ui/src/i18n/locales/ko/settings.json | 4 ++++ webview-ui/src/i18n/locales/nl/settings.json | 4 ++++ webview-ui/src/i18n/locales/pl/settings.json | 4 ++++ webview-ui/src/i18n/locales/pt-BR/settings.json | 4 ++++ webview-ui/src/i18n/locales/ru/settings.json | 4 ++++ webview-ui/src/i18n/locales/tr/settings.json | 4 ++++ webview-ui/src/i18n/locales/vi/settings.json | 4 ++++ webview-ui/src/i18n/locales/zh-CN/settings.json | 4 ++++ webview-ui/src/i18n/locales/zh-TW/settings.json | 4 ++++ 16 files changed, 64 insertions(+) diff --git a/webview-ui/src/i18n/locales/ca/settings.json b/webview-ui/src/i18n/locales/ca/settings.json index c9e1cb5606..33ca855c40 100644 --- a/webview-ui/src/i18n/locales/ca/settings.json +++ b/webview-ui/src/i18n/locales/ca/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Condensar automàticament la finestra de context", + "description": "Utilitza una crida LLM per resumir la conversa anterior quan s'utilitza la meitat de la finestra de context de la tasca." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Utilitzar estratègia diff unificada experimental", "description": "Activar l'estratègia diff unificada experimental. Aquesta estratègia podria reduir el nombre de reintents causats per errors del model, però pot causar comportaments inesperats o edicions incorrectes. Activeu-la només si enteneu els riscos i esteu disposats a revisar acuradament tots els canvis." diff --git a/webview-ui/src/i18n/locales/de/settings.json b/webview-ui/src/i18n/locales/de/settings.json index aac308c4a1..f338e7d7e9 100644 --- a/webview-ui/src/i18n/locales/de/settings.json +++ b/webview-ui/src/i18n/locales/de/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Kontextfenster automatisch komprimieren", + "description": "Verwendet einen LLM-Aufruf, um das vergangene Gespräch zusammenzufassen, wenn die Hälfte des Aufgabenkontextfensters verwendet wird." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Experimentelle einheitliche Diff-Strategie verwenden", "description": "Aktiviert die experimentelle einheitliche Diff-Strategie. Diese Strategie könnte die Anzahl der durch Modellfehler verursachten Wiederholungen reduzieren, kann aber unerwartetes Verhalten oder falsche Bearbeitungen verursachen. Nur aktivieren, wenn du die Risiken verstehst und bereit bist, alle Änderungen sorgfältig zu überprüfen." 
diff --git a/webview-ui/src/i18n/locales/es/settings.json b/webview-ui/src/i18n/locales/es/settings.json index f862f13405..40c468b5bb 100644 --- a/webview-ui/src/i18n/locales/es/settings.json +++ b/webview-ui/src/i18n/locales/es/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Condensar automáticamente la ventana de contexto", + "description": "Utiliza una llamada LLM para resumir la conversación anterior cuando se utiliza la mitad de la ventana de contexto de la tarea." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Usar estrategia de diff unificada experimental", "description": "Habilitar la estrategia de diff unificada experimental. Esta estrategia podría reducir el número de reintentos causados por errores del modelo, pero puede causar comportamientos inesperados o ediciones incorrectas. Habilítela solo si comprende los riesgos y está dispuesto a revisar cuidadosamente todos los cambios." diff --git a/webview-ui/src/i18n/locales/fr/settings.json b/webview-ui/src/i18n/locales/fr/settings.json index 9f694c5aa3..68efaf7788 100644 --- a/webview-ui/src/i18n/locales/fr/settings.json +++ b/webview-ui/src/i18n/locales/fr/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Condenser automatiquement la fenêtre de contexte", + "description": "Utilise un appel LLM pour résumer la conversation passée lorsque la moitié de la fenêtre de contexte de la tâche est utilisée." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Utiliser la stratégie diff unifiée expérimentale", "description": "Activer la stratégie diff unifiée expérimentale. Cette stratégie pourrait réduire le nombre de tentatives causées par des erreurs de modèle, mais peut provoquer des comportements inattendus ou des modifications incorrectes. Activez-la uniquement si vous comprenez les risques et êtes prêt à examiner attentivement tous les changements." diff --git a/webview-ui/src/i18n/locales/hi/settings.json b/webview-ui/src/i18n/locales/hi/settings.json index 0b21ed906b..27396792b8 100644 --- a/webview-ui/src/i18n/locales/hi/settings.json +++ b/webview-ui/src/i18n/locales/hi/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "स्वचालित रूप से संदर्भ विंडो को संघनित करें", + "description": "कार्य के संदर्भ विंडो का आधा उपयोग होने पर पिछली बातचीत को सारांशित करने के लिए एक LLM कॉल का उपयोग करता है।" + }, "DIFF_STRATEGY_UNIFIED": { "name": "प्रायोगिक एकीकृत diff रणनीति का उपयोग करें", "description": "प्रायोगिक एकीकृत diff रणनीति सक्षम करें। यह रणनीति मॉडल त्रुटियों के कारण पुनः प्रयासों की संख्या को कम कर सकती है, लेकिन अप्रत्याशित व्यवहार या गलत संपादन का कारण बन सकती है। केवल तभी सक्षम करें जब आप जोखिमों को समझते हों और सभी परिवर्तनों की सावधानीपूर्वक समीक्षा करने के लिए तैयार हों।" diff --git a/webview-ui/src/i18n/locales/it/settings.json b/webview-ui/src/i18n/locales/it/settings.json index a3e210c480..8e192dee42 100644 --- a/webview-ui/src/i18n/locales/it/settings.json +++ b/webview-ui/src/i18n/locales/it/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Condensa automaticamente la finestra di contesto", + "description": "Utilizza una chiamata LLM per riassumere la conversazione passata quando viene utilizzata metà della finestra di contesto dell'attività." 
+ }, "DIFF_STRATEGY_UNIFIED": { "name": "Usa strategia diff unificata sperimentale", "description": "Abilita la strategia diff unificata sperimentale. Questa strategia potrebbe ridurre il numero di tentativi causati da errori del modello, ma può causare comportamenti imprevisti o modifiche errate. Abilitala solo se comprendi i rischi e sei disposto a rivedere attentamente tutte le modifiche." diff --git a/webview-ui/src/i18n/locales/ja/settings.json b/webview-ui/src/i18n/locales/ja/settings.json index 1f1ec12c54..1c0246f694 100644 --- a/webview-ui/src/i18n/locales/ja/settings.json +++ b/webview-ui/src/i18n/locales/ja/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "コンテキストウィンドウを自動的に要約する", + "description": "タスクのコンテキストウィンドウの半分が使用されると、LLM呼び出しを使用して過去の会話を要約します。" + }, "DIFF_STRATEGY_UNIFIED": { "name": "実験的な統合diff戦略を使用する", "description": "実験的な統合diff戦略を有効にします。この戦略はモデルエラーによる再試行の回数を減らす可能性がありますが、予期しない動作や不正確な編集を引き起こす可能性があります。リスクを理解し、すべての変更を注意深く確認する準備がある場合にのみ有効にしてください。" diff --git a/webview-ui/src/i18n/locales/ko/settings.json b/webview-ui/src/i18n/locales/ko/settings.json index 95195ff0a2..cd533e9ef2 100644 --- a/webview-ui/src/i18n/locales/ko/settings.json +++ b/webview-ui/src/i18n/locales/ko/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "컨텍스트 창을 자동으로 요약", + "description": "작업의 컨텍스트 창의 절반이 사용되면 LLM 호출을 사용하여 과거 대화를 요약합니다." + }, "DIFF_STRATEGY_UNIFIED": { "name": "실험적 통합 diff 전략 사용", "description": "실험적 통합 diff 전략을 활성화합니다. 이 전략은 모델 오류로 인한 재시도 횟수를 줄일 수 있지만 예기치 않은 동작이나 잘못된 편집을 일으킬 수 있습니다. 위험을 이해하고 모든 변경 사항을 신중하게 검토할 의향이 있는 경우에만 활성화하십시오." diff --git a/webview-ui/src/i18n/locales/nl/settings.json b/webview-ui/src/i18n/locales/nl/settings.json index e812d78f26..e2ba15a8c8 100644 --- a/webview-ui/src/i18n/locales/nl/settings.json +++ b/webview-ui/src/i18n/locales/nl/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Automatisch het contextvenster comprimeren", + "description": "Gebruikt een LLM-aanroep om het eerdere gesprek samen te vatten wanneer de helft van het contextvenster van de taak is gebruikt." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Experimentele unified diff-strategie gebruiken", "description": "Schakel de experimentele unified diff-strategie in. Deze strategie kan het aantal herhalingen door model fouten verminderen, maar kan onverwacht gedrag of onjuiste bewerkingen veroorzaken. Alleen inschakelen als je de risico's begrijpt en wijzigingen zorgvuldig wilt controleren." diff --git a/webview-ui/src/i18n/locales/pl/settings.json b/webview-ui/src/i18n/locales/pl/settings.json index 343ce01397..5576ed0ce4 100644 --- a/webview-ui/src/i18n/locales/pl/settings.json +++ b/webview-ui/src/i18n/locales/pl/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Automatycznie kondensuj okno kontekstu", + "description": "Używa wywołania LLM do podsumowania poprzedniej konwersacji, gdy wykorzystana jest połowa okna kontekstu zadania." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Użyj eksperymentalnej ujednoliconej strategii diff", "description": "Włącz eksperymentalną ujednoliconą strategię diff. Ta strategia może zmniejszyć liczbę ponownych prób spowodowanych błędami modelu, ale może powodować nieoczekiwane zachowanie lub nieprawidłowe edycje. Włącz tylko jeśli rozumiesz ryzyko i jesteś gotów dokładnie przeglądać wszystkie zmiany." 
diff --git a/webview-ui/src/i18n/locales/pt-BR/settings.json b/webview-ui/src/i18n/locales/pt-BR/settings.json index 0c66a21847..1d74ea61a9 100644 --- a/webview-ui/src/i18n/locales/pt-BR/settings.json +++ b/webview-ui/src/i18n/locales/pt-BR/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Condensar automaticamente a janela de contexto", + "description": "Usa uma chamada LLM para resumir a conversa anterior quando metade da janela de contexto da tarefa é usada." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Usar estratégia diff unificada experimental", "description": "Ativar a estratégia diff unificada experimental. Esta estratégia pode reduzir o número de novas tentativas causadas por erros do modelo, mas pode causar comportamento inesperado ou edições incorretas. Ative apenas se compreender os riscos e estiver disposto a revisar cuidadosamente todas as alterações." diff --git a/webview-ui/src/i18n/locales/ru/settings.json b/webview-ui/src/i18n/locales/ru/settings.json index 376763a7d8..796725a3c7 100644 --- a/webview-ui/src/i18n/locales/ru/settings.json +++ b/webview-ui/src/i18n/locales/ru/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Автоматически сжимать окно контекста", + "description": "Использует вызов LLM для обобщения прошлого разговора, когда используется половина окна контекста задачи." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Использовать экспериментальную стратегию унифицированного диффа", "description": "Включает экспериментальную стратегию унифицированного диффа. Может уменьшить количество повторных попыток из-за ошибок модели, но может привести к неожиданному поведению или неверным правкам. Включайте только если готовы внимательно проверять все изменения." diff --git a/webview-ui/src/i18n/locales/tr/settings.json b/webview-ui/src/i18n/locales/tr/settings.json index 1d23b7e04e..b32344c383 100644 --- a/webview-ui/src/i18n/locales/tr/settings.json +++ b/webview-ui/src/i18n/locales/tr/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Bağlam penceresini otomatik olarak yoğunlaştır", + "description": "Görevin bağlam penceresinin yarısı kullanıldığında, geçmiş konuşmayı özetlemek için bir LLM çağrısı kullanır." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Deneysel birleştirilmiş diff stratejisini kullan", "description": "Deneysel birleştirilmiş diff stratejisini etkinleştir. Bu strateji, model hatalarından kaynaklanan yeniden deneme sayısını azaltabilir, ancak beklenmeyen davranışlara veya hatalı düzenlemelere neden olabilir. Yalnızca riskleri anlıyorsanız ve tüm değişiklikleri dikkatlice incelemeye istekliyseniz etkinleştirin." diff --git a/webview-ui/src/i18n/locales/vi/settings.json b/webview-ui/src/i18n/locales/vi/settings.json index f964bf4b7a..cf82b0288c 100644 --- a/webview-ui/src/i18n/locales/vi/settings.json +++ b/webview-ui/src/i18n/locales/vi/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Tự động cô đọng cửa sổ ngữ cảnh", + "description": "Sử dụng cuộc gọi LLM để tóm tắt cuộc hội thoại trước đó khi một nửa cửa sổ ngữ cảnh của tác vụ được sử dụng." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Sử dụng chiến lược diff thống nhất thử nghiệm", "description": "Bật chiến lược diff thống nhất thử nghiệm. Chiến lược này có thể giảm số lần thử lại do lỗi mô hình nhưng có thể gây ra hành vi không mong muốn hoặc chỉnh sửa không chính xác. 
Chỉ bật nếu bạn hiểu rõ các rủi ro và sẵn sàng xem xét cẩn thận tất cả các thay đổi." diff --git a/webview-ui/src/i18n/locales/zh-CN/settings.json b/webview-ui/src/i18n/locales/zh-CN/settings.json index ba14e48762..ec179ee77f 100644 --- a/webview-ui/src/i18n/locales/zh-CN/settings.json +++ b/webview-ui/src/i18n/locales/zh-CN/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "自动压缩上下文窗口", + "description": "当任务的上下文窗口使用了一半时,使用LLM调用来总结过去的对话。" + }, "DIFF_STRATEGY_UNIFIED": { "name": "启用diff更新工具", "description": "可减少因模型错误导致的重复尝试,但可能引发意外操作。启用前请确保理解风险并会仔细检查所有修改。" diff --git a/webview-ui/src/i18n/locales/zh-TW/settings.json b/webview-ui/src/i18n/locales/zh-TW/settings.json index 78c994457e..067ef7400f 100644 --- a/webview-ui/src/i18n/locales/zh-TW/settings.json +++ b/webview-ui/src/i18n/locales/zh-TW/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "自動壓縮上下文窗口", + "description": "當任務的上下文窗口使用了一半時,使用LLM調用來總結過去的對話。" + }, "DIFF_STRATEGY_UNIFIED": { "name": "使用實驗性統一差異比對策略", "description": "啟用實驗性的統一差異比對策略。此策略可能減少因模型錯誤而導致的重試次數,但也可能導致意外行為或錯誤的編輯。請務必了解風險,並願意仔細檢查所有變更後再啟用。" From 160932af32d4b22ecc2449ed99950adb625cd58e Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Wed, 14 May 2025 12:13:55 -0700 Subject: [PATCH 16/25] Revert "update translations, thanks Roo" This reverts commit 7679ad351ef10214edc152bd3c651f324a374d62. --- webview-ui/src/i18n/locales/ca/settings.json | 4 ---- webview-ui/src/i18n/locales/de/settings.json | 4 ---- webview-ui/src/i18n/locales/es/settings.json | 4 ---- webview-ui/src/i18n/locales/fr/settings.json | 4 ---- webview-ui/src/i18n/locales/hi/settings.json | 4 ---- webview-ui/src/i18n/locales/it/settings.json | 4 ---- webview-ui/src/i18n/locales/ja/settings.json | 4 ---- webview-ui/src/i18n/locales/ko/settings.json | 4 ---- webview-ui/src/i18n/locales/nl/settings.json | 4 ---- webview-ui/src/i18n/locales/pl/settings.json | 4 ---- webview-ui/src/i18n/locales/pt-BR/settings.json | 4 ---- webview-ui/src/i18n/locales/ru/settings.json | 4 ---- webview-ui/src/i18n/locales/tr/settings.json | 4 ---- webview-ui/src/i18n/locales/vi/settings.json | 4 ---- webview-ui/src/i18n/locales/zh-CN/settings.json | 4 ---- webview-ui/src/i18n/locales/zh-TW/settings.json | 4 ---- 16 files changed, 64 deletions(-) diff --git a/webview-ui/src/i18n/locales/ca/settings.json b/webview-ui/src/i18n/locales/ca/settings.json index 33ca855c40..c9e1cb5606 100644 --- a/webview-ui/src/i18n/locales/ca/settings.json +++ b/webview-ui/src/i18n/locales/ca/settings.json @@ -396,10 +396,6 @@ }, "experimental": { "warning": "⚠️", - "AUTO_CONDENSE_CONTEXT": { - "name": "Condensar automàticament la finestra de context", - "description": "Utilitza una crida LLM per resumir la conversa anterior quan s'utilitza la meitat de la finestra de context de la tasca." - }, "DIFF_STRATEGY_UNIFIED": { "name": "Utilitzar estratègia diff unificada experimental", "description": "Activar l'estratègia diff unificada experimental. Aquesta estratègia podria reduir el nombre de reintents causats per errors del model, però pot causar comportaments inesperats o edicions incorrectes. Activeu-la només si enteneu els riscos i esteu disposats a revisar acuradament tots els canvis." 
diff --git a/webview-ui/src/i18n/locales/de/settings.json b/webview-ui/src/i18n/locales/de/settings.json index f338e7d7e9..aac308c4a1 100644 --- a/webview-ui/src/i18n/locales/de/settings.json +++ b/webview-ui/src/i18n/locales/de/settings.json @@ -396,10 +396,6 @@ }, "experimental": { "warning": "⚠️", - "AUTO_CONDENSE_CONTEXT": { - "name": "Kontextfenster automatisch komprimieren", - "description": "Verwendet einen LLM-Aufruf, um das vergangene Gespräch zusammenzufassen, wenn die Hälfte des Aufgabenkontextfensters verwendet wird." - }, "DIFF_STRATEGY_UNIFIED": { "name": "Experimentelle einheitliche Diff-Strategie verwenden", "description": "Aktiviert die experimentelle einheitliche Diff-Strategie. Diese Strategie könnte die Anzahl der durch Modellfehler verursachten Wiederholungen reduzieren, kann aber unerwartetes Verhalten oder falsche Bearbeitungen verursachen. Nur aktivieren, wenn du die Risiken verstehst und bereit bist, alle Änderungen sorgfältig zu überprüfen." diff --git a/webview-ui/src/i18n/locales/es/settings.json b/webview-ui/src/i18n/locales/es/settings.json index 40c468b5bb..f862f13405 100644 --- a/webview-ui/src/i18n/locales/es/settings.json +++ b/webview-ui/src/i18n/locales/es/settings.json @@ -396,10 +396,6 @@ }, "experimental": { "warning": "⚠️", - "AUTO_CONDENSE_CONTEXT": { - "name": "Condensar automáticamente la ventana de contexto", - "description": "Utiliza una llamada LLM para resumir la conversación anterior cuando se utiliza la mitad de la ventana de contexto de la tarea." - }, "DIFF_STRATEGY_UNIFIED": { "name": "Usar estrategia de diff unificada experimental", "description": "Habilitar la estrategia de diff unificada experimental. Esta estrategia podría reducir el número de reintentos causados por errores del modelo, pero puede causar comportamientos inesperados o ediciones incorrectas. Habilítela solo si comprende los riesgos y está dispuesto a revisar cuidadosamente todos los cambios." diff --git a/webview-ui/src/i18n/locales/fr/settings.json b/webview-ui/src/i18n/locales/fr/settings.json index 68efaf7788..9f694c5aa3 100644 --- a/webview-ui/src/i18n/locales/fr/settings.json +++ b/webview-ui/src/i18n/locales/fr/settings.json @@ -396,10 +396,6 @@ }, "experimental": { "warning": "⚠️", - "AUTO_CONDENSE_CONTEXT": { - "name": "Condenser automatiquement la fenêtre de contexte", - "description": "Utilise un appel LLM pour résumer la conversation passée lorsque la moitié de la fenêtre de contexte de la tâche est utilisée." - }, "DIFF_STRATEGY_UNIFIED": { "name": "Utiliser la stratégie diff unifiée expérimentale", "description": "Activer la stratégie diff unifiée expérimentale. Cette stratégie pourrait réduire le nombre de tentatives causées par des erreurs de modèle, mais peut provoquer des comportements inattendus ou des modifications incorrectes. Activez-la uniquement si vous comprenez les risques et êtes prêt à examiner attentivement tous les changements." 
diff --git a/webview-ui/src/i18n/locales/hi/settings.json b/webview-ui/src/i18n/locales/hi/settings.json index 27396792b8..0b21ed906b 100644 --- a/webview-ui/src/i18n/locales/hi/settings.json +++ b/webview-ui/src/i18n/locales/hi/settings.json @@ -396,10 +396,6 @@ }, "experimental": { "warning": "⚠️", - "AUTO_CONDENSE_CONTEXT": { - "name": "स्वचालित रूप से संदर्भ विंडो को संघनित करें", - "description": "कार्य के संदर्भ विंडो का आधा उपयोग होने पर पिछली बातचीत को सारांशित करने के लिए एक LLM कॉल का उपयोग करता है।" - }, "DIFF_STRATEGY_UNIFIED": { "name": "प्रायोगिक एकीकृत diff रणनीति का उपयोग करें", "description": "प्रायोगिक एकीकृत diff रणनीति सक्षम करें। यह रणनीति मॉडल त्रुटियों के कारण पुनः प्रयासों की संख्या को कम कर सकती है, लेकिन अप्रत्याशित व्यवहार या गलत संपादन का कारण बन सकती है। केवल तभी सक्षम करें जब आप जोखिमों को समझते हों और सभी परिवर्तनों की सावधानीपूर्वक समीक्षा करने के लिए तैयार हों।" diff --git a/webview-ui/src/i18n/locales/it/settings.json b/webview-ui/src/i18n/locales/it/settings.json index 8e192dee42..a3e210c480 100644 --- a/webview-ui/src/i18n/locales/it/settings.json +++ b/webview-ui/src/i18n/locales/it/settings.json @@ -396,10 +396,6 @@ }, "experimental": { "warning": "⚠️", - "AUTO_CONDENSE_CONTEXT": { - "name": "Condensa automaticamente la finestra di contesto", - "description": "Utilizza una chiamata LLM per riassumere la conversazione passata quando viene utilizzata metà della finestra di contesto dell'attività." - }, "DIFF_STRATEGY_UNIFIED": { "name": "Usa strategia diff unificata sperimentale", "description": "Abilita la strategia diff unificata sperimentale. Questa strategia potrebbe ridurre il numero di tentativi causati da errori del modello, ma può causare comportamenti imprevisti o modifiche errate. Abilitala solo se comprendi i rischi e sei disposto a rivedere attentamente tutte le modifiche." diff --git a/webview-ui/src/i18n/locales/ja/settings.json b/webview-ui/src/i18n/locales/ja/settings.json index 1c0246f694..1f1ec12c54 100644 --- a/webview-ui/src/i18n/locales/ja/settings.json +++ b/webview-ui/src/i18n/locales/ja/settings.json @@ -396,10 +396,6 @@ }, "experimental": { "warning": "⚠️", - "AUTO_CONDENSE_CONTEXT": { - "name": "コンテキストウィンドウを自動的に要約する", - "description": "タスクのコンテキストウィンドウの半分が使用されると、LLM呼び出しを使用して過去の会話を要約します。" - }, "DIFF_STRATEGY_UNIFIED": { "name": "実験的な統合diff戦略を使用する", "description": "実験的な統合diff戦略を有効にします。この戦略はモデルエラーによる再試行の回数を減らす可能性がありますが、予期しない動作や不正確な編集を引き起こす可能性があります。リスクを理解し、すべての変更を注意深く確認する準備がある場合にのみ有効にしてください。" diff --git a/webview-ui/src/i18n/locales/ko/settings.json b/webview-ui/src/i18n/locales/ko/settings.json index cd533e9ef2..95195ff0a2 100644 --- a/webview-ui/src/i18n/locales/ko/settings.json +++ b/webview-ui/src/i18n/locales/ko/settings.json @@ -396,10 +396,6 @@ }, "experimental": { "warning": "⚠️", - "AUTO_CONDENSE_CONTEXT": { - "name": "컨텍스트 창을 자동으로 요약", - "description": "작업의 컨텍스트 창의 절반이 사용되면 LLM 호출을 사용하여 과거 대화를 요약합니다." - }, "DIFF_STRATEGY_UNIFIED": { "name": "실험적 통합 diff 전략 사용", "description": "실험적 통합 diff 전략을 활성화합니다. 이 전략은 모델 오류로 인한 재시도 횟수를 줄일 수 있지만 예기치 않은 동작이나 잘못된 편집을 일으킬 수 있습니다. 위험을 이해하고 모든 변경 사항을 신중하게 검토할 의향이 있는 경우에만 활성화하십시오." 
diff --git a/webview-ui/src/i18n/locales/nl/settings.json b/webview-ui/src/i18n/locales/nl/settings.json index e2ba15a8c8..e812d78f26 100644 --- a/webview-ui/src/i18n/locales/nl/settings.json +++ b/webview-ui/src/i18n/locales/nl/settings.json @@ -396,10 +396,6 @@ }, "experimental": { "warning": "⚠️", - "AUTO_CONDENSE_CONTEXT": { - "name": "Automatisch het contextvenster comprimeren", - "description": "Gebruikt een LLM-aanroep om het eerdere gesprek samen te vatten wanneer de helft van het contextvenster van de taak is gebruikt." - }, "DIFF_STRATEGY_UNIFIED": { "name": "Experimentele unified diff-strategie gebruiken", "description": "Schakel de experimentele unified diff-strategie in. Deze strategie kan het aantal herhalingen door model fouten verminderen, maar kan onverwacht gedrag of onjuiste bewerkingen veroorzaken. Alleen inschakelen als je de risico's begrijpt en wijzigingen zorgvuldig wilt controleren." diff --git a/webview-ui/src/i18n/locales/pl/settings.json b/webview-ui/src/i18n/locales/pl/settings.json index 5576ed0ce4..343ce01397 100644 --- a/webview-ui/src/i18n/locales/pl/settings.json +++ b/webview-ui/src/i18n/locales/pl/settings.json @@ -396,10 +396,6 @@ }, "experimental": { "warning": "⚠️", - "AUTO_CONDENSE_CONTEXT": { - "name": "Automatycznie kondensuj okno kontekstu", - "description": "Używa wywołania LLM do podsumowania poprzedniej konwersacji, gdy wykorzystana jest połowa okna kontekstu zadania." - }, "DIFF_STRATEGY_UNIFIED": { "name": "Użyj eksperymentalnej ujednoliconej strategii diff", "description": "Włącz eksperymentalną ujednoliconą strategię diff. Ta strategia może zmniejszyć liczbę ponownych prób spowodowanych błędami modelu, ale może powodować nieoczekiwane zachowanie lub nieprawidłowe edycje. Włącz tylko jeśli rozumiesz ryzyko i jesteś gotów dokładnie przeglądać wszystkie zmiany." diff --git a/webview-ui/src/i18n/locales/pt-BR/settings.json b/webview-ui/src/i18n/locales/pt-BR/settings.json index 1d74ea61a9..0c66a21847 100644 --- a/webview-ui/src/i18n/locales/pt-BR/settings.json +++ b/webview-ui/src/i18n/locales/pt-BR/settings.json @@ -396,10 +396,6 @@ }, "experimental": { "warning": "⚠️", - "AUTO_CONDENSE_CONTEXT": { - "name": "Condensar automaticamente a janela de contexto", - "description": "Usa uma chamada LLM para resumir a conversa anterior quando metade da janela de contexto da tarefa é usada." - }, "DIFF_STRATEGY_UNIFIED": { "name": "Usar estratégia diff unificada experimental", "description": "Ativar a estratégia diff unificada experimental. Esta estratégia pode reduzir o número de novas tentativas causadas por erros do modelo, mas pode causar comportamento inesperado ou edições incorretas. Ative apenas se compreender os riscos e estiver disposto a revisar cuidadosamente todas as alterações." diff --git a/webview-ui/src/i18n/locales/ru/settings.json b/webview-ui/src/i18n/locales/ru/settings.json index 796725a3c7..376763a7d8 100644 --- a/webview-ui/src/i18n/locales/ru/settings.json +++ b/webview-ui/src/i18n/locales/ru/settings.json @@ -396,10 +396,6 @@ }, "experimental": { "warning": "⚠️", - "AUTO_CONDENSE_CONTEXT": { - "name": "Автоматически сжимать окно контекста", - "description": "Использует вызов LLM для обобщения прошлого разговора, когда используется половина окна контекста задачи." - }, "DIFF_STRATEGY_UNIFIED": { "name": "Использовать экспериментальную стратегию унифицированного диффа", "description": "Включает экспериментальную стратегию унифицированного диффа. 
Может уменьшить количество повторных попыток из-за ошибок модели, но может привести к неожиданному поведению или неверным правкам. Включайте только если готовы внимательно проверять все изменения." diff --git a/webview-ui/src/i18n/locales/tr/settings.json b/webview-ui/src/i18n/locales/tr/settings.json index b32344c383..1d23b7e04e 100644 --- a/webview-ui/src/i18n/locales/tr/settings.json +++ b/webview-ui/src/i18n/locales/tr/settings.json @@ -396,10 +396,6 @@ }, "experimental": { "warning": "⚠️", - "AUTO_CONDENSE_CONTEXT": { - "name": "Bağlam penceresini otomatik olarak yoğunlaştır", - "description": "Görevin bağlam penceresinin yarısı kullanıldığında, geçmiş konuşmayı özetlemek için bir LLM çağrısı kullanır." - }, "DIFF_STRATEGY_UNIFIED": { "name": "Deneysel birleştirilmiş diff stratejisini kullan", "description": "Deneysel birleştirilmiş diff stratejisini etkinleştir. Bu strateji, model hatalarından kaynaklanan yeniden deneme sayısını azaltabilir, ancak beklenmeyen davranışlara veya hatalı düzenlemelere neden olabilir. Yalnızca riskleri anlıyorsanız ve tüm değişiklikleri dikkatlice incelemeye istekliyseniz etkinleştirin." diff --git a/webview-ui/src/i18n/locales/vi/settings.json b/webview-ui/src/i18n/locales/vi/settings.json index cf82b0288c..f964bf4b7a 100644 --- a/webview-ui/src/i18n/locales/vi/settings.json +++ b/webview-ui/src/i18n/locales/vi/settings.json @@ -396,10 +396,6 @@ }, "experimental": { "warning": "⚠️", - "AUTO_CONDENSE_CONTEXT": { - "name": "Tự động cô đọng cửa sổ ngữ cảnh", - "description": "Sử dụng cuộc gọi LLM để tóm tắt cuộc hội thoại trước đó khi một nửa cửa sổ ngữ cảnh của tác vụ được sử dụng." - }, "DIFF_STRATEGY_UNIFIED": { "name": "Sử dụng chiến lược diff thống nhất thử nghiệm", "description": "Bật chiến lược diff thống nhất thử nghiệm. Chiến lược này có thể giảm số lần thử lại do lỗi mô hình nhưng có thể gây ra hành vi không mong muốn hoặc chỉnh sửa không chính xác. Chỉ bật nếu bạn hiểu rõ các rủi ro và sẵn sàng xem xét cẩn thận tất cả các thay đổi." 
diff --git a/webview-ui/src/i18n/locales/zh-CN/settings.json b/webview-ui/src/i18n/locales/zh-CN/settings.json index ec179ee77f..ba14e48762 100644 --- a/webview-ui/src/i18n/locales/zh-CN/settings.json +++ b/webview-ui/src/i18n/locales/zh-CN/settings.json @@ -396,10 +396,6 @@ }, "experimental": { "warning": "⚠️", - "AUTO_CONDENSE_CONTEXT": { - "name": "自动压缩上下文窗口", - "description": "当任务的上下文窗口使用了一半时,使用LLM调用来总结过去的对话。" - }, "DIFF_STRATEGY_UNIFIED": { "name": "启用diff更新工具", "description": "可减少因模型错误导致的重复尝试,但可能引发意外操作。启用前请确保理解风险并会仔细检查所有修改。" diff --git a/webview-ui/src/i18n/locales/zh-TW/settings.json b/webview-ui/src/i18n/locales/zh-TW/settings.json index 067ef7400f..78c994457e 100644 --- a/webview-ui/src/i18n/locales/zh-TW/settings.json +++ b/webview-ui/src/i18n/locales/zh-TW/settings.json @@ -396,10 +396,6 @@ }, "experimental": { "warning": "⚠️", - "AUTO_CONDENSE_CONTEXT": { - "name": "自動壓縮上下文窗口", - "description": "當任務的上下文窗口使用了一半時,使用LLM調用來總結過去的對話。" - }, "DIFF_STRATEGY_UNIFIED": { "name": "使用實驗性統一差異比對策略", "description": "啟用實驗性的統一差異比對策略。此策略可能減少因模型錯誤而導致的重試次數,但也可能導致意外行為或錯誤的編輯。請務必了解風險,並願意仔細檢查所有變更後再啟用。" From 75166f36069474930dc2b699d4d76d00df6df691 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Wed, 14 May 2025 12:14:20 -0700 Subject: [PATCH 17/25] update setting copy --- src/core/condense/index.ts | 2 ++ webview-ui/src/i18n/locales/en/settings.json | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/core/condense/index.ts b/src/core/condense/index.ts index c4e20bb65d..6954274875 100644 --- a/src/core/condense/index.ts +++ b/src/core/condense/index.ts @@ -86,8 +86,10 @@ async function summarizeConversation(messages: ApiMessage[], apiHandler: ApiHand const requestMessages = maybeRemoveImageBlocks([...messagesToSummarize, finalRequestMessage], apiHandler).map( ({ role, content }) => ({ role, content }), ) + // Note: this doesn't need to be a stream, consider using something like apiHandler.completePrompt const stream = apiHandler.createMessage(SUMMARY_PROMPT, requestMessages) let summary = "" + // TODO(canyon): compute usage and cost for this operation and update the global metrics. for await (const chunk of stream) { if (chunk.type === "text") { summary += chunk.text diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json index e01714bad9..84d6c2ef36 100644 --- a/webview-ui/src/i18n/locales/en/settings.json +++ b/webview-ui/src/i18n/locales/en/settings.json @@ -397,8 +397,8 @@ "experimental": { "warning": "⚠️", "AUTO_CONDENSE_CONTEXT": { - "name": "Automatically condense the context window", - "description": "Uses an LLM call to summarize the past conversation when half of the task's context window is used." + "name": "Intelligently condense the context window", + "description": "Uses an LLM call to summarize the past conversation when the task's context window is almost full, rather than dropping old messages. Disclaimer: the cost of summarizing is not currently included in the API costs shown in the UI." 
}, "DIFF_STRATEGY_UNIFIED": { "name": "Use experimental unified diff strategy", From 00b27d353a4013b5a546ecdbbffc26c6bfd55188 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Wed, 14 May 2025 12:35:22 -0700 Subject: [PATCH 18/25] use sliding window logic for when to summarize --- src/core/condense/__tests__/index.test.ts | 276 ---------------------- src/core/condense/index.ts | 18 +- src/core/sliding-window/index.ts | 12 +- src/core/task/Task.ts | 33 +-- 4 files changed, 25 insertions(+), 314 deletions(-) delete mode 100644 src/core/condense/__tests__/index.test.ts diff --git a/src/core/condense/__tests__/index.test.ts b/src/core/condense/__tests__/index.test.ts deleted file mode 100644 index 966f35ec0d..0000000000 --- a/src/core/condense/__tests__/index.test.ts +++ /dev/null @@ -1,276 +0,0 @@ -import { jest } from "@jest/globals" -import { ApiHandler } from "../../../api" -import { ApiMessage } from "../../task-persistence/apiMessages" -import { maybeRemoveImageBlocks } from "../../../api/transform/image-cleaning" -import { summarizeConversationIfNeeded, getMessagesSinceLastSummary } from "../index" -import { ApiStream, ApiStreamChunk } from "../../../api/transform/stream" -import { CONTEXT_FRAC_FOR_SUMMARY, N_MESSAGES_TO_KEEP } from "../index" - -const CONTEXT_WINDOW_SIZE = 1000 -const OVER_THRESHOLD_TOTAL_TOKENS = Math.ceil(CONTEXT_WINDOW_SIZE * CONTEXT_FRAC_FOR_SUMMARY) + 1 - -// Mock dependencies -jest.mock("../../../api/transform/image-cleaning", () => ({ - maybeRemoveImageBlocks: jest.fn((messages) => messages), -})) - -// Mock Anthropic SDK -jest.mock("@anthropic-ai/sdk", () => { - return { - default: jest.fn().mockImplementation(() => ({ - messages: { - create: jest.fn(), - }, - })), - } -}) - -describe("Conversation Condensing", () => { - // Mock API handler - const mockApiHandler: jest.Mocked = { - createMessage: jest.fn(), - } as unknown as jest.Mocked - - // Reset mocks before each test - beforeEach(() => { - jest.clearAllMocks() - - // Setup default mock for createMessage - mockApiHandler.createMessage.mockImplementation((): ApiStream => { - return (async function* (): AsyncGenerator { - yield { type: "text", text: "This is a summary of the conversation." 
} - })() - }) - }) - - describe("getMessagesSinceLastSummary", () => { - it("should return all messages if there is no summary", () => { - const messages: ApiMessage[] = [ - { role: "user", content: "Hello", ts: 1 }, - { role: "assistant", content: "Hi there", ts: 2 }, - ] - - const result = getMessagesSinceLastSummary(messages) - expect(result).toEqual(messages) - }) - - it("should return messages since the last summary", () => { - const messages: ApiMessage[] = [ - { role: "user", content: "First message", ts: 1 }, - { role: "assistant", content: "Summary of conversation", ts: 2, isSummary: true }, - { role: "user", content: "New message", ts: 3 }, - { role: "assistant", content: "Response", ts: 4 }, - ] - - const result = getMessagesSinceLastSummary(messages) - expect(result).toEqual([ - { role: "assistant", content: "Summary of conversation", ts: 2, isSummary: true }, - { role: "user", content: "New message", ts: 3 }, - { role: "assistant", content: "Response", ts: 4 }, - ]) - }) - - it("should handle multiple summary messages and return since the last one", () => { - const messages: ApiMessage[] = [ - { role: "user", content: "First message", ts: 1 }, - { role: "assistant", content: "First summary", ts: 2, isSummary: true }, - { role: "user", content: "Second message", ts: 3 }, - { role: "assistant", content: "Second summary", ts: 4, isSummary: true }, - { role: "user", content: "New message", ts: 5 }, - ] - - const result = getMessagesSinceLastSummary(messages) - expect(result).toEqual([ - { role: "assistant", content: "Second summary", ts: 4, isSummary: true }, - { role: "user", content: "New message", ts: 5 }, - ]) - }) - - it("should handle empty message array", () => { - const messages: ApiMessage[] = [] - const result = getMessagesSinceLastSummary(messages) - expect(result).toEqual([]) - }) - }) - - describe("summarizeConversationIfNeeded", () => { - it("should not summarize when below token threshold", async () => { - const messages: ApiMessage[] = [ - { role: "user", content: "Hello", ts: 1 }, - { role: "assistant", content: "Hi there", ts: 2 }, - ] - - const totalTokens = 100 - const result = await summarizeConversationIfNeeded( - messages, - totalTokens, - CONTEXT_WINDOW_SIZE, - mockApiHandler, - ) - - expect(result).toBe(messages) - expect(mockApiHandler.createMessage).not.toHaveBeenCalled() - }) - - it("should summarize when above token threshold", async () => { - const messages: ApiMessage[] = [ - { role: "user", content: "Message 1", ts: 1 }, - { role: "assistant", content: "Response 1", ts: 2 }, - { role: "user", content: "Message 2", ts: 3 }, - { role: "assistant", content: "Response 2", ts: 4 }, - { role: "user", content: "Message 3", ts: 5 }, - { role: "assistant", content: "Response 3", ts: 6 }, - { role: "user", content: "Message 4", ts: 7 }, - ] - - const result = await summarizeConversationIfNeeded( - messages, - OVER_THRESHOLD_TOTAL_TOKENS, - CONTEXT_WINDOW_SIZE, - mockApiHandler, - ) - - // Should have called createMessage - expect(mockApiHandler.createMessage).toHaveBeenCalled() - - // Should have a summary message inserted - expect(result.some((msg) => msg.isSummary)).toBe(true) - - // Should preserve the last N_MESSAGES_TO_KEEP messages - for (let i = 1; i <= N_MESSAGES_TO_KEEP; i++) { - expect(result).toContainEqual(messages[messages.length - i]) - } - }) - - it("should not summarize if there are not enough messages", async () => { - const messages: ApiMessage[] = [{ role: "user", content: "Hello", ts: 1 }] - - const result = await 
summarizeConversationIfNeeded( - messages, - OVER_THRESHOLD_TOTAL_TOKENS, - CONTEXT_WINDOW_SIZE, - mockApiHandler, - ) - - expect(result).toBe(messages) - expect(mockApiHandler.createMessage).not.toHaveBeenCalled() - }) - - it("should not summarize if we recently summarized", async () => { - const messages: ApiMessage[] = [ - { role: "user", content: "Message 1", ts: 1 }, - { role: "assistant", content: "Response 1", ts: 2 }, - { role: "user", content: "Message 2", ts: 3 }, - { role: "assistant", content: "Summary", ts: 4, isSummary: true }, - { role: "user", content: "Message 3", ts: 5 }, - ] - - const result = await summarizeConversationIfNeeded( - messages, - OVER_THRESHOLD_TOTAL_TOKENS, - CONTEXT_WINDOW_SIZE, - mockApiHandler, - ) - - // Should not have called createMessage because one of the last 3 messages is already a summary - expect(mockApiHandler.createMessage).not.toHaveBeenCalled() - expect(result).toBe(messages) - }) - - it("should handle empty API response", async () => { - // Setup mock to return empty summary - mockApiHandler.createMessage.mockImplementation((): ApiStream => { - return (async function* (): AsyncGenerator { - yield { type: "text", text: "" } - })() - }) - - const messages: ApiMessage[] = [ - { role: "user", content: "Message 1", ts: 1 }, - { role: "assistant", content: "Response 1", ts: 2 }, - { role: "user", content: "Message 2", ts: 3 }, - { role: "assistant", content: "Response 2", ts: 4 }, - { role: "user", content: "Message 3", ts: 5 }, - { role: "assistant", content: "Response 3", ts: 6 }, - { role: "user", content: "Message 4", ts: 7 }, - ] - - const consoleSpy = jest.spyOn(console, "warn").mockImplementation(() => {}) - - const result = await summarizeConversationIfNeeded( - messages, - OVER_THRESHOLD_TOTAL_TOKENS, - CONTEXT_WINDOW_SIZE, - mockApiHandler, - ) - - // Should have called createMessage - expect(mockApiHandler.createMessage).toHaveBeenCalled() - - // Should have logged a warning - expect(consoleSpy).toHaveBeenCalledWith("Received empty summary from API") - - // Should return original messages - expect(result).toBe(messages) - - consoleSpy.mockRestore() - }) - - it("should correctly handle non-text chunks in API response", async () => { - // Setup mock to return mixed chunks - mockApiHandler.createMessage.mockImplementation((): ApiStream => { - return (async function* (): AsyncGenerator { - yield { type: "text", text: "This is " } as ApiStreamChunk - yield { type: "text", text: "a summary." 
} as ApiStreamChunk - })() - }) - - const messages: ApiMessage[] = [ - { role: "user", content: "Message 1", ts: 1 }, - { role: "assistant", content: "Response 1", ts: 2 }, - { role: "user", content: "Message 2", ts: 3 }, - { role: "assistant", content: "Response 2", ts: 4 }, - { role: "user", content: "Message 3", ts: 5 }, - { role: "assistant", content: "Response 3", ts: 6 }, - { role: "user", content: "Message 4", ts: 7 }, - ] - - const result = await summarizeConversationIfNeeded( - messages, - OVER_THRESHOLD_TOTAL_TOKENS, - CONTEXT_WINDOW_SIZE, - mockApiHandler, - ) - - // Should have called createMessage - expect(mockApiHandler.createMessage).toHaveBeenCalled() - - // Should have a summary message with the correct content - const summaryMessage = result.find((msg) => msg.isSummary) - expect(summaryMessage).toBeDefined() - expect(summaryMessage?.content).toBe("This is a summary.") - }) - - it("should use maybeRemoveImageBlocks when preparing messages for summarization", async () => { - const messages: ApiMessage[] = [ - { role: "user", content: "Message 1", ts: 1 }, - { role: "assistant", content: "Response 1", ts: 2 }, - { role: "user", content: "Message 2", ts: 3 }, - { role: "assistant", content: "Response 2", ts: 4 }, - { role: "user", content: "Message 3", ts: 5 }, - { role: "assistant", content: "Response 3", ts: 6 }, - { role: "user", content: "Message 4", ts: 7 }, - ] - - await summarizeConversationIfNeeded( - messages, - OVER_THRESHOLD_TOTAL_TOKENS, - CONTEXT_WINDOW_SIZE, - mockApiHandler, - ) - - // Should have called maybeRemoveImageBlocks - expect(maybeRemoveImageBlocks).toHaveBeenCalled() - }) - }) -}) diff --git a/src/core/condense/index.ts b/src/core/condense/index.ts index 6954274875..6aed4f2550 100644 --- a/src/core/condense/index.ts +++ b/src/core/condense/index.ts @@ -3,7 +3,6 @@ import { ApiHandler } from "../../api" import { ApiMessage } from "../task-persistence/apiMessages" import { maybeRemoveImageBlocks } from "../../api/transform/image-cleaning" -export const CONTEXT_FRAC_FOR_SUMMARY = 0.5 export const N_MESSAGES_TO_KEEP = 3 const SUMMARY_PROMPT = `\ @@ -47,8 +46,7 @@ Output only the summary of the conversation so far, without any additional comme ` /** - * Conditionally summarizes the conversation messages if the total token count - * exceeds a set fraction of the context window. + * Summarizes the conversation messages using an LLM call * * @param {ApiMessage[]} messages - The conversation messages * @param {number} totalTokens - The total number of tokens in the conversation, excluding the last user message. @@ -56,19 +54,7 @@ Output only the summary of the conversation so far, without any additional comme * @param {ApiHandler} apiHandler - The API handler to use for token counting. * @returns {ApiMessage[]} - The input messages, potentially including a new summary message before the last message. 
*/ -export async function summarizeConversationIfNeeded( - messages: ApiMessage[], - totalTokens: number, - contextWindow: number, - apiHandler: ApiHandler, -): Promise<ApiMessage[]> { - if (totalTokens < contextWindow * CONTEXT_FRAC_FOR_SUMMARY) { - return messages - } - return await summarizeConversation(messages, apiHandler) -} - -async function summarizeConversation(messages: ApiMessage[], apiHandler: ApiHandler): Promise<ApiMessage[]> { +export async function summarizeConversation(messages: ApiMessage[], apiHandler: ApiHandler): Promise<ApiMessage[]> { const messagesToSummarize = getMessagesSinceLastSummary(messages.slice(0, -N_MESSAGES_TO_KEEP)) if (messagesToSummarize.length <= 1) { return messages // Not enough messages to warrant a summary } diff --git a/src/core/sliding-window/index.ts b/src/core/sliding-window/index.ts index 75395ecd75..96e3fcb780 100644 --- a/src/core/sliding-window/index.ts +++ b/src/core/sliding-window/index.ts @@ -1,5 +1,6 @@ import { Anthropic } from "@anthropic-ai/sdk" import { ApiHandler } from "../../api" +import { summarizeConversation } from "../condense" /** * Default percentage of the context window to use as a buffer when deciding when to truncate @@ -53,6 +54,7 @@ export function truncateConversation( * @param {number} contextWindow - The context window size. * @param {number} maxTokens - The maximum number of tokens allowed. * @param {ApiHandler} apiHandler - The API handler to use for token counting. + * @param {boolean} autoCondenseContext - Whether to use LLM summarization or sliding window implementation * @returns {Anthropic.Messages.MessageParam[]} The original or truncated conversation messages. */ @@ -62,6 +64,7 @@ type TruncateOptions = { contextWindow: number maxTokens?: number | null apiHandler: ApiHandler + autoCondenseContext: boolean } /** @@ -77,6 +80,7 @@ export async function truncateConversationIfNeeded({ contextWindow, maxTokens, apiHandler, + autoCondenseContext, }: TruncateOptions): Promise<Anthropic.Messages.MessageParam[]> { // Calculate the maximum tokens reserved for response const reservedTokens = maxTokens || contextWindow * 0.2 @@ -96,5 +100,11 @@ const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens // Determine if truncation is needed and apply if necessary - return effectiveTokens > allowedTokens ?
truncateConversation(messages, 0.5) : messages + if (effectiveTokens <= allowedTokens) { + return messages + } else if (autoCondenseContext) { + return summarizeConversation(messages, apiHandler) + } else { + return truncateConversation(messages, 0.5) + } } diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 697f07a22b..1b63ba9f4c 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -80,7 +80,7 @@ import { } from "../checkpoints" import { processUserContentMentions } from "../mentions/processUserContentMentions" import { ApiMessage } from "../task-persistence/apiMessages" -import { getMessagesSinceLastSummary, summarizeConversationIfNeeded } from "../condense" +import { getMessagesSinceLastSummary } from "../condense" import { maybeRemoveImageBlocks } from "../../api/transform/image-cleaning" export type ClineEvents = { @@ -1482,26 +1482,17 @@ export class Task extends EventEmitter { const contextWindow = modelInfo.contextWindow - let condensedMessages - if (experiments?.autoCondenseContext) { - condensedMessages = await summarizeConversationIfNeeded( - this.apiConversationHistory, - totalTokens, - contextWindow, - this.api, - ) - } else { - condensedMessages = await truncateConversationIfNeeded({ - messages: this.apiConversationHistory, - totalTokens, - maxTokens, - contextWindow, - apiHandler: this.api, - }) - } - - if (condensedMessages !== this.apiConversationHistory) { - await this.overwriteApiConversationHistory(condensedMessages) + const autoCondenseContext = experiments?.autoCondenseContext ?? false + const trimmedMessages = await truncateConversationIfNeeded({ + messages: this.apiConversationHistory, + totalTokens, + maxTokens, + contextWindow, + apiHandler: this.api, + autoCondenseContext, + }) + if (trimmedMessages !== this.apiConversationHistory) { + await this.overwriteApiConversationHistory(trimmedMessages) } } From 39451dd81799c2d1f12afba5e2af6d4d7e9439eb Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Wed, 14 May 2025 12:38:48 -0700 Subject: [PATCH 19/25] wip --- src/core/sliding-window/index.ts | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/core/sliding-window/index.ts b/src/core/sliding-window/index.ts index 96e3fcb780..7d711fcb48 100644 --- a/src/core/sliding-window/index.ts +++ b/src/core/sliding-window/index.ts @@ -1,6 +1,7 @@ import { Anthropic } from "@anthropic-ai/sdk" import { ApiHandler } from "../../api" import { summarizeConversation } from "../condense" +import { ApiMessage } from "../task-persistence/apiMessages" /** * Default percentage of the context window to use as a buffer when deciding when to truncate @@ -28,14 +29,11 @@ export async function estimateTokenCount( * The first message is always retained, and a specified fraction (rounded to an even number) * of messages from the beginning (excluding the first) is removed. * - * @param {Anthropic.Messages.MessageParam[]} messages - The conversation messages. + * @param {ApiMessage[]} messages - The conversation messages. * @param {number} fracToRemove - The fraction (between 0 and 1) of messages (excluding the first) to remove. - * @returns {Anthropic.Messages.MessageParam[]} The truncated conversation messages. + * @returns {ApiMessage[]} The truncated conversation messages. 
*/ -export function truncateConversation( - messages: Anthropic.Messages.MessageParam[], - fracToRemove: number, -): Anthropic.Messages.MessageParam[] { +export function truncateConversation(messages: ApiMessage[], fracToRemove: number): ApiMessage[] { const truncatedMessages = [messages[0]] const rawMessagesToRemove = Math.floor((messages.length - 1) * fracToRemove) const messagesToRemove = rawMessagesToRemove - (rawMessagesToRemove % 2) @@ -49,17 +47,17 @@ export function truncateConversation( * Conditionally truncates the conversation messages if the total token count * exceeds the model's limit, considering the size of incoming content. * - * @param {Anthropic.Messages.MessageParam[]} messages - The conversation messages. + * @param {ApiMessage[]} messages - The conversation messages. * @param {number} totalTokens - The total number of tokens in the conversation (excluding the last user message). * @param {number} contextWindow - The context window size. * @param {number} maxTokens - The maximum number of tokens allowed. * @param {ApiHandler} apiHandler - The API handler to use for token counting. * @param {boolean} autoCondenseContext - Whether to use LLM summarization or sliding window implementation - * @returns {Anthropic.Messages.MessageParam[]} The original or truncated conversation messages. + * @returns {ApiMessage[]} The original or truncated conversation messages. */ type TruncateOptions = { - messages: Anthropic.Messages.MessageParam[] + messages: ApiMessage[] totalTokens: number contextWindow: number maxTokens?: number | null @@ -72,7 +70,7 @@ type TruncateOptions = { * exceeds the model's limit, considering the size of incoming content. * * @param {TruncateOptions} options - The options for truncation - * @returns {Promise} The original or truncated conversation messages. + * @returns {Promise} The original or truncated conversation messages. 
*/ export async function truncateConversationIfNeeded({ messages, @@ -81,7 +79,7 @@ export async function truncateConversationIfNeeded({ maxTokens, apiHandler, autoCondenseContext, -}: TruncateOptions): Promise { +}: TruncateOptions): Promise { // Calculate the maximum tokens reserved for response const reservedTokens = maxTokens || contextWindow * 0.2 @@ -103,8 +101,10 @@ export async function truncateConversationIfNeeded({ if (effectiveTokens <= allowedTokens) { return messages } else if (autoCondenseContext) { - return summarizeConversation(messages, apiHandler) - } else { - return truncateConversation(messages, 0.5) + const summarizedMessages = await summarizeConversation(messages, apiHandler) + if (messages !== summarizedMessages) { + return summarizedMessages + } } + return truncateConversation(messages, 0.5) } From cbcc5757e155ac83bdbc67e7beb98d62f7f599da Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Wed, 14 May 2025 12:40:03 -0700 Subject: [PATCH 20/25] wip --- .../__tests__/sliding-window.test.ts | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/core/sliding-window/__tests__/sliding-window.test.ts b/src/core/sliding-window/__tests__/sliding-window.test.ts index 16af2d4630..7890b55ec8 100644 --- a/src/core/sliding-window/__tests__/sliding-window.test.ts +++ b/src/core/sliding-window/__tests__/sliding-window.test.ts @@ -10,6 +10,7 @@ import { truncateConversation, truncateConversationIfNeeded, } from "../index" +import { ApiMessage } from "../../task-persistence/apiMessages" // Create a mock ApiHandler for testing class MockApiHandler extends BaseProvider { @@ -41,7 +42,7 @@ const mockApiHandler = new MockApiHandler() */ describe("truncateConversation", () => { it("should retain the first message", () => { - const messages: Anthropic.Messages.MessageParam[] = [ + const messages: ApiMessage[] = [ { role: "user", content: "First message" }, { role: "assistant", content: "Second message" }, { role: "user", content: "Third message" }, @@ -58,7 +59,7 @@ describe("truncateConversation", () => { }) it("should remove the specified fraction of messages (rounded to even number)", () => { - const messages: Anthropic.Messages.MessageParam[] = [ + const messages: ApiMessage[] = [ { role: "user", content: "First message" }, { role: "assistant", content: "Second message" }, { role: "user", content: "Third message" }, @@ -77,7 +78,7 @@ describe("truncateConversation", () => { }) it("should round to an even number of messages to remove", () => { - const messages: Anthropic.Messages.MessageParam[] = [ + const messages: ApiMessage[] = [ { role: "user", content: "First message" }, { role: "assistant", content: "Second message" }, { role: "user", content: "Third message" }, @@ -96,7 +97,7 @@ describe("truncateConversation", () => { }) it("should handle edge case with fracToRemove = 0", () => { - const messages: Anthropic.Messages.MessageParam[] = [ + const messages: ApiMessage[] = [ { role: "user", content: "First message" }, { role: "assistant", content: "Second message" }, { role: "user", content: "Third message" }, @@ -108,7 +109,7 @@ describe("truncateConversation", () => { }) it("should handle edge case with fracToRemove = 1", () => { - const messages: Anthropic.Messages.MessageParam[] = [ + const messages: ApiMessage[] = [ { role: "user", content: "First message" }, { role: "assistant", content: "Second message" }, { role: "user", content: "Third message" }, @@ -224,7 +225,7 @@ describe("truncateConversationIfNeeded", () => { maxTokens, }) - const 
messages: Anthropic.Messages.MessageParam[] = [ + const messages: ApiMessage[] = [ { role: "user", content: "First message" }, { role: "assistant", content: "Second message" }, { role: "user", content: "Third message" }, @@ -328,7 +329,7 @@ describe("truncateConversationIfNeeded", () => { // Test case 1: Small content that won't push us over the threshold const smallContent = [{ type: "text" as const, text: "Small content" }] const smallContentTokens = await estimateTokenCount(smallContent, mockApiHandler) - const messagesWithSmallContent: Anthropic.Messages.MessageParam[] = [ + const messagesWithSmallContent: ApiMessage[] = [ ...messages.slice(0, -1), { role: messages[messages.length - 1].role, content: smallContent }, ] @@ -353,7 +354,7 @@ describe("truncateConversationIfNeeded", () => { }, ] const largeContentTokens = await estimateTokenCount(largeContent, mockApiHandler) - const messagesWithLargeContent: Anthropic.Messages.MessageParam[] = [ + const messagesWithLargeContent: ApiMessage[] = [ ...messages.slice(0, -1), { role: messages[messages.length - 1].role, content: largeContent }, ] @@ -372,7 +373,7 @@ describe("truncateConversationIfNeeded", () => { // Test case 3: Very large content that will definitely exceed threshold const veryLargeContent = [{ type: "text" as const, text: "X".repeat(1000) }] const veryLargeContentTokens = await estimateTokenCount(veryLargeContent, mockApiHandler) - const messagesWithVeryLargeContent: Anthropic.Messages.MessageParam[] = [ + const messagesWithVeryLargeContent: ApiMessage[] = [ ...messages.slice(0, -1), { role: messages[messages.length - 1].role, content: veryLargeContent }, ] @@ -424,7 +425,7 @@ describe("getMaxTokens", () => { }) // Reuse across tests for consistency - const messages: Anthropic.Messages.MessageParam[] = [ + const messages: ApiMessage[] = [ { role: "user", content: "First message" }, { role: "assistant", content: "Second message" }, { role: "user", content: "Third message" }, From 9c8ba3e7a385101d869fb39f67dcb6cb6e5243c6 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Wed, 14 May 2025 12:41:39 -0700 Subject: [PATCH 21/25] make param optional --- src/core/sliding-window/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/sliding-window/index.ts b/src/core/sliding-window/index.ts index 7d711fcb48..d17bf7fc57 100644 --- a/src/core/sliding-window/index.ts +++ b/src/core/sliding-window/index.ts @@ -62,7 +62,7 @@ type TruncateOptions = { contextWindow: number maxTokens?: number | null apiHandler: ApiHandler - autoCondenseContext: boolean + autoCondenseContext?: boolean } /** From 4c62fa5e385d8d5ddd337cd5abe6124fe832aa32 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Wed, 14 May 2025 12:52:08 -0700 Subject: [PATCH 22/25] new tests --- src/core/condense/__tests__/index.test.ts | 228 ++++++++++++++++++++++ 1 file changed, 228 insertions(+) create mode 100644 src/core/condense/__tests__/index.test.ts diff --git a/src/core/condense/__tests__/index.test.ts b/src/core/condense/__tests__/index.test.ts new file mode 100644 index 0000000000..10769589f8 --- /dev/null +++ b/src/core/condense/__tests__/index.test.ts @@ -0,0 +1,228 @@ +import { describe, expect, it, jest, beforeEach } from "@jest/globals" +import { ApiHandler } from "../../../api" +import { ApiMessage } from "../../task-persistence/apiMessages" +import { maybeRemoveImageBlocks } from "../../../api/transform/image-cleaning" +import { summarizeConversation, getMessagesSinceLastSummary, N_MESSAGES_TO_KEEP } from "../index" + +// Mock 
dependencies +jest.mock("../../../api/transform/image-cleaning", () => ({ + maybeRemoveImageBlocks: jest.fn((messages: ApiMessage[], _apiHandler: ApiHandler) => [...messages]), +})) + +describe("getMessagesSinceLastSummary", () => { + it("should return all messages when there is no summary", () => { + const messages: ApiMessage[] = [ + { role: "user", content: "Hello", ts: 1 }, + { role: "assistant", content: "Hi there", ts: 2 }, + { role: "user", content: "How are you?", ts: 3 }, + ] + + const result = getMessagesSinceLastSummary(messages) + expect(result).toEqual(messages) + }) + + it("should return messages since the last summary", () => { + const messages: ApiMessage[] = [ + { role: "user", content: "Hello", ts: 1 }, + { role: "assistant", content: "Hi there", ts: 2 }, + { role: "assistant", content: "Summary of conversation", ts: 3, isSummary: true }, + { role: "user", content: "How are you?", ts: 4 }, + { role: "assistant", content: "I'm good", ts: 5 }, + ] + + const result = getMessagesSinceLastSummary(messages) + expect(result).toEqual([ + { role: "assistant", content: "Summary of conversation", ts: 3, isSummary: true }, + { role: "user", content: "How are you?", ts: 4 }, + { role: "assistant", content: "I'm good", ts: 5 }, + ]) + }) + + it("should handle multiple summary messages and return since the last one", () => { + const messages: ApiMessage[] = [ + { role: "user", content: "Hello", ts: 1 }, + { role: "assistant", content: "First summary", ts: 2, isSummary: true }, + { role: "user", content: "How are you?", ts: 3 }, + { role: "assistant", content: "Second summary", ts: 4, isSummary: true }, + { role: "user", content: "What's new?", ts: 5 }, + ] + + const result = getMessagesSinceLastSummary(messages) + expect(result).toEqual([ + { role: "assistant", content: "Second summary", ts: 4, isSummary: true }, + { role: "user", content: "What's new?", ts: 5 }, + ]) + }) + + it("should handle empty messages array", () => { + const result = getMessagesSinceLastSummary([]) + expect(result).toEqual([]) + }) +}) + +describe("summarizeConversation", () => { + // Mock ApiHandler + let mockApiHandler: ApiHandler + let mockStream: AsyncGenerator + + beforeEach(() => { + // Reset mocks + jest.clearAllMocks() + + // Setup mock stream + mockStream = (async function* () { + yield { type: "text" as const, text: "This is " } + yield { type: "text" as const, text: "a summary" } + })() + + // Setup mock API handler + mockApiHandler = { + createMessage: jest.fn().mockReturnValue(mockStream), + countTokens: jest.fn().mockImplementation(() => Promise.resolve(100)), + getModel: jest.fn().mockReturnValue({ + id: "test-model", + info: { + contextWindow: 8000, + supportsImages: true, + supportsComputerUse: true, + supportsVision: true, + maxTokens: 4000, + supportsPromptCache: true, + maxCachePoints: 10, + minTokensPerCachePoint: 100, + cachableFields: ["system", "messages"], + }, + }), + } as unknown as ApiHandler + }) + + it("should not summarize when there are not enough messages", async () => { + const messages: ApiMessage[] = [ + { role: "user", content: "Hello", ts: 1 }, + { role: "assistant", content: "Hi there", ts: 2 }, + ] + + const result = await summarizeConversation(messages, mockApiHandler) + expect(result).toEqual(messages) + expect(mockApiHandler.createMessage).not.toHaveBeenCalled() + }) + + it("should not summarize when there was a recent summary", async () => { + const messages: ApiMessage[] = [ + { role: "user", content: "Hello", ts: 1 }, + { role: "assistant", content: "Hi there", ts: 2 
}, + { role: "user", content: "How are you?", ts: 3 }, + { role: "assistant", content: "I'm good", ts: 4 }, + { role: "user", content: "What's new?", ts: 5 }, + { role: "assistant", content: "Not much", ts: 6, isSummary: true }, // Recent summary + { role: "user", content: "Tell me more", ts: 7 }, + ] + + const result = await summarizeConversation(messages, mockApiHandler) + expect(result).toEqual(messages) + expect(mockApiHandler.createMessage).not.toHaveBeenCalled() + }) + + it("should summarize conversation and insert summary message", async () => { + const messages: ApiMessage[] = [ + { role: "user", content: "Hello", ts: 1 }, + { role: "assistant", content: "Hi there", ts: 2 }, + { role: "user", content: "How are you?", ts: 3 }, + { role: "assistant", content: "I'm good", ts: 4 }, + { role: "user", content: "What's new?", ts: 5 }, + { role: "assistant", content: "Not much", ts: 6 }, + { role: "user", content: "Tell me more", ts: 7 }, + ] + + const result = await summarizeConversation(messages, mockApiHandler) + + // Check that the API was called correctly + expect(mockApiHandler.createMessage).toHaveBeenCalled() + expect(maybeRemoveImageBlocks).toHaveBeenCalled() + + // Verify the structure of the result + // The result should be: original messages (except last N) + summary + last N messages + expect(result.length).toBe(messages.length + 1) // Original + summary + + // Check that the summary message was inserted correctly + const summaryMessage = result[result.length - N_MESSAGES_TO_KEEP - 1] + expect(summaryMessage.role).toBe("assistant") + expect(summaryMessage.content).toBe("This is a summary") + expect(summaryMessage.isSummary).toBe(true) + + // Check that the last N_MESSAGES_TO_KEEP messages are preserved + const lastMessages = messages.slice(-N_MESSAGES_TO_KEEP) + expect(result.slice(-N_MESSAGES_TO_KEEP)).toEqual(lastMessages) + }) + + it("should handle empty summary response", async () => { + // We need enough messages to trigger summarization + const messages: ApiMessage[] = [ + { role: "user", content: "Hello", ts: 1 }, + { role: "assistant", content: "Hi there", ts: 2 }, + { role: "user", content: "How are you?", ts: 3 }, + { role: "assistant", content: "I'm good", ts: 4 }, + { role: "user", content: "What's new?", ts: 5 }, + { role: "assistant", content: "Not much", ts: 6 }, + { role: "user", content: "Tell me more", ts: 7 }, + ] + + // Mock console.warn before we call the function + const originalWarn = console.warn + const mockWarn = jest.fn() + console.warn = mockWarn + + // Setup empty summary response + const emptyStream = (async function* () { + yield { type: "text" as const, text: "" } + })() + + // Create a new mock for createMessage that returns empty stream + const createMessageMock = jest.fn().mockReturnValue(emptyStream) + mockApiHandler.createMessage = createMessageMock as any + + // We need to mock maybeRemoveImageBlocks to return the expected messages + ;(maybeRemoveImageBlocks as jest.Mock).mockImplementationOnce((messages: any) => { + return messages.map(({ role, content }: { role: string; content: any }) => ({ role, content })) + }) + + const result = await summarizeConversation(messages, mockApiHandler) + + // Should return original messages when summary is empty + expect(result).toEqual(messages) + expect(mockWarn).toHaveBeenCalledWith("Received empty summary from API") + + // Restore console.warn + console.warn = originalWarn + }) + + it("should correctly format the request to the API", async () => { + const messages: ApiMessage[] = [ + { role: "user", 
content: "Hello", ts: 1 }, + { role: "assistant", content: "Hi there", ts: 2 }, + { role: "user", content: "How are you?", ts: 3 }, + { role: "assistant", content: "I'm good", ts: 4 }, + { role: "user", content: "What's new?", ts: 5 }, + { role: "assistant", content: "Not much", ts: 6 }, + { role: "user", content: "Tell me more", ts: 7 }, + ] + + await summarizeConversation(messages, mockApiHandler) + + // Verify the final request message + const expectedFinalMessage = { + role: "user", + content: "Summarize the conversation so far, as described in the prompt instructions.", + } + + // Verify that createMessage was called with the correct prompt + expect(mockApiHandler.createMessage).toHaveBeenCalledWith( + expect.stringContaining("Your task is to create a detailed summary of the conversation"), + expect.any(Array), + ) + + // Check that maybeRemoveImageBlocks was called with the correct messages + const mockCallArgs = (maybeRemoveImageBlocks as jest.Mock).mock.calls[0][0] as any[] + expect(mockCallArgs[mockCallArgs.length - 1]).toEqual(expectedFinalMessage) + }) +}) From 8ce7830722f346d4cfee674cc663c2ba3b9572e1 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Wed, 14 May 2025 13:07:59 -0700 Subject: [PATCH 23/25] update translations, thanks Roo --- webview-ui/src/i18n/locales/ca/settings.json | 4 ++++ webview-ui/src/i18n/locales/de/settings.json | 4 ++++ webview-ui/src/i18n/locales/es/settings.json | 4 ++++ webview-ui/src/i18n/locales/fr/settings.json | 4 ++++ webview-ui/src/i18n/locales/hi/settings.json | 4 ++++ webview-ui/src/i18n/locales/it/settings.json | 4 ++++ webview-ui/src/i18n/locales/ja/settings.json | 4 ++++ webview-ui/src/i18n/locales/ko/settings.json | 4 ++++ webview-ui/src/i18n/locales/nl/settings.json | 4 ++++ webview-ui/src/i18n/locales/pl/settings.json | 4 ++++ webview-ui/src/i18n/locales/pt-BR/settings.json | 4 ++++ webview-ui/src/i18n/locales/ru/settings.json | 4 ++++ webview-ui/src/i18n/locales/tr/settings.json | 4 ++++ webview-ui/src/i18n/locales/vi/settings.json | 4 ++++ webview-ui/src/i18n/locales/zh-CN/settings.json | 4 ++++ webview-ui/src/i18n/locales/zh-TW/settings.json | 4 ++++ 16 files changed, 64 insertions(+) diff --git a/webview-ui/src/i18n/locales/ca/settings.json b/webview-ui/src/i18n/locales/ca/settings.json index c9e1cb5606..a6262f5c0d 100644 --- a/webview-ui/src/i18n/locales/ca/settings.json +++ b/webview-ui/src/i18n/locales/ca/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Condensar intel·ligentment la finestra de context", + "description": "Utilitza una crida LLM per resumir la conversa anterior quan la finestra de context de la tasca està gairebé plena, en lloc d'eliminar missatges antics. Avís: el cost de resumir actualment no s'inclou en els costos d'API mostrats a la interfície." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Utilitzar estratègia diff unificada experimental", "description": "Activar l'estratègia diff unificada experimental. Aquesta estratègia podria reduir el nombre de reintents causats per errors del model, però pot causar comportaments inesperats o edicions incorrectes. Activeu-la només si enteneu els riscos i esteu disposats a revisar acuradament tots els canvis." 
diff --git a/webview-ui/src/i18n/locales/de/settings.json b/webview-ui/src/i18n/locales/de/settings.json index aac308c4a1..c15e0955b3 100644 --- a/webview-ui/src/i18n/locales/de/settings.json +++ b/webview-ui/src/i18n/locales/de/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Kontextfenster intelligent komprimieren", + "description": "Verwendet einen LLM-Aufruf, um das vorherige Gespräch zusammenzufassen, wenn das Kontextfenster der Aufgabe fast voll ist, anstatt alte Nachrichten zu verwerfen. Hinweis: Die Kosten für die Zusammenfassung sind derzeit nicht in den in der Benutzeroberfläche angezeigten API-Kosten enthalten." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Experimentelle einheitliche Diff-Strategie verwenden", "description": "Aktiviert die experimentelle einheitliche Diff-Strategie. Diese Strategie könnte die Anzahl der durch Modellfehler verursachten Wiederholungen reduzieren, kann aber unerwartetes Verhalten oder falsche Bearbeitungen verursachen. Nur aktivieren, wenn du die Risiken verstehst und bereit bist, alle Änderungen sorgfältig zu überprüfen." diff --git a/webview-ui/src/i18n/locales/es/settings.json b/webview-ui/src/i18n/locales/es/settings.json index f862f13405..26047fadbf 100644 --- a/webview-ui/src/i18n/locales/es/settings.json +++ b/webview-ui/src/i18n/locales/es/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Condensar inteligentemente la ventana de contexto", + "description": "Utiliza una llamada LLM para resumir la conversación anterior cuando la ventana de contexto de la tarea está casi llena, en lugar de eliminar mensajes antiguos. Aviso: el costo de resumir actualmente no está incluido en los costos de API mostrados en la interfaz." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Usar estrategia de diff unificada experimental", "description": "Habilitar la estrategia de diff unificada experimental. Esta estrategia podría reducir el número de reintentos causados por errores del modelo, pero puede causar comportamientos inesperados o ediciones incorrectas. Habilítela solo si comprende los riesgos y está dispuesto a revisar cuidadosamente todos los cambios." diff --git a/webview-ui/src/i18n/locales/fr/settings.json b/webview-ui/src/i18n/locales/fr/settings.json index 9f694c5aa3..a5b6a48d78 100644 --- a/webview-ui/src/i18n/locales/fr/settings.json +++ b/webview-ui/src/i18n/locales/fr/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Condenser intelligemment la fenêtre de contexte", + "description": "Utilise un appel LLM pour résumer la conversation précédente lorsque la fenêtre de contexte de la tâche est presque pleine, plutôt que de supprimer les anciens messages. Avertissement : le coût de la synthèse n'est actuellement pas inclus dans les coûts API affichés dans l'interface." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Utiliser la stratégie diff unifiée expérimentale", "description": "Activer la stratégie diff unifiée expérimentale. Cette stratégie pourrait réduire le nombre de tentatives causées par des erreurs de modèle, mais peut provoquer des comportements inattendus ou des modifications incorrectes. Activez-la uniquement si vous comprenez les risques et êtes prêt à examiner attentivement tous les changements." 
diff --git a/webview-ui/src/i18n/locales/hi/settings.json b/webview-ui/src/i18n/locales/hi/settings.json index 0b21ed906b..aa5efaaf87 100644 --- a/webview-ui/src/i18n/locales/hi/settings.json +++ b/webview-ui/src/i18n/locales/hi/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "संदर्भ विंडो को बुद्धिमानी से संघनित करें", + "description": "जब कार्य का संदर्भ विंडो लगभग भर जाता है, तो पुराने संदेशों को हटाने के बजाय पिछली बातचीत को संक्षेप में प्रस्तुत करने के लिए LLM कॉल का उपयोग करता है। अस्वीकरण: संक्षेपण की लागत वर्तमान में UI में दिखाए गए API लागतों में शामिल नहीं है।" + }, "DIFF_STRATEGY_UNIFIED": { "name": "प्रायोगिक एकीकृत diff रणनीति का उपयोग करें", "description": "प्रायोगिक एकीकृत diff रणनीति सक्षम करें। यह रणनीति मॉडल त्रुटियों के कारण पुनः प्रयासों की संख्या को कम कर सकती है, लेकिन अप्रत्याशित व्यवहार या गलत संपादन का कारण बन सकती है। केवल तभी सक्षम करें जब आप जोखिमों को समझते हों और सभी परिवर्तनों की सावधानीपूर्वक समीक्षा करने के लिए तैयार हों।" diff --git a/webview-ui/src/i18n/locales/it/settings.json b/webview-ui/src/i18n/locales/it/settings.json index a3e210c480..67dc5c77b1 100644 --- a/webview-ui/src/i18n/locales/it/settings.json +++ b/webview-ui/src/i18n/locales/it/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Condensa intelligentemente la finestra di contesto", + "description": "Utilizza una chiamata LLM per riassumere la conversazione precedente quando la finestra di contesto dell'attività è quasi piena, invece di eliminare i messaggi vecchi. Avviso: il costo della sintesi non è attualmente incluso nei costi API mostrati nell'interfaccia." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Usa strategia diff unificata sperimentale", "description": "Abilita la strategia diff unificata sperimentale. Questa strategia potrebbe ridurre il numero di tentativi causati da errori del modello, ma può causare comportamenti imprevisti o modifiche errate. Abilitala solo se comprendi i rischi e sei disposto a rivedere attentamente tutte le modifiche." diff --git a/webview-ui/src/i18n/locales/ja/settings.json b/webview-ui/src/i18n/locales/ja/settings.json index 1f1ec12c54..4cf8f6d16e 100644 --- a/webview-ui/src/i18n/locales/ja/settings.json +++ b/webview-ui/src/i18n/locales/ja/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "コンテキストウィンドウをインテリジェントに圧縮する", + "description": "タスクのコンテキストウィンドウがほぼいっぱいになったとき、古いメッセージを削除する代わりに、LLM呼び出しを使用して過去の会話を要約します。免責事項:要約のコストは現在UIに表示されるAPIコストには含まれていません。" + }, "DIFF_STRATEGY_UNIFIED": { "name": "実験的な統合diff戦略を使用する", "description": "実験的な統合diff戦略を有効にします。この戦略はモデルエラーによる再試行の回数を減らす可能性がありますが、予期しない動作や不正確な編集を引き起こす可能性があります。リスクを理解し、すべての変更を注意深く確認する準備がある場合にのみ有効にしてください。" diff --git a/webview-ui/src/i18n/locales/ko/settings.json b/webview-ui/src/i18n/locales/ko/settings.json index 95195ff0a2..72c0e160a1 100644 --- a/webview-ui/src/i18n/locales/ko/settings.json +++ b/webview-ui/src/i18n/locales/ko/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "컨텍스트 창을 지능적으로 압축", + "description": "작업의 컨텍스트 창이 거의 가득 찼을 때 이전 메시지를 삭제하는 대신 LLM 호출을 사용하여 이전 대화를 요약합니다. 참고: 요약 비용은 현재 UI에 표시된 API 비용에 포함되지 않습니다." + }, "DIFF_STRATEGY_UNIFIED": { "name": "실험적 통합 diff 전략 사용", "description": "실험적 통합 diff 전략을 활성화합니다. 이 전략은 모델 오류로 인한 재시도 횟수를 줄일 수 있지만 예기치 않은 동작이나 잘못된 편집을 일으킬 수 있습니다. 위험을 이해하고 모든 변경 사항을 신중하게 검토할 의향이 있는 경우에만 활성화하십시오." 
diff --git a/webview-ui/src/i18n/locales/nl/settings.json b/webview-ui/src/i18n/locales/nl/settings.json index e812d78f26..54ecc15637 100644 --- a/webview-ui/src/i18n/locales/nl/settings.json +++ b/webview-ui/src/i18n/locales/nl/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Contextvenster intelligent comprimeren", + "description": "Gebruikt een LLM-aanroep om eerdere gesprekken samen te vatten wanneer het contextvenster van de taak bijna vol is, in plaats van oude berichten te verwijderen. Let op: de kosten van het samenvatten zijn momenteel niet inbegrepen in de API-kosten die in de interface worden getoond." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Experimentele unified diff-strategie gebruiken", "description": "Schakel de experimentele unified diff-strategie in. Deze strategie kan het aantal herhalingen door model fouten verminderen, maar kan onverwacht gedrag of onjuiste bewerkingen veroorzaken. Alleen inschakelen als je de risico's begrijpt en wijzigingen zorgvuldig wilt controleren." diff --git a/webview-ui/src/i18n/locales/pl/settings.json b/webview-ui/src/i18n/locales/pl/settings.json index 343ce01397..14b3a1161b 100644 --- a/webview-ui/src/i18n/locales/pl/settings.json +++ b/webview-ui/src/i18n/locales/pl/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Inteligentnie kondensuj okno kontekstu", + "description": "Używa wywołania LLM do podsumowania wcześniejszej rozmowy, gdy okno kontekstu zadania jest prawie pełne, zamiast usuwać stare wiadomości. Zastrzeżenie: koszt podsumowania nie jest obecnie uwzględniony w kosztach API pokazywanych w interfejsie." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Użyj eksperymentalnej ujednoliconej strategii diff", "description": "Włącz eksperymentalną ujednoliconą strategię diff. Ta strategia może zmniejszyć liczbę ponownych prób spowodowanych błędami modelu, ale może powodować nieoczekiwane zachowanie lub nieprawidłowe edycje. Włącz tylko jeśli rozumiesz ryzyko i jesteś gotów dokładnie przeglądać wszystkie zmiany." diff --git a/webview-ui/src/i18n/locales/pt-BR/settings.json b/webview-ui/src/i18n/locales/pt-BR/settings.json index 0c66a21847..eeaefd06e8 100644 --- a/webview-ui/src/i18n/locales/pt-BR/settings.json +++ b/webview-ui/src/i18n/locales/pt-BR/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Condensar inteligentemente a janela de contexto", + "description": "Usa uma chamada LLM para resumir a conversa anterior quando a janela de contexto da tarefa está quase cheia, em vez de descartar mensagens antigas. Aviso: o custo de resumir não está atualmente incluído nos custos de API mostrados na interface." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Usar estratégia diff unificada experimental", "description": "Ativar a estratégia diff unificada experimental. Esta estratégia pode reduzir o número de novas tentativas causadas por erros do modelo, mas pode causar comportamento inesperado ou edições incorretas. Ative apenas se compreender os riscos e estiver disposto a revisar cuidadosamente todas as alterações." 
diff --git a/webview-ui/src/i18n/locales/ru/settings.json b/webview-ui/src/i18n/locales/ru/settings.json index 376763a7d8..66e74b1332 100644 --- a/webview-ui/src/i18n/locales/ru/settings.json +++ b/webview-ui/src/i18n/locales/ru/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Интеллектуальное сжатие контекстного окна", + "description": "Использует вызов LLM для обобщения предыдущего разговора, когда контекстное окно задачи почти заполнено, вместо удаления старых сообщений. Примечание: стоимость обобщения в настоящее время не включена в стоимость API, отображаемую в интерфейсе." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Использовать экспериментальную стратегию унифицированного диффа", "description": "Включает экспериментальную стратегию унифицированного диффа. Может уменьшить количество повторных попыток из-за ошибок модели, но может привести к неожиданному поведению или неверным правкам. Включайте только если готовы внимательно проверять все изменения." diff --git a/webview-ui/src/i18n/locales/tr/settings.json b/webview-ui/src/i18n/locales/tr/settings.json index 1d23b7e04e..2856d2eeb2 100644 --- a/webview-ui/src/i18n/locales/tr/settings.json +++ b/webview-ui/src/i18n/locales/tr/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Bağlam penceresini akıllıca sıkıştır", + "description": "Görevin bağlam penceresi neredeyse dolduğunda, eski mesajları atmak yerine önceki konuşmayı özetlemek için bir LLM çağrısı kullanır. Not: Özetleme maliyeti şu anda arayüzde gösterilen API maliyetlerine dahil değildir." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Deneysel birleştirilmiş diff stratejisini kullan", "description": "Deneysel birleştirilmiş diff stratejisini etkinleştir. Bu strateji, model hatalarından kaynaklanan yeniden deneme sayısını azaltabilir, ancak beklenmeyen davranışlara veya hatalı düzenlemelere neden olabilir. Yalnızca riskleri anlıyorsanız ve tüm değişiklikleri dikkatlice incelemeye istekliyseniz etkinleştirin." diff --git a/webview-ui/src/i18n/locales/vi/settings.json b/webview-ui/src/i18n/locales/vi/settings.json index f964bf4b7a..b2ce14552b 100644 --- a/webview-ui/src/i18n/locales/vi/settings.json +++ b/webview-ui/src/i18n/locales/vi/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "Nén cửa sổ ngữ cảnh một cách thông minh", + "description": "Sử dụng một lệnh gọi LLM để tóm tắt cuộc trò chuyện trước đó khi cửa sổ ngữ cảnh của tác vụ gần đầy, thay vì loại bỏ các tin nhắn cũ. Lưu ý: chi phí tóm tắt hiện không được tính vào chi phí API hiển thị trong giao diện người dùng." + }, "DIFF_STRATEGY_UNIFIED": { "name": "Sử dụng chiến lược diff thống nhất thử nghiệm", "description": "Bật chiến lược diff thống nhất thử nghiệm. Chiến lược này có thể giảm số lần thử lại do lỗi mô hình nhưng có thể gây ra hành vi không mong muốn hoặc chỉnh sửa không chính xác. Chỉ bật nếu bạn hiểu rõ các rủi ro và sẵn sàng xem xét cẩn thận tất cả các thay đổi." 
diff --git a/webview-ui/src/i18n/locales/zh-CN/settings.json b/webview-ui/src/i18n/locales/zh-CN/settings.json index ba14e48762..cd42bc4bcf 100644 --- a/webview-ui/src/i18n/locales/zh-CN/settings.json +++ b/webview-ui/src/i18n/locales/zh-CN/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "智能压缩上下文窗口", + "description": "当任务上下文窗口接近填满时,使用 LLM 调用来总结过去的对话,而不是删除旧消息。注意:目前 UI 中显示的 API 费用不包括总结的成本。" + }, "DIFF_STRATEGY_UNIFIED": { "name": "启用diff更新工具", "description": "可减少因模型错误导致的重复尝试,但可能引发意外操作。启用前请确保理解风险并会仔细检查所有修改。" diff --git a/webview-ui/src/i18n/locales/zh-TW/settings.json b/webview-ui/src/i18n/locales/zh-TW/settings.json index 78c994457e..4d0ec4420d 100644 --- a/webview-ui/src/i18n/locales/zh-TW/settings.json +++ b/webview-ui/src/i18n/locales/zh-TW/settings.json @@ -396,6 +396,10 @@ }, "experimental": { "warning": "⚠️", + "AUTO_CONDENSE_CONTEXT": { + "name": "智慧壓縮上下文視窗", + "description": "當工作的上下文視窗接近填滿時,使用 LLM 呼叫來摘要過去的對話,而非捨棄舊訊息。注意:目前 UI 中顯示的 API 費用並未包含摘要的成本。" + }, "DIFF_STRATEGY_UNIFIED": { "name": "使用實驗性統一差異比對策略", "description": "啟用實驗性的統一差異比對策略。此策略可能減少因模型錯誤而導致的重試次數,但也可能導致意外行為或錯誤的編輯。請務必了解風險,並願意仔細檢查所有變更後再啟用。" From 76c274f2973f37954939fb86bcb2744fb32d8292 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Wed, 14 May 2025 13:08:48 -0700 Subject: [PATCH 24/25] nit --- src/core/condense/index.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/core/condense/index.ts b/src/core/condense/index.ts index 6aed4f2550..2a88dbfcce 100644 --- a/src/core/condense/index.ts +++ b/src/core/condense/index.ts @@ -49,8 +49,6 @@ Output only the summary of the conversation so far, without any additional comme * Summarizes the conversation messages using an LLM call * * @param {ApiMessage[]} messages - The conversation messages - * @param {number} totalTokens - The total number of tokens in the conversation, excluding the last user message. - * @param {number} contextWindow - The context window size. * @param {ApiHandler} apiHandler - The API handler to use for token counting. * @returns {ApiMessage[]} - The input messages, potentially including a new summary message before the last message. */ From a11badb620a05cf1d176c454f4a4a97aa299655a Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Wed, 14 May 2025 13:26:28 -0700 Subject: [PATCH 25/25] add changeset --- .changeset/large-bags-send.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/large-bags-send.md diff --git a/.changeset/large-bags-send.md b/.changeset/large-bags-send.md new file mode 100644 index 0000000000..93e49b2388 --- /dev/null +++ b/.changeset/large-bags-send.md @@ -0,0 +1,5 @@ +--- +"roo-cline": patch +--- + +Adds experimental feature to intelligently condense the task context
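Reviewer note on the end-to-end behavior: the sketch below drives the final truncateConversationIfNeeded contract the way Task.ts now does, using a stand-in handler modeled on the mocks in the new condense tests. The handler shape, the token numbers, and the root-relative import paths are illustrative assumptions, not part of the patches.

	// sketch.ts - hypothetical driver for the condense-or-truncate flow
	import { ApiHandler } from "./src/api"
	import { ApiMessage } from "./src/core/task-persistence/apiMessages"
	import { truncateConversationIfNeeded } from "./src/core/sliding-window"

	// Stand-in handler: only the members this flow touches are mocked,
	// mirroring the shape used in src/core/condense/__tests__/index.test.ts.
	const apiHandler = {
		createMessage: () =>
			(async function* () {
				yield { type: "text" as const, text: "A summary of the earlier conversation." }
			})(),
		countTokens: async () => 100, // assumed flat count for illustration
		getModel: () => ({ id: "test-model", info: { contextWindow: 100_000, maxTokens: 50_000 } }),
	} as unknown as ApiHandler

	const messages: ApiMessage[] = [
		{ role: "user", content: "First message", ts: 1 },
		{ role: "assistant", content: "Second message", ts: 2 },
		{ role: "user", content: "Third message", ts: 3 },
		{ role: "assistant", content: "Fourth message", ts: 4 },
		{ role: "user", content: "Fifth message", ts: 5 },
	]

	async function demo() {
		// Over budget with autoCondenseContext: summarizeConversation runs first;
		// if it declines (too few messages, a recent summary, or an empty reply),
		// the sliding window still drops the older half after the first message.
		const result = await truncateConversationIfNeeded({
			messages,
			totalTokens: 90_000, // assumed to exceed the allowed budget for a 100k window
			contextWindow: 100_000,
			maxTokens: 50_000,
			apiHandler,
			autoCondenseContext: true,
		})
		console.log(result.some((m) => m.isSummary) ? "condensed" : "truncated")
	}

	void demo()

With autoCondenseContext left unset or false, the same call reduces to the previous sliding-window behavior, which is why Task.ts can route both paths through a single truncateConversationIfNeeded call.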