// File: src/core/sliding-window/__tests__/context-compression.test.ts
import { describe, it } from "mocha"
import "should"
import cloneDeep from "clone-deep"
import { Anthropic } from "@anthropic-ai/sdk"
import * as path from "path"
// The module under test lives one directory above __tests__.
import { compressConversationHistory, compressEnvironmentDetails } from "../context-compression"

/** Opening tag that marks a text block as carrying environment details. */
const ENVIRONMENT_DETAILS_OPEN_TAG = "<environment_details>"

/** Matches an uncompressed "M/D/YYYY, H:MM:SS" timestamp, with or without an AM/PM suffix. */
const FULL_TIMESTAMP_PATTERN = /\d+\/\d+\/\d+,\s*\d+:\d+:\d+(?:\s*[AP]M)?/

/**
 * Builds the pair of debug file paths handed to `compressConversationHistory`.
 *
 * @param testType Label embedded in both file names.
 * @returns Relative paths under `debug/` for the before and after snapshots.
 */
function getDebugFilenames(testType: string): { before: string; after: string } {
	const timestamp = Date.now()
	const date = new Date(timestamp)
	// MM-DD taken from the ISO (UTC) date.
	const dateStr = date.toISOString().split("T")[0].split("-").slice(1).join("-")
	// HH-MM-SS taken from the local time string.
	const timeStr = date.toTimeString().split(" ")[0].replace(/:/g, "-")

	const baseFilename = `${testType}_${timestamp}_${dateStr}--${timeStr}.json`
	return {
		before: path.join("debug", `conv_before_compression_${baseFilename}`),
		after: path.join("debug", `conv_after_compression_${baseFilename}`),
	}
}

/** Returns the text of every text block in `message` that contains the environment-details tag. */
function environmentTextsOf(message: Anthropic.Messages.MessageParam): string[] {
	if (!Array.isArray(message.content)) {
		return []
	}
	const texts: string[] = []
	for (const block of message.content) {
		if (block.type === "text" && block.text.includes(ENVIRONMENT_DETAILS_OPEN_TAG)) {
			texts.push(block.text)
		}
	}
	return texts
}

/** Index of the last message that carries environment details, or -1 when there is none. */
function lastEnvironmentIndexOf(messages: Anthropic.Messages.MessageParam[]): number {
	for (let i = messages.length - 1; i >= 0; i--) {
		if (environmentTextsOf(messages[i]).length > 0) {
			return i
		}
	}
	return -1
}

/**
 * Returns the first environment-details text block of the last message that has one,
 * or an empty string when no message carries environment details.
 */
function getLastEnvironmentMessage(messages: Anthropic.Messages.MessageParam[]): string {
	const lastIndex = lastEnvironmentIndexOf(messages)
	return lastIndex === -1 ? "" : environmentTextsOf(messages[lastIndex])[0]
}

/** Returns every environment-details text block that appears before the last environment message. */
function getPriorEnvironmentMessages(messages: Anthropic.Messages.MessageParam[]): string[] {
	const lastIndex = lastEnvironmentIndexOf(messages)
	const result: string[] = []
	for (let i = 0; i < lastIndex; i++) {
		result.push(...environmentTextsOf(messages[i]))
	}
	return result
}

/**
 * Shared assertions for environment-details compression.
 *
 * @param beforeObj Parsed snapshot taken before compression; must still hold full timestamps.
 * @param afterObj Parsed snapshot taken after compression; earlier messages must be reduced
 *   to a UNIX timestamp while the last one keeps its full details.
 */
function verifyEnvironmentCompression(
	beforeObj: Anthropic.Messages.MessageParam[],
	afterObj: Anthropic.Messages.MessageParam[],
): void {
	const lastMsg = getLastEnvironmentMessage(afterObj)
	const priorMsgs = getPriorEnvironmentMessages(afterObj)

	// Earlier messages keep only the compact timestamp.
	for (const msg of priorMsgs) {
		should(msg).match(/UTC UNIX: \d+/)
		should(msg).not.match(/VSCode Visible Files/)
		should(msg).not.match(/VSCode Open Tabs/)
		should(msg).not.match(/Current Working Directory/)
	}

	// The last message keeps its human-readable timestamp.
	should(lastMsg).match(FULL_TIMESTAMP_PATTERN)

	// The "before" snapshot must be untouched by compression.
	for (const msg of getPriorEnvironmentMessages(beforeObj)) {
		should(msg).match(FULL_TIMESTAMP_PATTERN)
	}
	should(getLastEnvironmentMessage(beforeObj)).match(FULL_TIMESTAMP_PATTERN)
}
/** Time-zone suffix shared by every fixture timestamp. */
const FIXTURE_TIME_ZONE = "(America/Los_Angeles, UTC-8:00)"
/** Marker that identifies a task-resumption text block. */
const FIXTURE_RESUMPTION_MARKER = "[TASK RESUMPTION]"
const NO_VISIBLE_FILES_SECTION = "# VSCode Visible Files\n(No visible files)"
const NO_OPEN_TABS_SECTION = "# VSCode Open Tabs\n(No open tabs)"

/** Builds a "# Current Time" section for 2/14/2025 at the given clock reading. */
function currentTimeSection(clock: string): string {
	return `# Current Time\n2/14/2025, ${clock} ${FIXTURE_TIME_ZONE}`
}

/** Builds a message that holds a single text block. */
function textMessage(role: "user" | "assistant", text: string): Anthropic.Messages.MessageParam {
	return { role, content: [{ type: "text", text }] }
}

/**
 * Builds a user message whose text is `prefix` followed by an environment-details
 * envelope containing `sections`, separated by blank lines.
 */
function envMessage(prefix: string, ...sections: string[]): Anthropic.Messages.MessageParam {
	return textMessage("user", `${prefix}\n<environment_details>\n${sections.join("\n\n")}\n</environment_details>`)
}

/** Collects the text of every text block, in conversation order. */
function allTextBlocks(messages: Anthropic.Messages.MessageParam[]): string[] {
	const texts: string[] = []
	for (const message of messages) {
		if (!Array.isArray(message.content)) {
			continue
		}
		for (const block of message.content) {
			if (block.type === "text") {
				texts.push(block.text)
			}
		}
	}
	return texts
}

/** Collects the text blocks that contain the task-resumption marker. */
function resumptionTexts(messages: Anthropic.Messages.MessageParam[]): string[] {
	return allTextBlocks(messages).filter((text) => text.includes(FIXTURE_RESUMPTION_MARKER))
}

/**
 * Runs `compressConversationHistory` (which mutates `messages` in place and also
 * writes debug snapshots under `debug/`) and returns the parsed before/after snapshots.
 * Also checks that the "before" snapshot equals the input as it was prior to compression.
 */
function compressForTest(
	label: string,
	messages: Anthropic.Messages.MessageParam[],
): { beforeObj: Anthropic.Messages.MessageParam[]; afterObj: Anthropic.Messages.MessageParam[] } {
	const originalMessages = cloneDeep(messages)
	const { before: beforeFile, after: afterFile } = getDebugFilenames(label)
	const { before, after } = compressConversationHistory(messages, "test_task", beforeFile, afterFile)

	const beforeObj = JSON.parse(before) as Anthropic.Messages.MessageParam[]
	const afterObj = JSON.parse(after) as Anthropic.Messages.MessageParam[]
	should(beforeObj).eql(originalMessages)
	return { beforeObj, afterObj }
}

describe("Context Compression", () => {
	describe("Conversation History Compression", () => {
		it("should compress conversation history correctly", () => {
			const { beforeObj, afterObj } = compressForTest("basic_compression", [
				envMessage(
					"message 1",
					"# VSCode Visible Files\nfile1.ts",
					currentTimeSection("6:33:59 PM"),
					"# Current Mode\nACT MODE",
				),
				textMessage("assistant", "I'll help with that."),
				envMessage(
					"message 2",
					"# VSCode Visible Files\nfile2.ts",
					currentTimeSection("6:34:00 PM"),
					"# Current Mode\nPLAN MODE",
				),
			])

			verifyEnvironmentCompression(beforeObj, afterObj)

			const lastMsg = getLastEnvironmentMessage(afterObj)
			should(lastMsg).match(/VSCode Visible Files\s*file2\.ts/)
			should(lastMsg).match(/Current Mode\s*PLAN MODE/)
		})

		it("should accumulate sections in last message", () => {
			const { beforeObj, afterObj } = compressForTest("section_accumulation", [
				envMessage("message 1", currentTimeSection("6:33:59 PM"), "# VSCode Visible Files\nfile1.ts"),
				envMessage("message 2", currentTimeSection("6:34:00 PM"), "# VSCode Open Tabs\ntab1.ts"),
				envMessage("message 3", currentTimeSection("6:34:01 PM")),
			])

			verifyEnvironmentCompression(beforeObj, afterObj)

			const lastMsg = getLastEnvironmentMessage(afterObj)
			should(lastMsg).match(/VSCode Visible Files\s*file1\.ts/)
			should(lastMsg).match(/VSCode Open Tabs\s*tab1\.ts/)
		})

		it("should handle 12-hour and 24-hour time formats", () => {
			const { beforeObj, afterObj } = compressForTest("time_formats", [
				envMessage("message 1", currentTimeSection("6:33:59 PM")),
				envMessage("message 2", currentTimeSection("18:34:00")),
			])

			verifyEnvironmentCompression(beforeObj, afterObj)

			const lastMsg = getLastEnvironmentMessage(afterObj)
			should(lastMsg).match(/2\/14\/2025,\s*18:34:00/)
		})

		it("should handle working directory content", () => {
			const { beforeObj, afterObj } = compressForTest("working_directory", [
				envMessage(
					"message 1",
					currentTimeSection("6:33:59 PM"),
					"# Current Working Directory (/path/to/dir) Files\nfile1.txt\ndir1/\n subfile1.txt",
				),
				envMessage(
					"message 2",
					currentTimeSection("6:34:00 PM"),
					"# Current Working Directory (/path/to/dir) Files\nfile1.txt\ndir1/\n subfile1.txt\n subfile2.txt",
				),
			])

			verifyEnvironmentCompression(beforeObj, afterObj)

			const lastMsg = getLastEnvironmentMessage(afterObj)
			should(lastMsg).match(/Current Working Directory/)
			should(lastMsg).match(/file1\.txt/)
			should(lastMsg).match(/dir1\/\s*subfile1\.txt\s*subfile2\.txt/)
		})

		it("should exclude empty VSCode sections", () => {
			const { beforeObj, afterObj } = compressForTest("empty_vscode", [
				envMessage(
					"message 1",
					currentTimeSection("6:33:59 PM"),
					NO_VISIBLE_FILES_SECTION,
					NO_OPEN_TABS_SECTION,
					"# Current Mode\nACT MODE",
				),
				envMessage("message 2", currentTimeSection("6:34:00 PM"), NO_VISIBLE_FILES_SECTION, NO_OPEN_TABS_SECTION),
			])

			verifyEnvironmentCompression(beforeObj, afterObj)

			const lastMsg = getLastEnvironmentMessage(afterObj)
			should(lastMsg).not.match(/VSCode Visible Files/)
			should(lastMsg).not.match(/VSCode Open Tabs/)
		})

		it("should remove VSCode sections that become empty", () => {
			const { beforeObj, afterObj } = compressForTest("vscode_becomes_empty", [
				envMessage(
					"message 1",
					currentTimeSection("6:33:59 PM"),
					"# VSCode Visible Files\nfile1.ts",
					"# VSCode Open Tabs\ntab1.ts",
				),
				envMessage("message 2", currentTimeSection("6:34:00 PM"), NO_VISIBLE_FILES_SECTION, NO_OPEN_TABS_SECTION),
			])

			verifyEnvironmentCompression(beforeObj, afterObj)

			const lastMsg = getLastEnvironmentMessage(afterObj)
			should(lastMsg).not.match(/VSCode Visible Files/)
			should(lastMsg).not.match(/VSCode Open Tabs/)
		})

		it("should handle mixed VSCode section states", () => {
			const { beforeObj, afterObj } = compressForTest("mixed_vscode_states", [
				envMessage(
					"message 1",
					currentTimeSection("6:33:59 PM"),
					"# VSCode Visible Files\nfile1.ts",
					NO_OPEN_TABS_SECTION,
				),
				envMessage(
					"message 2",
					currentTimeSection("6:34:00 PM"),
					"# VSCode Visible Files\nfile2.ts",
					NO_OPEN_TABS_SECTION,
				),
			])

			verifyEnvironmentCompression(beforeObj, afterObj)

			const lastMsg = getLastEnvironmentMessage(afterObj)
			should(lastMsg).match(/VSCode Visible Files\s*file2\.ts/)
			should(lastMsg).not.match(/VSCode Open Tabs/)
		})

		it("should never add empty VSCode placeholders", () => {
			const { beforeObj, afterObj } = compressForTest("no_empty_placeholders", [
				envMessage("message 1", currentTimeSection("6:33:59 PM")),
				envMessage("message 2", currentTimeSection("6:34:00 PM"), "# VSCode Visible Files\nfile1.ts"),
				envMessage("message 3", currentTimeSection("6:34:01 PM"), NO_VISIBLE_FILES_SECTION, NO_OPEN_TABS_SECTION),
			])

			verifyEnvironmentCompression(beforeObj, afterObj)

			const lastMsg = getLastEnvironmentMessage(afterObj)
			should(lastMsg).not.match(/\(No visible files\)/)
			should(lastMsg).not.match(/\(No open tabs\)/)
			should(lastMsg).not.match(/VSCode Visible Files/)
			should(lastMsg).not.match(/VSCode Open Tabs/)
		})

		it("should remove all but the last task resumption message when last message is task resumption", () => {
			const { beforeObj, afterObj } = compressForTest("task_resumption", [
				envMessage(
					"[TASK RESUMPTION] This task was interrupted 5 minutes ago.\nmessage 1",
					currentTimeSection("6:33:59 PM"),
				),
				envMessage(
					"[TASK RESUMPTION] This task was interrupted 10 minutes ago.\nmessage 2",
					currentTimeSection("6:34:00 PM"),
				),
				envMessage(
					"[TASK RESUMPTION] This task was interrupted 15 minutes ago.\nmessage 3",
					currentTimeSection("6:34:01 PM"),
				),
			])

			const remaining = resumptionTexts(afterObj)
			should(resumptionTexts(beforeObj).length).equal(3)
			should(remaining.length).equal(1)
			should(remaining[0]).match(/\[TASK RESUMPTION\] This task was interrupted 15 minutes ago/)
		})

		it("should preserve last task resumption message when last message is not task resumption", () => {
			const { beforeObj, afterObj } = compressForTest("task_resumption_non_last", [
				envMessage(
					"[TASK RESUMPTION] This task was interrupted 5 minutes ago.\nmessage 1",
					currentTimeSection("6:33:59 PM"),
				),
				envMessage(
					"[TASK RESUMPTION] This task was interrupted 10 minutes ago.\nmessage 2",
					currentTimeSection("6:34:00 PM"),
				),
				envMessage("regular message", currentTimeSection("6:34:01 PM")),
			])

			const remaining = resumptionTexts(afterObj)
			const afterTexts = allTextBlocks(afterObj)
			should(resumptionTexts(beforeObj).length).equal(2)
			should(remaining.length).equal(1)
			should(remaining[0]).match(/\[TASK RESUMPTION\] This task was interrupted 10 minutes ago/)
			should(afterTexts[afterTexts.length - 1]).match(/regular message/)
		})
	})
})
// File: src/core/sliding-window/context-compression.ts
import type { Anthropic } from "@anthropic-ai/sdk"
import * as fs from "fs/promises"
import * as path from "path"

type Message = Anthropic.Messages.MessageParam
type ContentBlock = Exclude<Message["content"], string>[number]

/** Marker that identifies a task-resumption text block. */
const TASK_RESUMPTION_MARKER = "[TASK RESUMPTION]"
/** Opening tag of the environment-details envelope. */
const ENV_OPEN_TAG = "<environment_details>"
/** The whole envelope, from the first opening tag to the last closing tag. */
const ENV_BLOCK_PATTERN = /<environment_details>.*<\/environment_details>/s
/** Splits a text into: before, opening tag, envelope content, closing tag, after. */
const ENV_ENVELOPE_PATTERN = /^(.*?)(<environment_details>\n)(.*?)(\n<\/environment_details>)(.*?)$/s
/** "M/D/YYYY, H:MM:SS[ AM|PM] (" at the start of a Current Time body. */
const TIMESTAMP_PATTERN = /^(\d+)\/(\d+)\/(\d+),\s*(\d+):(\d+):(\d+)(?:\s*(AM|PM))?\s*\(/
/** "UTC-8:00" style offset; the hour part may be fractional and the minutes are optional. */
const UTC_OFFSET_PATTERN = /UTC([+-])(\d+(?:\.\d+)?)(?::(\d+))?/
/** A VSCode section body that is exactly the "nothing open" placeholder. */
const EMPTY_VSCODE_BODY = /^\(No (?:visible files|open tabs)\)$/
/** The "nothing open" placeholder anywhere inside a remembered section. */
const EMPTY_VSCODE_ANYWHERE = /\(No (?:visible files|open tabs)\)/

/**
 * Converts a "Current Time" body into a compact `UTC UNIX: <seconds>` string.
 *
 * Accepts both "2/14/2025, 6:33:59 PM (America/Los_Angeles, UTC-8:00)" and the 24-hour
 * "2/14/2025, 18:33:59 (...)" form. When the text carries a `UTC±H:MM` offset the wall-clock
 * time is converted with that offset, so the result does not depend on the host time zone.
 * Without an offset the wall-clock time is interpreted in the host's local zone.
 *
 * @param timeStr Trimmed body of the time section.
 * @returns The compact timestamp, or `timeStr` unchanged when it cannot be parsed.
 */
function parseTimeToUnix(timeStr: string): string {
	const match = TIMESTAMP_PATTERN.exec(timeStr)
	if (!match) {
		return timeStr
	}

	const [, month, day, year, hour, minute, second, period] = match

	// Normalise 12-hour clocks; a missing period means the value is already 24-hour.
	let hours = Number(hour)
	if (period === "PM" && hours !== 12) {
		hours += 12
	} else if (period === "AM" && hours === 12) {
		hours = 0
	}

	const fields: [number, number, number, number, number, number] = [
		Number(year),
		Number(month) - 1, // months are 0-based
		Number(day),
		hours,
		Number(minute),
		Number(second),
	]

	const offset = UTC_OFFSET_PATTERN.exec(timeStr)
	let millis: number
	if (offset) {
		const sign = offset[1] === "-" ? -1 : 1
		const offsetMinutes = sign * (Number(offset[2]) * 60 + Number(offset[3] ?? 0))
		// Wall-clock time minus its offset from UTC gives the instant in UTC.
		millis = Date.UTC(...fields) - offsetMinutes * 60000
	} else {
		millis = new Date(...fields).getTime()
	}

	// Date never throws on bad fields; it yields NaN instead.
	if (Number.isNaN(millis)) {
		return timeStr
	}
	return `UTC UNIX: ${Math.floor(millis / 1000)}`
}

/** True when any text block of `message` contains the task-resumption marker. */
function hasTaskResumption(message: Message): boolean {
	if (!Array.isArray(message.content)) {
		return false
	}
	return message.content.some((block) => block.type === "text" && block.text.includes(TASK_RESUMPTION_MARKER))
}

/** Index of the last message that contains a task-resumption marker, or -1. */
function findLastTaskResumption(messages: Message[]): number {
	for (let i = messages.length - 1; i >= 0; i--) {
		if (hasTaskResumption(messages[i])) {
			return i
		}
	}
	return -1
}

/**
 * Strips task-resumption text from every message except the last one that has it.
 * Mutates `messages` in place.
 *
 * In an affected message, each text block containing the marker is reduced to its
 * environment-details envelope, or dropped when it has none. All other blocks
 * (images, tool_use, tool_result, unrelated text) are kept untouched.
 */
function compressTaskResumption(messages: Message[]): void {
	const lastIndex = findLastTaskResumption(messages)
	if (lastIndex === -1) {
		return
	}

	messages.forEach((message, index) => {
		if (index === lastIndex || !hasTaskResumption(message)) {
			return
		}
		const { content } = message
		if (!Array.isArray(content)) {
			return
		}

		const kept: ContentBlock[] = []
		for (const block of content) {
			if (block.type !== "text" || !block.text.includes(TASK_RESUMPTION_MARKER)) {
				kept.push(block)
				continue
			}
			const envelope = ENV_BLOCK_PATTERN.exec(block.text)
			if (envelope) {
				kept.push({ type: "text", text: envelope[0] })
			}
		}
		// NOTE(review): `kept` can be empty when the message held nothing but resumption
		// text without environment details — confirm the consumer tolerates empty content.
		message.content = kept
	})
}

/** True when any text block of `message` contains the environment-details opening tag. */
function hasEnvironmentDetails(message: Message): boolean {
	if (!Array.isArray(message.content)) {
		return false
	}
	return message.content.some((block) => block.type === "text" && block.text.includes(ENV_OPEN_TAG))
}

/**
 * Index of the last user message that carries environment details, or -1.
 * Only user messages are considered because only user messages are compressed.
 */
function findLastEnvironmentMessage(messages: Message[]): number {
	for (let i = messages.length - 1; i >= 0; i--) {
		if (messages[i].role === "user" && hasEnvironmentDetails(messages[i])) {
			return i
		}
	}
	return -1
}

/** One known environment section and how to shrink it in a non-final message. */
interface SectionRule {
	/** Regex source of the heading text; also the key used in the accumulation map. */
	readonly heading: string
	/** Matches the section, including any newlines directly in front of its heading. */
	readonly pattern: RegExp
	/** True for the VSCode sections, whose "nothing open" placeholder is never kept. */
	readonly isVSCode: boolean
	/** Replacement text for the section when the message is not the last one. */
	readonly compact: (body: string) => string
}

/** Builds a rule; the section body runs up to the next "\n# " heading or the end of the text. */
function sectionRule(heading: string, compact: (body: string) => string): SectionRule {
	return {
		heading,
		pattern: new RegExp(`\\n*#\\s*${heading}\\s*?\\n(.*?)(?=\\n# |$)`, "s"),
		isVSCode: heading.includes("VSCode"),
		compact,
	}
}

/** Non-final messages drop the section entirely. */
const dropSection = (): string => ""

/**
 * Non-final messages keep only a compact timestamp. The leading newline restores the
 * separator the section pattern consumed, so the heading never sticks to preceding text.
 */
const compactTimeSection = (body: string): string => `\n# Current Time\n${parseTimeToUnix(body.trim())}\n`

/** Sections are processed in this order. */
const SECTION_RULES: readonly SectionRule[] = [
	sectionRule("Current Working Directory \\([^)]+\\) Files", dropSection),
	sectionRule("VSCode Visible Files", dropSection),
	sectionRule("VSCode Open Tabs", dropSection),
	sectionRule("Current Mode", dropSection),
	sectionRule("Current Time", compactTimeSection),
]

/**
 * Compresses the environment-details envelope inside one text block.
 *
 * For a non-final message, known sections are removed and the time becomes a UNIX
 * timestamp. For the final message, sections are kept, and any known section missing
 * from it is re-attached from `accumulatedMatches`. Empty VSCode placeholders are
 * removed everywhere and also erase the remembered value for that section.
 *
 * @param text Text of the block; returned unchanged when it has no envelope.
 * @param isLastMessage Whether the block belongs to the last environment message.
 * @param accumulatedMatches Latest seen text per section heading; updated in place.
 * @returns The text with its envelope content rewritten.
 */
function compressEnvironmentSection(
	text: string,
	isLastMessage: boolean,
	accumulatedMatches: Map<string, string>,
): string {
	const envelope = ENV_ENVELOPE_PATTERN.exec(text)
	if (!envelope) {
		return text
	}
	const [, beforeEnv, envStart, envContent, envEnd, afterEnv] = envelope

	let processed = envContent
	for (const rule of SECTION_RULES) {
		const match = rule.pattern.exec(processed)

		if (!match) {
			// Missing from the final message: re-attach the latest remembered section.
			const remembered = isLastMessage ? accumulatedMatches.get(rule.heading) : undefined
			if (remembered && !(rule.isVSCode && EMPTY_VSCODE_ANYWHERE.test(remembered))) {
				// Explicit separator: the remembered text may not start with a newline.
				processed += `\n${remembered}`
			}
			continue
		}

		if (rule.isVSCode && EMPTY_VSCODE_BODY.test(match[1].trim())) {
			// A placeholder carries no information and invalidates what was remembered.
			accumulatedMatches.delete(rule.heading)
			processed = processed.replace(match[0], "")
			continue
		}

		accumulatedMatches.set(rule.heading, match[0])
		if (!isLastMessage) {
			processed = processed.replace(rule.pattern, (_section, body: string) => rule.compact(body))
		}
	}

	// Collapse the blank lines left behind by removed sections.
	processed = processed.replace(/\n{2,}/g, "\n")
	return beforeEnv + envStart + processed.trim() + envEnd + afterEnv
}

/**
 * Compresses environment details across the conversation. Mutates the text blocks of
 * user messages in place: the last user message with environment details keeps (and
 * accumulates) full details, while every other one is reduced to a compact form.
 *
 * @param messages Conversation history to rewrite.
 */
export function compressEnvironmentDetails(messages: Anthropic.Messages.MessageParam[]): void {
	const lastEnvIndex = findLastEnvironmentMessage(messages)
	if (lastEnvIndex === -1) {
		console.warn("No <environment_details> were found in any message")
		return
	}

	const accumulatedMatches = new Map<string, string>()

	for (let i = 0; i < messages.length; i++) {
		const message = messages[i]
		if (message.role !== "user" || !Array.isArray(message.content)) {
			continue
		}
		const isLastMessage = i === lastEnvIndex
		for (const block of message.content) {
			if (block.type === "text") {
				block.text = compressEnvironmentSection(block.text, isLastMessage, accumulatedMatches)
			}
		}
	}
}

/** Serializes message objects to pretty-printed JSON for debugging. */
function serializeMessages(messages: Anthropic.Messages.MessageParam[]): string {
	return JSON.stringify(messages, null, 2)
}

/**
 * Writes both debug snapshots, creating their directories first.
 * Failures are logged and never propagate to the caller.
 */
async function writeDebugSnapshots(
	beforeName: string,
	beforeJson: string,
	afterName: string,
	afterJson: string,
): Promise<void> {
	try {
		await fs.mkdir(path.dirname(beforeName), { recursive: true })
		await fs.mkdir(path.dirname(afterName), { recursive: true })
		await Promise.all([fs.writeFile(beforeName, beforeJson), fs.writeFile(afterName, afterJson)])
		console.log(`Compression debug files:\nBefore: ${beforeName}\nAfter: ${afterName}`)
	} catch (err: unknown) {
		console.error("Error writing debug files:", err)
	}
}

/**
 * Compresses conversation history in place and returns the before/after structures.
 *
 * Task-resumption messages are compressed first, then environment details. When both
 * file names are given, the two snapshots are also written to disk in the background;
 * the function does not wait for those writes.
 *
 * @param messages Conversation history; mutated in place.
 * @param taskId Task identifier; currently not used by the compression itself.
 * @param beforeName Optional path for the pre-compression snapshot.
 * @param afterName Optional path for the post-compression snapshot.
 * @returns Pretty-printed JSON of the messages before and after compression.
 */
export function compressConversationHistory(
	messages: Anthropic.Messages.MessageParam[],
	taskId: string,
	beforeName?: string,
	afterName?: string,
): { before: string; after: string } {
	const beforeJson = serializeMessages(messages)

	compressTaskResumption(messages)
	compressEnvironmentDetails(messages)

	const afterJson = serializeMessages(messages)

	if (beforeName && afterName) {
		// Fire-and-forget: errors are handled inside the helper.
		void writeDebugSnapshots(beforeName, beforeJson, afterName, afterJson)
	}

	return {
		before: beforeJson,
		after: afterJson,
	}
}
TOKEN_BUFFER_PERCENTAGE of the context window const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens + // Apply compression to reduce token usage while preserving information + // Generate filenames with timestamp + const timestamp = Date.now() + const dateStr = new Date(timestamp) + .toLocaleString("en-US", { + month: "short", + day: "2-digit", + year: "numeric", + hour: "2-digit", + minute: "2-digit", + second: "2-digit", + hour12: false, + }) + .toLowerCase() + .replace(/[,:]/g, "-") + .replace(/\s+/g, "-") + + const debugDir = path.join(this.providerRef.deref()?.context.extensionUri.fsPath || "", "debug") + const beforeName = path.join(debugDir, `conversation_before_compression_${timestamp}_${dateStr}.json`) + const afterName = path.join(debugDir, `conversation_after_compression_${timestamp}_${dateStr}.json`) + + await compressConversationHistory(messages, this.taskId, beforeName, afterName) + // Determine if truncation is needed and apply if necessary - return effectiveTokens > allowedTokens ? truncateConversation(messages, 0.5) : messages + if (effectiveTokens > allowedTokens) { + messages = truncateConversation(messages, 0.5) + } + + return messages }