diff --git a/packages/types/src/experiment.ts b/packages/types/src/experiment.ts index 10384db8ed..b455a170b7 100644 --- a/packages/types/src/experiment.ts +++ b/packages/types/src/experiment.ts @@ -6,7 +6,7 @@ import type { Keys, Equals, AssertEqual } from "./type-fu.js" * ExperimentId */ -export const experimentIds = ["powerSteering", "multiFileApplyDiff"] as const +export const experimentIds = ["powerSteering", "multiFileApplyDiff", "readFileDeduplication"] as const export const experimentIdsSchema = z.enum(experimentIds) @@ -19,6 +19,7 @@ export type ExperimentId = z.infer export const experimentsSchema = z.object({ powerSteering: z.boolean().optional(), multiFileApplyDiff: z.boolean().optional(), + readFileDeduplication: z.boolean().optional(), }) export type Experiments = z.infer diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index fe8fd0f68f..57d639b7cd 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -340,6 +340,140 @@ export class Task extends EventEmitter { await this.saveApiConversationHistory() } + public async deduplicateReadFileHistory(): Promise { + // Check if the experimental feature is enabled + const state = await this.providerRef.deref()?.getState() + if (!state?.experiments || !experiments.isEnabled(state.experiments, EXPERIMENT_IDS.READ_FILE_DEDUPLICATION)) { + return + } + + // Track files we've seen and their most recent location + const fileLastSeen = new Map() + const blocksToRemove = new Map>() // messageIndex -> Set of blockIndices to remove + + // Iterate through messages in reverse order (newest first) + for (let i = this.apiConversationHistory.length - 1; i >= 0; i--) { + const message = this.apiConversationHistory[i] + + // Only process user messages + if (message.role !== "user") continue + + const content = Array.isArray(message.content) ? message.content : [{ type: "text", text: message.content }] + + // Track blocks to remove within this message + const blockIndicesToRemove = new Set() + + // Iterate through blocks in reverse order within the message + for (let j = content.length - 1; j >= 0; j--) { + const block = content[j] + if (block.type !== "text") continue + + const text = block.text + + // Check if this is a read_file result + if (!text.startsWith("[read_file") || !text.includes("Result:")) continue + + // Extract file paths from the result + const filePaths = this.extractFilePathsFromReadResult(text) + + // For each file path, check if we've seen it before + for (const filePath of filePaths) { + const lastSeen = fileLastSeen.get(filePath) + if (lastSeen) { + // We've seen this file before + if (lastSeen.messageIndex === i) { + // It's in the same message, mark the older block for removal + blockIndicesToRemove.add(j) + } else { + // It's in a different message, mark this specific block for removal + if (!blocksToRemove.has(i)) { + blocksToRemove.set(i, new Set()) + } + blocksToRemove.get(i)!.add(j) + } + } else { + // First time seeing this file (going backwards), record it + fileLastSeen.set(filePath, { messageIndex: i, blockIndex: j }) + } + } + } + + // If we have blocks to remove from this message, add them to the map + if (blockIndicesToRemove.size > 0) { + if (!blocksToRemove.has(i)) { + blocksToRemove.set(i, new Set()) + } + blockIndicesToRemove.forEach((idx) => blocksToRemove.get(i)!.add(idx)) + } + } + + // Apply the removals + if (blocksToRemove.size > 0) { + let modified = false + + // Create a new conversation history with duplicates removed + this.apiConversationHistory = this.apiConversationHistory + .map((message, messageIndex) => { + const blocksToRemoveForMessage = blocksToRemove.get(messageIndex) + if (!blocksToRemoveForMessage || blocksToRemoveForMessage.size === 0) { + return message + } + + // This message has blocks to remove + const content = Array.isArray(message.content) + ? message.content + : [{ type: "text", text: message.content }] + + // Check if this is a string content (legacy format) + if (!Array.isArray(message.content)) { + // For string content, we can only remove the entire message if it's a duplicate + if (blocksToRemoveForMessage.has(0)) { + modified = true + return null + } + return message + } + + const newContent = content.filter((_, blockIndex) => !blocksToRemoveForMessage.has(blockIndex)) + + // If all content was removed, filter out this message entirely + if (newContent.length === 0) { + modified = true + return null + } + + modified = true + return { ...message, content: newContent } + }) + .filter((message) => message !== null) as ApiMessage[] + + if (modified) { + await this.saveApiConversationHistory() + } + } + } + + private extractFilePathsFromReadResult(text: string): string[] { + const paths: string[] = [] + + // Match file paths in the XML structure + // Handles both single file and multi-file formats + const filePathRegex = /\s*([^<]+)<\/path>/g + let match + + while ((match = filePathRegex.exec(text)) !== null) { + paths.push(match[1].trim()) + } + + // Also handle legacy format where path might be in the header + const headerMatch = text.match(/\[read_file for '([^']+)'\]/) + if (headerMatch && paths.length === 0) { + paths.push(headerMatch[1]) + } + + return paths + } + private async saveApiConversationHistory() { try { await saveApiMessages({ @@ -1254,6 +1388,9 @@ export class Task extends EventEmitter { await this.addToApiConversationHistory({ role: "user", content: finalUserContent }) TelemetryService.instance.captureConversationMessage(this.taskId, "user") + // Deduplicate read_file history after adding new content + await this.deduplicateReadFileHistory() + // Since we sent off a placeholder api_req_started message to update the // webview while waiting to actually start the API request (to load // potential details for example), we need to update the text of that diff --git a/src/core/task/__tests__/Task.spec.ts b/src/core/task/__tests__/Task.spec.ts index 9aa5a8d7a8..130e9a623a 100644 --- a/src/core/task/__tests__/Task.spec.ts +++ b/src/core/task/__tests__/Task.spec.ts @@ -1493,5 +1493,523 @@ describe("Cline", () => { expect(noModelTask.apiConfiguration.apiProvider).toBe("openai") }) }) + + describe("deduplicateReadFileHistory", () => { + let mockProvider: any + let mockApiConfig: any + + beforeEach(() => { + vi.clearAllMocks() + + mockApiConfig = { + apiProvider: "anthropic", + apiKey: "test-key", + } + + mockProvider = { + context: { + globalStorageUri: { fsPath: "/test/storage" }, + }, + getState: vi.fn(), + postStateToWebview: vi.fn().mockResolvedValue(undefined), + postMessageToWebview: vi.fn().mockResolvedValue(undefined), + updateTaskHistory: vi.fn().mockResolvedValue(undefined), + } + }) + + it("should not deduplicate when experiment is disabled", async () => { + mockProvider.getState.mockResolvedValue({ + experiments: { + [EXPERIMENT_IDS.READ_FILE_DEDUPLICATION]: false, + }, + }) + + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + // Add duplicate read_file results + task.apiConversationHistory = [ + { + role: "user", + content: [ + { + type: "text", + text: "[read_file for 'test.ts'] Result:\ntest.tscontent1", + }, + ], + ts: Date.now(), + }, + { + role: "user", + content: [ + { + type: "text", + text: "[read_file for 'test.ts'] Result:\ntest.tscontent2", + }, + ], + ts: Date.now(), + }, + ] + + await task.deduplicateReadFileHistory() + + // Should not remove any messages when experiment is disabled + expect(task.apiConversationHistory.length).toBe(2) + }) + + it("should deduplicate single file reads keeping the most recent", async () => { + mockProvider.getState.mockResolvedValue({ + experiments: { + [EXPERIMENT_IDS.READ_FILE_DEDUPLICATION]: true, + }, + }) + + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + // Add duplicate read_file results + task.apiConversationHistory = [ + { + role: "user", + content: [ + { + type: "text", + text: "[read_file for 'test.ts'] Result:\ntest.tsold content", + }, + ], + ts: Date.now() - 10000, + }, + { + role: "assistant", + content: [{ type: "text", text: "I see the file content." }], + ts: Date.now() - 9000, + }, + { + role: "user", + content: [ + { + type: "text", + text: "[read_file for 'test.ts'] Result:\ntest.tsnew content", + }, + ], + ts: Date.now() - 5000, + }, + ] + + await task.deduplicateReadFileHistory() + + // Should remove the older duplicate + expect(task.apiConversationHistory.length).toBe(2) + expect(task.apiConversationHistory[0].role).toBe("assistant") + const content = task.apiConversationHistory[1].content + if (Array.isArray(content)) { + expect((content[0] as any).text).toContain("new content") + } + }) + + it("should handle multi-file reads correctly", async () => { + mockProvider.getState.mockResolvedValue({ + experiments: { + [EXPERIMENT_IDS.READ_FILE_DEDUPLICATION]: true, + }, + }) + + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + // Add multi-file read results + task.apiConversationHistory = [ + { + role: "user", + content: [ + { + type: "text", + text: "[read_file for 'file1.ts', 'file2.ts'] Result:\nfile1.tscontent1file2.tscontent2", + }, + ], + ts: Date.now() - 10000, + }, + { + role: "user", + content: [ + { + type: "text", + text: "[read_file for 'file1.ts'] Result:\nfile1.tsnew content1", + }, + ], + ts: Date.now() - 5000, + }, + ] + + await task.deduplicateReadFileHistory() + + // Should remove the older multi-file read that contains file1.ts + expect(task.apiConversationHistory.length).toBe(1) + const content = task.apiConversationHistory[0].content + if (Array.isArray(content)) { + expect((content[0] as any).text).toContain("new content1") + } + }) + + it("should handle legacy read_file format", async () => { + mockProvider.getState.mockResolvedValue({ + experiments: { + [EXPERIMENT_IDS.READ_FILE_DEDUPLICATION]: true, + }, + }) + + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + // Add legacy format read_file results + task.apiConversationHistory = [ + { + role: "user", + content: [ + { + type: "text", + text: "[read_file for 'legacy.ts'] Result:\nFile content without XML wrapper", + }, + ], + ts: Date.now() - 10000, + }, + { + role: "user", + content: [ + { + type: "text", + text: "[read_file for 'legacy.ts'] Result:\nlegacy.tsnew content", + }, + ], + ts: Date.now() - 5000, + }, + ] + + await task.deduplicateReadFileHistory() + + // Should remove the older legacy format + expect(task.apiConversationHistory.length).toBe(1) + const content = task.apiConversationHistory[0].content + if (Array.isArray(content)) { + expect((content[0] as any).text).toContain("new content") + } + }) + + it("should preserve non-read_file messages", async () => { + mockProvider.getState.mockResolvedValue({ + experiments: { + [EXPERIMENT_IDS.READ_FILE_DEDUPLICATION]: true, + }, + }) + + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + // Add mixed messages + task.apiConversationHistory = [ + { + role: "user", + content: [{ type: "text", text: "Regular user message" }], + ts: Date.now() - 10000, + }, + { + role: "user", + content: [ + { + type: "text", + text: "[read_file for 'test.ts'] Result:\ntest.ts", + }, + { type: "image", source: { type: "base64", media_type: "image/png", data: "base64data" } }, + ], + ts: Date.now() - 9000, + }, + { + role: "assistant", + content: [{ type: "text", text: "Assistant response" }], + ts: Date.now() - 8000, + }, + { + role: "user", + content: [ + { + type: "text", + text: "[read_file for 'test.ts'] Result:\ntest.ts", + }, + ], + ts: Date.now() - 7000, + }, + ] + + await task.deduplicateReadFileHistory() + + // Should preserve all messages but remove only the older duplicate read_file block + expect(task.apiConversationHistory.length).toBe(4) + + // First message should be unchanged + const content0 = task.apiConversationHistory[0].content + if (Array.isArray(content0)) { + expect((content0[0] as any).text).toBe("Regular user message") + } + + // Second message should only have the image block (read_file removed) + const content1 = task.apiConversationHistory[1].content + if (Array.isArray(content1)) { + expect(content1.length).toBe(1) + expect((content1[0] as any).type).toBe("image") + } + + // Third message (assistant) should be unchanged + expect(task.apiConversationHistory[2].role).toBe("assistant") + + // Fourth message should have the most recent read_file + const content3 = task.apiConversationHistory[3].content + if (Array.isArray(content3)) { + expect((content3[0] as any).text).toContain("[read_file for 'test.ts']") + } + }) + + it("should handle multiple different files without deduplication", async () => { + mockProvider.getState.mockResolvedValue({ + experiments: { + [EXPERIMENT_IDS.READ_FILE_DEDUPLICATION]: true, + }, + }) + + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + // Add different file reads + task.apiConversationHistory = [ + { + role: "user", + content: [ + { + type: "text", + text: "[read_file for 'file1.ts'] Result:\nfile1.tscontent1", + }, + ], + ts: Date.now() - 10000, + }, + { + role: "user", + content: [ + { + type: "text", + text: "[read_file for 'file2.ts'] Result:\nfile2.tscontent2", + }, + ], + ts: Date.now() - 5000, + }, + { + role: "user", + content: [ + { + type: "text", + text: "[read_file for 'file3.ts'] Result:\nfile3.tscontent3", + }, + ], + ts: Date.now() - 1000, + }, + ] + + await task.deduplicateReadFileHistory() + + // Should not remove any messages as they are all different files + expect(task.apiConversationHistory.length).toBe(3) + }) + + it("should handle empty conversation history", async () => { + mockProvider.getState.mockResolvedValue({ + experiments: { + [EXPERIMENT_IDS.READ_FILE_DEDUPLICATION]: true, + }, + }) + + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + task.apiConversationHistory = [] + + await task.deduplicateReadFileHistory() + + // Should not throw and history should remain empty + expect(task.apiConversationHistory.length).toBe(0) + }) + + it("should handle messages with string content", async () => { + mockProvider.getState.mockResolvedValue({ + experiments: { + [EXPERIMENT_IDS.READ_FILE_DEDUPLICATION]: true, + }, + }) + + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + // Add messages with string content (legacy format) + task.apiConversationHistory = [ + { + role: "user", + content: + "[read_file for 'test.ts'] Result:\ntest.tsold", + ts: Date.now() - 10000, + }, + { + role: "user", + content: + "[read_file for 'test.ts'] Result:\ntest.tsnew", + ts: Date.now() - 5000, + }, + ] + + await task.deduplicateReadFileHistory() + + // Should handle string content and remove the older duplicate + expect(task.apiConversationHistory.length).toBe(1) + expect(task.apiConversationHistory[0].content).toContain("new") + }) + + it("should handle multiple read_file results in the same message", async () => { + mockProvider.getState.mockResolvedValue({ + experiments: { + [EXPERIMENT_IDS.READ_FILE_DEDUPLICATION]: true, + }, + }) + + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + // Add a message with multiple read_file results for the same file + task.apiConversationHistory = [ + { + role: "user", + content: [ + { + type: "text", + text: "[read_file for 'test.ts'] Result:\ntest.tscontent1", + }, + { + type: "text", + text: "Some other content", + }, + { + type: "text", + text: "[read_file for 'test.ts'] Result:\ntest.tscontent2", + }, + { + type: "text", + text: "[read_file for 'test.ts'] Result:\ntest.tscontent3", + }, + ], + ts: Date.now(), + }, + ] + + await task.deduplicateReadFileHistory() + + // Should keep only the most recent read (last one) and the non-read_file content + expect(task.apiConversationHistory.length).toBe(1) + const content = task.apiConversationHistory[0].content + if (Array.isArray(content)) { + expect(content.length).toBe(2) // "Some other content" and the last read_file + expect((content[0] as any).text).toBe("Some other content") + expect((content[1] as any).text).toContain("content3") + } + }) + + it("should handle duplicates across different messages and within same message", async () => { + mockProvider.getState.mockResolvedValue({ + experiments: { + [EXPERIMENT_IDS.READ_FILE_DEDUPLICATION]: true, + }, + }) + + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + // Add messages with duplicates both across messages and within a message + task.apiConversationHistory = [ + { + role: "user", + content: [ + { + type: "text", + text: "[read_file for 'file1.ts'] Result:\nfile1.tsold1", + }, + ], + ts: Date.now() - 10000, + }, + { + role: "user", + content: [ + { + type: "text", + text: "[read_file for 'file1.ts'] Result:\nfile1.tsold2", + }, + { + type: "text", + text: "[read_file for 'file2.ts'] Result:\nfile2.tscontent2", + }, + { + type: "text", + text: "[read_file for 'file1.ts'] Result:\nfile1.tsnewest", + }, + ], + ts: Date.now() - 5000, + }, + ] + + await task.deduplicateReadFileHistory() + + // Should remove the first message entirely and keep only the last read of file1.ts in the second message + expect(task.apiConversationHistory.length).toBe(1) + const content = task.apiConversationHistory[0].content + if (Array.isArray(content)) { + expect(content.length).toBe(2) // file2.ts and the newest file1.ts + expect((content[0] as any).text).toContain("file2.ts") + expect((content[1] as any).text).toContain("newest") + } + }) + }) }) }) diff --git a/src/shared/__tests__/experiments.spec.ts b/src/shared/__tests__/experiments.spec.ts index 4a8f06d62a..cbd0d1d730 100644 --- a/src/shared/__tests__/experiments.spec.ts +++ b/src/shared/__tests__/experiments.spec.ts @@ -23,11 +23,21 @@ describe("experiments", () => { }) }) + describe("READ_FILE_DEDUPLICATION", () => { + it("is configured correctly", () => { + expect(EXPERIMENT_IDS.READ_FILE_DEDUPLICATION).toBe("readFileDeduplication") + expect(experimentConfigsMap.READ_FILE_DEDUPLICATION).toMatchObject({ + enabled: false, + }) + }) + }) + describe("isEnabled", () => { it("returns false when POWER_STEERING experiment is not enabled", () => { const experiments: Record = { powerSteering: false, multiFileApplyDiff: false, + readFileDeduplication: false, } expect(Experiments.isEnabled(experiments, EXPERIMENT_IDS.POWER_STEERING)).toBe(false) }) @@ -36,6 +46,7 @@ describe("experiments", () => { const experiments: Record = { powerSteering: true, multiFileApplyDiff: false, + readFileDeduplication: false, } expect(Experiments.isEnabled(experiments, EXPERIMENT_IDS.POWER_STEERING)).toBe(true) }) @@ -44,8 +55,27 @@ describe("experiments", () => { const experiments: Record = { powerSteering: false, multiFileApplyDiff: false, + readFileDeduplication: false, } expect(Experiments.isEnabled(experiments, EXPERIMENT_IDS.POWER_STEERING)).toBe(false) }) + + it("returns false when READ_FILE_DEDUPLICATION experiment is not enabled", () => { + const experiments: Record = { + powerSteering: false, + multiFileApplyDiff: false, + readFileDeduplication: false, + } + expect(Experiments.isEnabled(experiments, EXPERIMENT_IDS.READ_FILE_DEDUPLICATION)).toBe(false) + }) + + it("returns true when experiment READ_FILE_DEDUPLICATION is enabled", () => { + const experiments: Record = { + powerSteering: false, + multiFileApplyDiff: false, + readFileDeduplication: true, + } + expect(Experiments.isEnabled(experiments, EXPERIMENT_IDS.READ_FILE_DEDUPLICATION)).toBe(true) + }) }) }) diff --git a/src/shared/experiments.ts b/src/shared/experiments.ts index 1edadf654f..f5f490c228 100644 --- a/src/shared/experiments.ts +++ b/src/shared/experiments.ts @@ -3,6 +3,7 @@ import type { AssertEqual, Equals, Keys, Values, ExperimentId, Experiments } fro export const EXPERIMENT_IDS = { MULTI_FILE_APPLY_DIFF: "multiFileApplyDiff", POWER_STEERING: "powerSteering", + READ_FILE_DEDUPLICATION: "readFileDeduplication", } as const satisfies Record type _AssertExperimentIds = AssertEqual>> @@ -16,6 +17,7 @@ interface ExperimentConfig { export const experimentConfigsMap: Record = { MULTI_FILE_APPLY_DIFF: { enabled: false }, POWER_STEERING: { enabled: false }, + READ_FILE_DEDUPLICATION: { enabled: false }, } export const experimentDefault = Object.fromEntries( diff --git a/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx b/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx index 1e5867d3fc..abe1725003 100644 --- a/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx +++ b/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx @@ -226,6 +226,7 @@ describe("mergeExtensionState", () => { disableCompletionCommand: false, concurrentFileReads: true, multiFileApplyDiff: true, + readFileDeduplication: false, } as Record, }