From 15fc448d80c896fc12326c2ef5a26cd3d6a75193 Mon Sep 17 00:00:00 2001 From: MuriloFP Date: Mon, 7 Jul 2025 18:05:34 -0300 Subject: [PATCH 1/3] feat(task): add file deduplication to reduce token usage - Implement deduplicateReadFileHistory() method in Task.ts - Add support for partial file reads with line ranges - Keep the first @mention file_content block and deduplicate later repeats of the same file - Make feature configurable via contextDeduplication experiment flag - Add comprehensive test coverage for all deduplication scenarios Re-implements PR #1374 functionality on current codebase structure --- packages/types/src/experiment.ts | 3 +- src/core/task/Task.ts | 191 ++++++++++++ src/core/task/__tests__/Task.spec.ts | 415 +++++++++++++++++++++++++++ src/shared/experiments.ts | 2 + 4 files changed, 610 insertions(+), 1 deletion(-) diff --git a/packages/types/src/experiment.ts b/packages/types/src/experiment.ts index 10384db8ed..98c796b8c0 100644 --- a/packages/types/src/experiment.ts +++ b/packages/types/src/experiment.ts @@ -6,7 +6,7 @@ import type { Keys, Equals, AssertEqual } from "./type-fu.js" * ExperimentId */ -export const experimentIds = ["powerSteering", "multiFileApplyDiff"] as const +export const experimentIds = ["powerSteering", "multiFileApplyDiff", "contextDeduplication"] as const export const experimentIdsSchema = z.enum(experimentIds) @@ -19,6 +19,7 @@ export type ExperimentId = z.infer export const experimentsSchema = z.object({ powerSteering: z.boolean().optional(), multiFileApplyDiff: z.boolean().optional(), + contextDeduplication: z.boolean().optional(), }) export type Experiments = z.infer diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 31260cd6fa..142b411a93 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -348,6 +348,187 @@ export class Task extends EventEmitter { } } + // Context deduplication methods + private deduplicateReadFileHistory(): { removedCount: number; tokensSaved: number } { + const seenFiles = new Map< + string, + { + 
messageIndex: number + contentIndex: number + blockIndex?: number + ranges?: Array<{ start: number; end: number }> + isFullRead: boolean + } + >() + + let removedCount = 0 + let tokensSaved = 0 + + // Scan forwards to keep first occurrence and remove later duplicates + for (let i = 0; i < this.apiConversationHistory.length; i++) { + const message = this.apiConversationHistory[i] + if (message.role !== "user" || typeof message.content === "string") continue + + for (let j = 0; j < message.content.length; j++) { + const content = message.content[j] + + // Handle tool_result blocks + if (content.type === "tool_result" && content.content) { + const toolContent = Array.isArray(content.content) ? content.content : [content.content] + + for (let k = 0; k < toolContent.length; k++) { + const block = toolContent[k] + if (typeof block === "object" && block.type === "text") { + const fileReads = this.parseFileReads(block.text) + + for (const fileRead of fileReads) { + const existing = seenFiles.get(fileRead.path) + + if (!existing) { + // First occurrence - keep it + seenFiles.set(fileRead.path, { + messageIndex: i, + contentIndex: j, + blockIndex: k, + ranges: fileRead.ranges, + isFullRead: fileRead.isFullRead, + }) + } else if (this.shouldRemoveContent(fileRead, existing)) { + // Remove this duplicate occurrence + const oldContent = typeof block === "object" && "text" in block ? 
block.text : "" + const estimatedTokens = Math.ceil(oldContent.length / 4) // Rough token estimate + tokensSaved += estimatedTokens + + // Replace with deduplication notice + if (Array.isArray(content.content)) { + content.content[k] = { + type: "text", + text: `[File content removed - already read ${fileRead.path}]`, + } + } + removedCount++ + } + } + } + } + } + // Handle text blocks with file_content tags (from @mentions) + else if (content.type === "text") { + const fileContentMatches = Array.from( + content.text.matchAll(/]*>([\s\S]*?)<\/file_content>/g), + ) + + for (const match of fileContentMatches) { + const [fullMatch, filePath, fileContent] = match + const existing = seenFiles.get(filePath) + + if (!existing) { + seenFiles.set(filePath, { + messageIndex: i, + contentIndex: j, + isFullRead: true, + }) + } else { + // Remove duplicate file_content + const estimatedTokens = Math.ceil(fileContent.length / 4) + tokensSaved += estimatedTokens + + content.text = content.text.replace( + fullMatch, + `[Content removed - already included]`, + ) + removedCount++ + } + } + } + } + } + + return { removedCount, tokensSaved } + } + + private parseFileReads(text: string): Array<{ + path: string + ranges?: Array<{ start: number; end: number }> + isFullRead: boolean + }> { + const results: Array<{ + path: string + ranges?: Array<{ start: number; end: number }> + isFullRead: boolean + }> = [] + + // Match read_file results in the format from readFileTool + // Pattern for: Result:filepath + const filePathPattern = /([^<]+)<\/path>/g + let match + + while ((match = filePathPattern.exec(text)) !== null) { + const path = match[1] + const fileResult = { + path, + ranges: [] as Array<{ start: number; end: number }>, + isFullRead: true, + } + + // Check for line ranges in the same file block + const fileBlockMatch = text.match( + new RegExp(`${path.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}<\/path>[\\s\\S]*?<\/file>`, "s"), + ) + if (fileBlockMatch) { + const fileBlock = 
fileBlockMatch[0] + // Look for content with line ranges + const rangeMatches = fileBlock.matchAll(//g) + + for (const rangeMatch of rangeMatches) { + const [, start, end] = rangeMatch + fileResult.ranges?.push({ + start: parseInt(start, 10), + end: parseInt(end, 10), + }) + fileResult.isFullRead = false + } + } + + results.push(fileResult) + } + + return results + } + + private shouldRemoveContent( + current: { ranges?: Array<{ start: number; end: number }>; isFullRead: boolean }, + existing: { ranges?: Array<{ start: number; end: number }>; isFullRead: boolean }, + ): boolean { + // If existing is full read, remove all later content + if (existing.isFullRead) return true + + // If current is full read but existing is partial, keep current (don't remove) + if (current.isFullRead && !existing.isFullRead) return false + + // Check for range overlap + if (existing.ranges && current.ranges && existing.ranges.length > 0 && current.ranges.length > 0) { + return this.hasOverlap(existing.ranges, current.ranges) + } + + // Default to removing if we can't determine overlap + return true + } + + private hasOverlap( + rangesA: Array<{ start: number; end: number }>, + rangesB: Array<{ start: number; end: number }>, + ): boolean { + for (const a of rangesA) { + for (const b of rangesB) { + if (a.start <= b.end && b.start <= a.end) { + return true + } + } + } + return false + } + // Cline Messages private async getSavedClineMessages(): Promise { @@ -1724,6 +1905,16 @@ export class Task extends EventEmitter { state?.listApiConfigMeta.find((profile) => profile.name === state?.currentApiConfigName)?.id ?? 
"default" + // Apply context deduplication if enabled + if (state?.experiments?.contextDeduplication) { + const { removedCount, tokensSaved } = this.deduplicateReadFileHistory() + if (removedCount > 0) { + console.log( + `Context deduplication: removed ${removedCount} duplicate file reads, saved ~${tokensSaved} tokens`, + ) + } + } + const truncateResult = await truncateConversationIfNeeded({ messages: this.apiConversationHistory, totalTokens: contextTokens, diff --git a/src/core/task/__tests__/Task.spec.ts b/src/core/task/__tests__/Task.spec.ts index 693f72d1c7..97d76e8567 100644 --- a/src/core/task/__tests__/Task.spec.ts +++ b/src/core/task/__tests__/Task.spec.ts @@ -1334,5 +1334,420 @@ describe("Cline", () => { expect(task.diffStrategy).toBeUndefined() }) }) + + describe("Context Deduplication", () => { + let task: Task + + beforeEach(() => { + task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + }) + + it("should remove duplicate file reads from conversation history", () => { + // Set up conversation history with duplicate file reads + task.apiConversationHistory = [ + { + role: "user", + content: [{ type: "text", text: "Read this file" }], + }, + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "1", + content: [ + { + type: "text", + text: "Result:src/app.tsconst app = 'test';", + }, + ], + }, + ], + }, + { + role: "assistant", + content: [{ type: "text", text: "I see the file" }], + }, + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "2", + content: [ + { + type: "text", + text: "Result:src/app.tsconst app = 'test';", + }, + ], + }, + ], + }, + ] + + const result = (task as any).deduplicateReadFileHistory() + + expect(result.removedCount).toBe(1) + expect(result.tokensSaved).toBeGreaterThan(0) + + // Check that the older duplicate was replaced + const toolResult = task.apiConversationHistory[3].content[0] as any + 
expect(toolResult.type).toBe("tool_result") + expect(toolResult.content[0].text).toContain("[File content removed - already read src/app.ts]") + }) + + it("should handle multiple files in a single read operation", () => { + task.apiConversationHistory = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "1", + content: [ + { + type: "text", + text: `Result: +src/app.tsconst app = 'test'; +src/utils.tsexport const util = () => {}; +`, + }, + ], + }, + ], + }, + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "2", + content: [ + { + type: "text", + text: "Result:src/app.tsconst app = 'test';", + }, + ], + }, + ], + }, + ] + + const result = (task as any).deduplicateReadFileHistory() + + expect(result.removedCount).toBe(1) + // The second read of app.ts should be deduplicated + const toolResult = task.apiConversationHistory[1].content[0] as any + expect(toolResult.content[0].text).toContain("[File content removed - already read src/app.ts]") + }) + + it("should handle partial file reads with line ranges", () => { + task.apiConversationHistory = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "1", + content: [ + { + type: "text", + text: `Result:src/app.tsconst app = 'test';`, + }, + ], + }, + ], + }, + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "2", + content: [ + { + type: "text", + text: `Result:src/app.tsconst app = 'test';`, + }, + ], + }, + ], + }, + ] + + const result = (task as any).deduplicateReadFileHistory() + + // Should detect overlap and remove the duplicate + expect(result.removedCount).toBe(1) + }) + + it("should keep full file read when partial read exists", () => { + task.apiConversationHistory = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "1", + content: [ + { + type: "text", + text: `Result:src/app.tspartial content`, + }, + ], + }, + ], + }, + { + role: "user", + content: [ + { + type: "tool_result", + 
tool_use_id: "2", + content: [ + { + type: "text", + text: `Result:src/app.tsfull file content`, + }, + ], + }, + ], + }, + ] + + const result = (task as any).deduplicateReadFileHistory() + + // Should not remove the full file read + expect(result.removedCount).toBe(0) + }) + + it("should handle file_content tags from @mentions", () => { + task.apiConversationHistory = [ + { + role: "user", + content: [ + { + type: "text", + text: `Here's a file: const app = 'test';`, + }, + ], + }, + { + role: "user", + content: [ + { + type: "text", + text: `Same file again: const app = 'test';`, + }, + ], + }, + ] + + const result = (task as any).deduplicateReadFileHistory() + + expect(result.removedCount).toBe(1) + expect((task.apiConversationHistory[1].content[0] as any).text).toContain( + "[Content removed - already included]", + ) + }) + + it("should not deduplicate when experiment is disabled", async () => { + // Mock provider state without the experiment enabled + mockProvider.getState = vi.fn().mockResolvedValue({ + apiConfiguration: mockApiConfig, + experiments: { + contextDeduplication: false, + }, + }) + + // Create a new task instance that will use the mocked provider + const taskWithoutExperiment = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + // Add duplicate content + taskWithoutExperiment.apiConversationHistory = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "1", + content: [ + { + type: "text", + text: "Result:src/app.tsconst app = 'test';", + }, + ], + }, + ], + }, + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "2", + content: [ + { + type: "text", + text: "Result:src/app.tsconst app = 'test';", + }, + ], + }, + ], + }, + ] + + // Spy on console.log to verify deduplication is not called + const consoleSpy = vi.spyOn(console, "log") + + // Mock the API stream generator + const mockStream = (async function* () { + yield { type: 
"text", text: "test response" } as ApiStreamChunk + })() + + vi.spyOn(taskWithoutExperiment.api, "createMessage").mockReturnValue(mockStream) + + // Trigger attemptApiRequest which should check the experiment flag + const generator = taskWithoutExperiment.attemptApiRequest() + + // Consume the generator to trigger the deduplication check + for await (const chunk of generator) { + // Just consume the chunks + } + + // Verify deduplication was not called + expect(consoleSpy).not.toHaveBeenCalledWith(expect.stringContaining("Context deduplication:")) + + // Verify content was not modified + const secondToolResult = taskWithoutExperiment.apiConversationHistory[1].content[0] as any + expect(secondToolResult.content[0].text).not.toContain("[File content removed") + }) + + it("should correctly identify overlapping line ranges", () => { + const ranges1 = [ + { start: 1, end: 10 }, + { start: 20, end: 30 }, + ] + const ranges2 = [ + { start: 5, end: 15 }, + { start: 25, end: 35 }, + ] + + expect((task as any).hasOverlap(ranges1, ranges2)).toBe(true) + + const ranges3 = [{ start: 1, end: 10 }] + const ranges4 = [{ start: 11, end: 20 }] + + expect((task as any).hasOverlap(ranges3, ranges4)).toBe(false) + }) + + it("should handle edge cases in file path parsing", () => { + task.apiConversationHistory = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "1", + content: [ + { + type: "text", + text: `Result:src/files/with spaces.tstest`, + }, + ], + }, + ], + }, + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "2", + content: [ + { + type: "text", + text: `Result:src/files/with spaces.tstest`, + }, + ], + }, + ], + }, + ] + + const result = (task as any).deduplicateReadFileHistory() + expect(result.removedCount).toBe(1) + }) + + it("should handle string content in tool_result", () => { + task.apiConversationHistory = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "1", + content: "Simple string 
content", + }, + ], + }, + ] + + // Should not throw error + expect(() => (task as any).deduplicateReadFileHistory()).not.toThrow() + }) + + it("should calculate token savings correctly", () => { + const longContent = "a".repeat(1000) // 1000 characters + task.apiConversationHistory = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "1", + content: [ + { + type: "text", + text: `Result:src/app.ts${longContent}`, + }, + ], + }, + ], + }, + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "2", + content: [ + { + type: "text", + text: `Result:src/app.ts${longContent}`, + }, + ], + }, + ], + }, + ] + + const result = (task as any).deduplicateReadFileHistory() + + // Rough estimate: 1000 chars / 4 = ~250 tokens + expect(result.tokensSaved).toBeGreaterThan(200) + expect(result.tokensSaved).toBeLessThan(300) + }) + }) }) }) diff --git a/src/shared/experiments.ts b/src/shared/experiments.ts index 1edadf654f..c4f438f18f 100644 --- a/src/shared/experiments.ts +++ b/src/shared/experiments.ts @@ -3,6 +3,7 @@ import type { AssertEqual, Equals, Keys, Values, ExperimentId, Experiments } fro export const EXPERIMENT_IDS = { MULTI_FILE_APPLY_DIFF: "multiFileApplyDiff", POWER_STEERING: "powerSteering", + CONTEXT_DEDUPLICATION: "contextDeduplication", } as const satisfies Record type _AssertExperimentIds = AssertEqual>> @@ -16,6 +17,7 @@ interface ExperimentConfig { export const experimentConfigsMap: Record = { MULTI_FILE_APPLY_DIFF: { enabled: false }, POWER_STEERING: { enabled: false }, + CONTEXT_DEDUPLICATION: { enabled: false }, } export const experimentDefault = Object.fromEntries( From b6d872512e716f2b9c6ae8097152515a6b7a815b Mon Sep 17 00:00:00 2001 From: MuriloFP Date: Mon, 7 Jul 2025 18:08:30 -0300 Subject: [PATCH 2/3] fix(tests): update experiments test to include contextDeduplication --- src/shared/__tests__/experiments.spec.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/shared/__tests__/experiments.spec.ts 
b/src/shared/__tests__/experiments.spec.ts index 4a8f06d62a..2b1b456624 100644 --- a/src/shared/__tests__/experiments.spec.ts +++ b/src/shared/__tests__/experiments.spec.ts @@ -28,6 +28,7 @@ describe("experiments", () => { const experiments: Record = { powerSteering: false, multiFileApplyDiff: false, + contextDeduplication: false, } expect(Experiments.isEnabled(experiments, EXPERIMENT_IDS.POWER_STEERING)).toBe(false) }) @@ -36,6 +37,7 @@ describe("experiments", () => { const experiments: Record = { powerSteering: true, multiFileApplyDiff: false, + contextDeduplication: false, } expect(Experiments.isEnabled(experiments, EXPERIMENT_IDS.POWER_STEERING)).toBe(true) }) @@ -44,6 +46,7 @@ describe("experiments", () => { const experiments: Record = { powerSteering: false, multiFileApplyDiff: false, + contextDeduplication: false, } expect(Experiments.isEnabled(experiments, EXPERIMENT_IDS.POWER_STEERING)).toBe(false) }) From a94957d1d38a119a1bc496fd8dd433c093a9a6f8 Mon Sep 17 00:00:00 2001 From: MuriloFP Date: Mon, 7 Jul 2025 18:10:40 -0300 Subject: [PATCH 3/3] fix(tests): update webview tests to use valid experiment IDs --- .../src/context/__tests__/ExtensionStateContext.spec.tsx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx b/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx index 1e5867d3fc..1922af031c 100644 --- a/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx +++ b/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx @@ -222,10 +222,8 @@ describe("mergeExtensionState", () => { apiConfiguration: { modelMaxThinkingTokens: 456, modelTemperature: 0.3 }, experiments: { powerSteering: true, - marketplace: false, - disableCompletionCommand: false, - concurrentFileReads: true, multiFileApplyDiff: true, + contextDeduplication: false, } as Record, }