Skip to content

Commit 2712cba

Browse files
committed
feat: implement read_file history deduplication (#6279)
- Add READ_FILE_DEDUPLICATION experimental feature flag
- Implement deduplicateReadFileHistory method in Task class
- Integrate deduplication into recursivelyMakeClineRequests flow
- Add comprehensive unit tests for deduplication logic
- Handle both inter-message and intra-message deduplication
- Preserve non-read_file content when removing duplicates
1 parent 7b756e3 commit 2712cba

File tree

6 files changed

+690
-1
lines changed

6 files changed

+690
-1
lines changed

packages/types/src/experiment.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import type { Keys, Equals, AssertEqual } from "./type-fu.js"
66
* ExperimentId
77
*/
88

9-
export const experimentIds = ["powerSteering", "multiFileApplyDiff"] as const
9+
export const experimentIds = ["powerSteering", "multiFileApplyDiff", "readFileDeduplication"] as const
1010

1111
export const experimentIdsSchema = z.enum(experimentIds)
1212

@@ -19,6 +19,7 @@ export type ExperimentId = z.infer<typeof experimentIdsSchema>
1919
export const experimentsSchema = z.object({
2020
powerSteering: z.boolean().optional(),
2121
multiFileApplyDiff: z.boolean().optional(),
22+
readFileDeduplication: z.boolean().optional(),
2223
})
2324

2425
export type Experiments = z.infer<typeof experimentsSchema>

src/core/task/Task.ts

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,140 @@ export class Task extends EventEmitter<ClineEvents> {
340340
await this.saveApiConversationHistory()
341341
}
342342

343+
/**
 * Drops superseded `read_file` results from `apiConversationHistory` so that
 * only the most recent read of each file is kept.
 *
 * Does nothing unless the `READ_FILE_DEDUPLICATION` experiment is enabled in
 * the provider state.
 *
 * The history is scanned newest-to-oldest (messages, and blocks within each
 * message). A text block counts as a read_file result when it starts with
 * `[read_file` and contains `Result:`. Such a block is removed only when
 * EVERY file path it reports has already been reported by a newer block that
 * is being kept. A multi-file result that is only partially superseded is
 * therefore retained, so the only remaining copy of a file is never dropped.
 *
 * Only user messages are inspected; non-text blocks and text blocks that are
 * not read_file results are never removed. A message whose content becomes
 * empty (or whose plain-string content is itself a superseded result) is
 * removed from the history. The history is persisted via
 * `saveApiConversationHistory()` only when something was removed.
 */
public async deduplicateReadFileHistory(): Promise<void> {
	// Check if the experimental feature is enabled
	const state = await this.providerRef.deref()?.getState()
	if (!state?.experiments || !experiments.isEnabled(state.experiments, EXPERIMENT_IDS.READ_FILE_DEDUPLICATION)) {
		return
	}

	// File paths already covered by a newer read_file block that is being kept
	const coveredPaths = new Set<string>()
	// messageIndex -> indices of the blocks to drop from that message
	const blocksToRemove = new Map<number, Set<number>>()

	// Walk newest-to-oldest so the first occurrence we meet is the one to keep
	for (let i = this.apiConversationHistory.length - 1; i >= 0; i--) {
		const message = this.apiConversationHistory[i]

		// Only process user messages
		if (message.role !== "user") continue

		// Legacy string content is treated as a single synthetic text block at index 0
		const content = Array.isArray(message.content) ? message.content : [{ type: "text", text: message.content }]

		for (let j = content.length - 1; j >= 0; j--) {
			const block = content[j]
			if (block.type !== "text") continue

			const text = block.text

			// Check if this is a read_file result
			if (!text.startsWith("[read_file") || !text.includes("Result:")) continue

			const filePaths = this.extractFilePathsFromReadResult(text)
			if (filePaths.length === 0) continue

			// Remove the block only if it contributes nothing new: every path it
			// reports is already available from a newer, retained block.
			if (filePaths.every((filePath) => coveredPaths.has(filePath))) {
				let indices = blocksToRemove.get(i)
				if (!indices) {
					indices = new Set<number>()
					blocksToRemove.set(i, indices)
				}
				indices.add(j)
				continue
			}

			// The block is kept, so it is now the most recent source for all of
			// its paths. Paths are recorded only for retained blocks; recording
			// them for a removed block would let older reads be dropped as well
			// and lose the content entirely.
			for (const filePath of filePaths) {
				coveredPaths.add(filePath)
			}
		}
	}

	// Nothing was superseded: leave the history untouched and skip the save
	if (blocksToRemove.size === 0) {
		return
	}

	// Rebuild the history without the superseded blocks
	const dedupedHistory: ApiMessage[] = []

	for (let messageIndex = 0; messageIndex < this.apiConversationHistory.length; messageIndex++) {
		const message = this.apiConversationHistory[messageIndex]
		const indices = blocksToRemove.get(messageIndex)

		if (!indices || indices.size === 0) {
			dedupedHistory.push(message)
			continue
		}

		// String content (legacy format) cannot be partially trimmed: the whole
		// message is dropped when its single synthetic block was flagged.
		if (!Array.isArray(message.content)) {
			if (!indices.has(0)) {
				dedupedHistory.push(message)
			}
			continue
		}

		const remaining = message.content.filter((_, blockIndex) => !indices.has(blockIndex))

		// If all content was removed, drop the message entirely.
		// NOTE(review): this can leave two assistant messages adjacent in the
		// history — confirm the API handlers in use tolerate that.
		if (remaining.length === 0) {
			continue
		}

		dedupedHistory.push({ ...message, content: remaining })
	}

	this.apiConversationHistory = dedupedHistory
	await this.saveApiConversationHistory()
}
455+
456+
/**
 * Extracts the file paths reported by a `read_file` tool result.
 *
 * Paths are taken from every `<file><path>…</path>` occurrence in the text
 * (covers single-file and multi-file results). If none is found, the path
 * from a legacy `[read_file for '…']` header is used instead.
 *
 * @param text - Text of a read_file result block.
 * @returns Unique, trimmed, non-empty paths in order of first appearance;
 *          an empty array when no path can be found.
 */
private extractFilePathsFromReadResult(text: string): string[] {
	// A Set keeps first-occurrence order and guarantees each path appears once,
	// so a result that lists the same file twice is not mistaken by callers for
	// two separate reads of that file.
	const paths = new Set<string>()

	// Match file paths in the XML structure
	// Handles both single file and multi-file formats
	const filePathRegex = /<file>\s*<path>([^<]+)<\/path>/g
	let match: RegExpExecArray | null

	while ((match = filePathRegex.exec(text)) !== null) {
		const filePath = match[1].trim()
		// `[^<]+` can match whitespace only; skip what trims down to nothing
		if (filePath.length > 0) {
			paths.add(filePath)
		}
	}

	// Also handle legacy format where path might be in the header
	if (paths.size === 0) {
		const headerMatch = text.match(/\[read_file for '([^']+)'\]/)
		const headerPath = headerMatch ? headerMatch[1].trim() : ""
		if (headerPath.length > 0) {
			paths.add(headerPath)
		}
	}

	return Array.from(paths)
}
476+
343477
private async saveApiConversationHistory() {
344478
try {
345479
await saveApiMessages({
@@ -1254,6 +1388,9 @@ export class Task extends EventEmitter<ClineEvents> {
12541388
await this.addToApiConversationHistory({ role: "user", content: finalUserContent })
12551389
TelemetryService.instance.captureConversationMessage(this.taskId, "user")
12561390

1391+
// Deduplicate read_file history after adding new content
1392+
await this.deduplicateReadFileHistory()
1393+
12571394
// Since we sent off a placeholder api_req_started message to update the
12581395
// webview while waiting to actually start the API request (to load
12591396
// potential details for example), we need to update the text of that

0 commit comments

Comments
 (0)