Skip to content

Commit 6a3b160

Browse files
committed
fix: improve deduplicateReadFileHistory implementation
- Use Map for O(n) performance instead of nested loops
- Add timestamp-based cache preservation (30 minutes)
- Add robust validation to prevent runtime errors
- Use regex pattern matching for better reliability
- Add comprehensive JSDoc documentation
- Add extensive unit tests covering edge cases

Addresses review feedback from @daniel-lxs
1 parent c80b24a commit 6a3b160

File tree

2 files changed

+479
-17
lines changed

2 files changed

+479
-17
lines changed

src/core/task/Task.ts

Lines changed: 69 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1895,37 +1895,89 @@ export class Task extends EventEmitter<ClineEvents> {
18951895
yield* iterator
18961896
}
18971897

1898+
/**
1899+
* Deduplicates read_file tool results in the conversation history to optimize context length.
1900+
* Keeps only the most recent read_file result for each file, removing older occurrences.
1901+
*
1902+
* This method:
1903+
* - Uses a Map for O(n) performance instead of nested loops
1904+
* - Preserves messages within the LLM cache window (30 minutes)
1905+
* - Validates array operations to prevent runtime errors
1906+
* - Uses robust pattern matching for tool identification
1907+
*
1908+
* @remarks
1909+
* The method assumes read_file messages have a specific structure:
1910+
* - First content item contains text like "[read_file for path/to/file]"
1911+
* - Second content item contains the actual file content
1912+
* - Third content item contains additional metadata
1913+
*/
18981914
deduplicateReadFileHistory() {
1899-
for (let i = this.apiConversationHistory.length - 1; i >= 0; i--) {
1915+
const CACHE_WINDOW_MS = 30 * 60 * 1000 // 30 minutes
1916+
const currentTime = Date.now()
1917+
1918+
// Map to track the most recent occurrence of each file
1919+
const fileOccurrences = new Map<string, { index: number; timestamp?: number }>()
1920+
1921+
// Pattern to match read_file tool use more robustly
1922+
const READ_FILE_PATTERN = /^\[read_file for (.+?)\]/
1923+
1924+
// First pass: identify all read_file occurrences
1925+
for (let i = 0; i < this.apiConversationHistory.length; i++) {
19001926
const conversation = this.apiConversationHistory[i]
19011927

1928+
// Skip non-user messages
19021929
if (conversation.role !== "user") continue
19031930

1931+
// Validate content structure
19041932
const content = conversation.content
1905-
if (typeof content === "string") continue
1933+
if (typeof content === "string" || !Array.isArray(content)) continue
19061934

1935+
// Check first item for read_file pattern
19071936
const firstItem = content[0]
1908-
if (typeof firstItem === "string" || !("type" in firstItem) || firstItem.type !== "text") continue
1937+
if (!firstItem || typeof firstItem !== "object" || firstItem.type !== "text") continue
1938+
1939+
const match = firstItem.text?.match(READ_FILE_PATTERN)
1940+
if (!match) continue
1941+
1942+
const filePath = match[1]
1943+
const messageTimestamp = conversation.ts
1944+
1945+
// Store the most recent occurrence of each file
1946+
fileOccurrences.set(filePath, { index: i, timestamp: messageTimestamp })
1947+
}
19091948

1910-
const toolUseText = firstItem.text
1911-
if (!toolUseText || !toolUseText.startsWith("[read_file for ")) continue
1949+
// Second pass: remove older occurrences
1950+
for (let i = this.apiConversationHistory.length - 1; i >= 0; i--) {
1951+
const conversation = this.apiConversationHistory[i]
1952+
1953+
if (conversation.role !== "user") continue
1954+
1955+
const content = conversation.content
1956+
if (typeof content === "string" || !Array.isArray(content)) continue
19121957

1913-
for (let j = i - 1; j >= 0; j--) {
1914-
const prevConversation = this.apiConversationHistory[j]
1958+
const firstItem = content[0]
1959+
if (!firstItem || typeof firstItem !== "object" || firstItem.type !== "text") continue
19151960

1916-
if (prevConversation.role === "assistant") continue
1961+
const match = firstItem.text?.match(READ_FILE_PATTERN)
1962+
if (!match) continue
19171963

1918-
const prevContent = prevConversation.content
1919-
if (typeof prevContent === "string") continue
1964+
const filePath = match[1]
1965+
const mostRecent = fileOccurrences.get(filePath)
19201966

1921-
const prevFirstItem = prevContent[0]
1922-
if (typeof prevFirstItem === "string" || !("type" in prevFirstItem) || prevFirstItem.type !== "text")
1923-
continue
1967+
// Skip if this is the most recent occurrence
1968+
if (mostRecent && mostRecent.index === i) continue
19241969

1925-
if (prevFirstItem.text === toolUseText && prevContent.length === 3) {
1926-
prevContent.splice(1, 1)
1927-
break
1928-
}
1970+
// Check if message is within cache window
1971+
const messageTimestamp = conversation.ts
1972+
if (messageTimestamp && currentTime - messageTimestamp < CACHE_WINDOW_MS) {
1973+
// Preserve messages within cache window
1974+
continue
1975+
}
1976+
1977+
// Safely remove the file content (second item) if structure is as expected
1978+
if (content.length >= 2 && content[1]?.type === "text") {
1979+
// Remove the file content while preserving the tool use and any additional items
1980+
content.splice(1, 1)
19291981
}
19301982
}
19311983
}

0 commit comments

Comments
 (0)