-
Notifications
You must be signed in to change notification settings - Fork 2.6k
feat: Add file read caching to prevent redundant reads in conversation history #4501
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
49bf0c7
b20bd7b
44ea42f
03ef66a
2071612
369cf70
a9fff91
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| function getFromEnv(key: string, defaultValue: string): string { | ||
| const value = process.env[key] | ||
| return value === undefined ? defaultValue : value | ||
| } | ||
|
|
||
| export const ROO_AGENT_CONFIG = { | ||
| fileReadCacheSize: () => parseInt(getFromEnv("ROO_FILE_READ_CACHE_SIZE", "100")), | ||
| } |
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,140 @@ | ||
| import { vi, describe, it, expect, beforeEach } from "vitest" | ||
| import { | ||
| processAndFilterReadRequest, | ||
| subtractRange, | ||
| subtractRanges, | ||
| ConversationMessage, | ||
| } from "../fileReadCacheService" | ||
| import { stat } from "fs/promises" | ||
| import { lruCache } from "../../utils/lruCache" | ||
| vi.mock("fs/promises", () => ({ | ||
| stat: vi.fn(), | ||
| })) | ||
| vi.mock("../../utils/lruCache") | ||
| vi.mock("../../config/envConfig", () => ({ | ||
| ROO_AGENT_CONFIG: { | ||
| fileReadCacheSize: () => 10, | ||
| }, | ||
| })) | ||
| const mockedStat = vi.mocked(stat) | ||
| describe("fileReadCacheService", () => { | ||
| describe("subtractRange", () => { | ||
| it("should return the original range if there is no overlap", () => { | ||
| const original = { start: 1, end: 10 } | ||
| const toRemove = { start: 11, end: 20 } | ||
| expect(subtractRange(original, toRemove)).toEqual([original]) | ||
| }) | ||
| it("should return an empty array if the range is completely removed", () => { | ||
| const original = { start: 1, end: 10 } | ||
| const toRemove = { start: 1, end: 10 } | ||
| expect(subtractRange(original, toRemove)).toEqual([]) | ||
| }) | ||
| it("should subtract from the beginning", () => { | ||
| const original = { start: 1, end: 10 } | ||
| const toRemove = { start: 1, end: 5 } | ||
| expect(subtractRange(original, toRemove)).toEqual([{ start: 6, end: 10 }]) | ||
| }) | ||
| it("should subtract from the end", () => { | ||
| const original = { start: 1, end: 10 } | ||
| const toRemove = { start: 6, end: 10 } | ||
| expect(subtractRange(original, toRemove)).toEqual([{ start: 1, end: 5 }]) | ||
| }) | ||
| it("should subtract from the middle, creating two new ranges", () => { | ||
| const original = { start: 1, end: 10 } | ||
| const toRemove = { start: 4, end: 6 } | ||
| expect(subtractRange(original, toRemove)).toEqual([ | ||
| { start: 1, end: 3 }, | ||
| { start: 7, end: 10 }, | ||
| ]) | ||
| }) | ||
| }) | ||
| describe("subtractRanges", () => { | ||
| it("should subtract multiple ranges from a single original range", () => { | ||
| const originals = [{ start: 1, end: 20 }] | ||
| const toRemoves = [ | ||
| { start: 1, end: 5 }, | ||
| { start: 15, end: 20 }, | ||
| ] | ||
| expect(subtractRanges(originals, toRemoves)).toEqual([{ start: 6, end: 14 }]) | ||
| }) | ||
| }) | ||
| describe("processAndFilterReadRequest", () => { | ||
| const MOCK_FILE_PATH = "/test/file.txt" | ||
| const CURRENT_MTIME = new Date().toISOString() | ||
| beforeEach(() => { | ||
| vi.clearAllMocks() | ||
| mockedStat.mockResolvedValue({ mtime: { toISOString: () => CURRENT_MTIME } } as any) | ||
| }) | ||
| it("should allow all when history is empty", async () => { | ||
| const requestedRanges = [{ start: 1, end: 10 }] | ||
| const result = await processAndFilterReadRequest(MOCK_FILE_PATH, requestedRanges, []) | ||
| expect(result.status).toBe("ALLOW_ALL") | ||
| expect(result.rangesToRead).toEqual(requestedRanges) | ||
| }) | ||
| it("should reject all when a full cache hit occurs", async () => { | ||
| const requestedRanges = [{ start: 1, end: 10 }] | ||
| const conversationHistory: ConversationMessage[] = [ | ||
| { | ||
| files: [ | ||
| { | ||
| fileName: MOCK_FILE_PATH, | ||
| mtime: new Date(CURRENT_MTIME).getTime(), | ||
| lineRanges: [{ start: 1, end: 10 }], | ||
| }, | ||
| ], | ||
| } as any, | ||
| ] | ||
| const result = await processAndFilterReadRequest(MOCK_FILE_PATH, requestedRanges, conversationHistory) | ||
| expect(result.status).toBe("REJECT_ALL") | ||
| expect(result.rangesToRead).toEqual([]) | ||
| }) | ||
| it("should allow partial when a partial cache hit occurs", async () => { | ||
| const requestedRanges = [{ start: 1, end: 20 }] | ||
| const conversationHistory: ConversationMessage[] = [ | ||
| { | ||
| files: [ | ||
| { | ||
| fileName: MOCK_FILE_PATH, | ||
| mtime: new Date(CURRENT_MTIME).getTime(), | ||
| lineRanges: [{ start: 1, end: 10 }], | ||
| }, | ||
| ], | ||
| } as any, | ||
| ] | ||
| const result = await processAndFilterReadRequest(MOCK_FILE_PATH, requestedRanges, conversationHistory) | ||
| expect(result.status).toBe("ALLOW_PARTIAL") | ||
| expect(result.rangesToRead).toEqual([{ start: 11, end: 20 }]) | ||
| }) | ||
| it("should allow all when mtime is older in history", async () => { | ||
| const requestedRanges = [{ start: 1, end: 10 }] | ||
| const conversationHistory: ConversationMessage[] = [ | ||
| { | ||
| files: [ | ||
| { | ||
| fileName: MOCK_FILE_PATH, | ||
| mtime: new Date(CURRENT_MTIME).getTime() - 100, // Older mtime | ||
| lineRanges: [{ start: 1, end: 10 }], | ||
| }, | ||
| ], | ||
| } as any, | ||
| ] | ||
| const result = await processAndFilterReadRequest(MOCK_FILE_PATH, requestedRanges, conversationHistory) | ||
| expect(result.status).toBe("ALLOW_ALL") | ||
| expect(result.rangesToRead).toEqual(requestedRanges) | ||
| }) | ||
| it("should allow all when file does not exist", async () => { | ||
| mockedStat.mockRejectedValue({ code: "ENOENT" }) | ||
| const requestedRanges = [{ start: 1, end: 10 }] | ||
| const result = await processAndFilterReadRequest(MOCK_FILE_PATH, requestedRanges, []) | ||
| expect(result.status).toBe("ALLOW_ALL") | ||
| expect(result.rangesToRead).toEqual(requestedRanges) | ||
| }) | ||
| it("should throw an error for non-ENOENT stat errors", async () => { | ||
| const error = new Error("EPERM") | ||
| mockedStat.mockRejectedValue(error) | ||
| const requestedRanges = [{ start: 1, end: 10 }] | ||
| const result = await processAndFilterReadRequest(MOCK_FILE_PATH, requestedRanges, []) | ||
| expect(result.status).toBe("ALLOW_ALL") // Fallback to allow all | ||
| }) | ||
| }) | ||
| }) |
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a simpler way of checking that the recently read file hasn't changed? On codebase indexing we use hashes to verify that the content of the file hasn't changed. If the file read is being rejected, do we need to keep a cache of the whole file, or would keeping a hash be a better option?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. After reviewing the code, I need to clarify the current implementation. Current implementation reality: the cache does NOT store full file content. More reliable: hashes detect actual content changes, whereas mtime comparison can miss cases where file content changes but the timestamp is preserved (e.g. via a proposed TypeScript `interface HashCacheEntry`). Trade-off question: would the hash generation cost be acceptable given the improved reliability and the potential to simplify the conversation history analysis logic?
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,169 @@ | ||
| import fs from "fs" | ||
| import { lruCache } from "../utils/lruCache" | ||
| import { ROO_AGENT_CONFIG } from "../config/envConfig" | ||
|
|
||
// Maximum number of entries for the module-level mtime cache, configurable via
// the ROO_FILE_READ_CACHE_SIZE environment variable (defaults to 100 per
// envConfig). Read once at module load time.
const CACHE_SIZE = ROO_AGENT_CONFIG.fileReadCacheSize()

// Types

// An inclusive range of line numbers within a file.
export interface LineRange {
	start: number
	end: number
}

// Metadata recorded for a file that was read earlier in the conversation.
export interface FileMetadata {
	fileName: string
	// Modification time as a number — presumably milliseconds since the epoch
	// (callers compare it against Date values); TODO confirm with producers.
	mtime: number
	lineRanges: LineRange[]
}

// A single message in the conversation history, optionally annotated with the
// files (and line ranges) whose content it exposed to the model.
export interface ConversationMessage {
	role: "user" | "assistant"
	content: any
	ts: number
	tool?: {
		name: string
		options: any
	}
	files?: FileMetadata[]
}

// Outcome of checking a read request against the conversation history:
// - ALLOW_ALL: nothing usable is cached; read everything that was requested.
// - ALLOW_PARTIAL: some ranges are already cached; read only the remainder.
// - REJECT_ALL: every requested range is already covered by the history.
type CacheResult =
	| { status: "ALLOW_ALL"; rangesToRead: LineRange[] }
	| { status: "ALLOW_PARTIAL"; rangesToRead: LineRange[] }
	| { status: "REJECT_ALL"; rangesToRead: LineRange[] }

// Initialize a new LRU cache for file modification times.
// NOTE(review): entries are never invalidated, so a memoized mtime can go
// stale once the file changes on disk — verify any reader of this cache can
// tolerate that.
const mtimeCache = new lruCache<string, string>(CACHE_SIZE)
|
|
||
| /** | ||
| * Checks if two line ranges overlap. | ||
| * @param r1 - The first line range. | ||
| * @param r2 - The second line range. | ||
| * @returns True if the ranges overlap, false otherwise. | ||
| */ | ||
| function rangesOverlap(r1: LineRange, r2: LineRange): boolean { | ||
| return r1.start <= r2.end && r1.end >= r2.start | ||
| } | ||
|
|
||
| /** | ||
| * Subtracts one line range from another. | ||
| * @param from - The range to subtract from. | ||
| * @param toSubtract - The range to subtract. | ||
| * @returns An array of ranges remaining after subtraction. | ||
| */ | ||
| export function subtractRange(from: LineRange, toSubtract: LineRange): LineRange[] { | ||
| // No overlap | ||
| if (from.end < toSubtract.start || from.start > toSubtract.end) { | ||
| return [from] | ||
| } | ||
| const remainingRanges: LineRange[] = [] | ||
| // Part of 'from' is before 'toSubtract' | ||
| if (from.start < toSubtract.start) { | ||
| remainingRanges.push({ start: from.start, end: toSubtract.start - 1 }) | ||
| } | ||
| // Part of 'from' is after 'toSubtract' | ||
| if (from.end > toSubtract.end) { | ||
| remainingRanges.push({ start: toSubtract.end + 1, end: from.end }) | ||
| } | ||
| return remainingRanges | ||
| } | ||
|
|
||
| /** | ||
| * Subtracts a set of ranges from another set of ranges. | ||
| */ | ||
| export function subtractRanges(originals: LineRange[], toRemoves: LineRange[]): LineRange[] { | ||
| let remaining = [...originals] | ||
|
|
||
| for (const toRemove of toRemoves) { | ||
| remaining = remaining.flatMap((original) => subtractRange(original, toRemove)) | ||
| } | ||
|
|
||
| return remaining | ||
| } | ||
|
|
||
| /** | ||
| * | ||
| * @param filePath The path to the file to get the mtime for. | ||
| * @returns The mtime of the file as an ISO string, or null if the file does not exist. | ||
| * @throws An error if there is an error getting the file stats, other than the file not existing. | ||
| */ | ||
| async function getFileMtime(filePath: string): Promise<string | null> { | ||
| const cachedMtime = mtimeCache.get(filePath) | ||
| if (cachedMtime) { | ||
| return cachedMtime | ||
| } | ||
| try { | ||
| const stats = await fs.promises.stat(filePath) | ||
| const mtime = stats.mtime.toISOString() | ||
| mtimeCache.set(filePath, mtime) | ||
| return mtime | ||
| } catch (error) { | ||
| if (error instanceof Error && "code" in error && error.code === "ENOENT") { | ||
| return null // File does not exist, so no mtime. | ||
| } | ||
| // For other errors, we want to know about them. | ||
| throw error | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Processes a read request against cached file data in conversation history. | ||
| * @param requestedFilePath - The full path of the file being requested. | ||
| * @param requestedRanges - The line ranges being requested. | ||
| * @param conversationHistory - The history of conversation messages. | ||
| * @returns A CacheResult indicating whether to allow, partially allow, or reject the read. | ||
| */ | ||
| export async function processAndFilterReadRequest( | ||
| requestedFilePath: string, | ||
| requestedRanges: LineRange[], | ||
| conversationHistory: ConversationMessage[], | ||
| ): Promise<CacheResult> { | ||
| try { | ||
| const currentMtime = await getFileMtime(requestedFilePath) | ||
| if (currentMtime === null) { | ||
| // If file does not exist, there's nothing to read from cache. Let the tool handle it. | ||
| return { status: "ALLOW_ALL", rangesToRead: requestedRanges } | ||
| } | ||
|
|
||
| let rangesToRead = [...requestedRanges] | ||
|
|
||
| // If no specific ranges are requested, treat it as a request for the whole file. | ||
| if (rangesToRead.length === 0) { | ||
| // We need to know the number of lines to create a full range. | ||
| // This logic is simplified; in a real scenario, you'd get the line count. | ||
| // For this example, we'll assume we can't determine the full range without reading the file, | ||
| // so we proceed with ALLOW_ALL if no ranges are specified. | ||
| return { status: "ALLOW_ALL", rangesToRead: requestedRanges } | ||
| } | ||
|
|
||
| for (const message of conversationHistory) { | ||
| if (message.files) { | ||
| for (const file of message.files) { | ||
| if (file.fileName === requestedFilePath && new Date(file.mtime) >= new Date(currentMtime)) { | ||
| // File in history is up-to-date. Check ranges. | ||
| for (const cachedRange of file.lineRanges) { | ||
| rangesToRead = rangesToRead.flatMap((reqRange) => { | ||
| if (rangesOverlap(reqRange, cachedRange)) { | ||
| return subtractRange(reqRange, cachedRange) | ||
| } | ||
| return [reqRange] | ||
| }) | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| if (rangesToRead.length === 0) { | ||
| return { status: "REJECT_ALL", rangesToRead: [] } | ||
| } else if (rangesToRead.length < requestedRanges.length) { | ||
| return { status: "ALLOW_PARTIAL", rangesToRead } | ||
| } else { | ||
| return { status: "ALLOW_ALL", rangesToRead: requestedRanges } | ||
| } | ||
| } catch (error) { | ||
| console.error(`Error processing file read request for ${requestedFilePath}:`, error) | ||
| // On other errors, allow the read to proceed to let the tool handle it. | ||
| return { status: "ALLOW_ALL", rangesToRead: requestedRanges } | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I couldn't find mentions about why partial reads are being permanently enabled. Is this change intentional? since partial reads can be disabled in the settings.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change is intentional and addresses Issue #4009. Let me clarify what's actually happening:
The Problem Being Solved:
The "Always read entire file" setting (maxReadFileLine = -1) was prohibiting line-range reads entirely, forcing users to always read complete files even when they had specific line numbers from:
git grep -n results
Compiler/linter error messages
search_files output
Manual diffs with line references
What This Change Does:
Preserves existing behavior: When no <line_range> is specified, entire files are still read
Adds intelligent choice: Model can now choose line ranges when contextually appropriate
Maintains the setting's intent: "Always read entire file" becomes the default, not an absolute restriction
Technical Detail:
Previously: partialReadsEnabled = maxReadFileLine !== -1 meant unlimited readers couldn't see line-range options
Now: Line ranges are always available in the tool interface, letting the model make smart decisions based on context
This transforms a rigid limitation into flexible intelligence - the model gets entire files by default but can target specific lines when it has line numbers to work with.