Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
228 changes: 144 additions & 84 deletions src/core/prompts/__tests__/__snapshots__/system.test.ts.snap

Large diffs are not rendered by default.

28 changes: 10 additions & 18 deletions src/core/prompts/tools/read-file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,22 @@ export function getReadFileDescription(args: ToolArgs): string {
const isMultipleReadsEnabled = maxConcurrentReads > 1

return `## read_file
Description: Request to read the contents of ${isMultipleReadsEnabled ? "one or more files" : "a file"}. The tool outputs line-numbered content (e.g. "1 | const x = 1") for easy reference when creating diffs or discussing code.${args.partialReadsEnabled ? " Use line ranges to efficiently read specific portions of large files." : ""} Supports text extraction from PDF and DOCX files, but may not handle other binary files properly.
Description: Request to read the contents of ${isMultipleReadsEnabled ? "one or more files" : "a file"}. The tool outputs line-numbered content (e.g. "1 | const x = 1") for easy reference when creating diffs or discussing code. Use line ranges to efficiently read specific portions of large files. Supports text extraction from PDF and DOCX files, but may not handle other binary files properly.

${isMultipleReadsEnabled ? `**IMPORTANT: You can read a maximum of ${maxConcurrentReads} files in a single request.** If you need to read more files, use multiple sequential read_file requests.` : "**IMPORTANT: Multiple file reads are currently disabled. You can only read one file at a time.**"}

${args.partialReadsEnabled ? `By specifying line ranges, you can efficiently read specific portions of large files without loading the entire file into memory.` : ""}
By specifying line ranges, you can efficiently read specific portions of large files without loading the entire file into memory.
Parameters:
- args: Contains one or more file elements, where each file contains:
- path: (required) File path (relative to workspace directory ${args.cwd})
${args.partialReadsEnabled ? `- line_range: (optional) One or more line range elements in format "start-end" (1-based, inclusive)` : ""}
- line_range: (optional) One or more line range elements in format "start-end" (1-based, inclusive)

Usage:
<read_file>
<args>
<file>
<path>path/to/file</path>
${args.partialReadsEnabled ? `<line_range>start-end</line_range>` : ""}
<line_range>start-end</line_range>
</file>
</args>
</read_file>
Expand All @@ -32,7 +32,7 @@ Examples:
<args>
<file>
<path>src/app.ts</path>
${args.partialReadsEnabled ? `<line_range>1-1000</line_range>` : ""}
<line_range>1-1000</line_range>
</file>
</args>
</read_file>
Expand All @@ -44,16 +44,12 @@ ${isMultipleReadsEnabled ? `2. Reading multiple files (within the ${maxConcurren
<args>
<file>
<path>src/app.ts</path>
${
args.partialReadsEnabled
? `<line_range>1-50</line_range>
<line_range>100-150</line_range>`
: ""
}
<line_range>1-50</line_range>
<line_range>100-150</line_range>
</file>
<file>
<path>src/utils.ts</path>
${args.partialReadsEnabled ? `<line_range>10-20</line_range>` : ""}
<line_range>10-20</line_range>
</file>
</args>
</read_file>`
Expand All @@ -72,14 +68,10 @@ ${isMultipleReadsEnabled ? "3. " : "2. "}Reading an entire file:
IMPORTANT: You MUST use this Efficient Reading Strategy:
- ${isMultipleReadsEnabled ? `You MUST read all related files and implementations together in a single operation (up to ${maxConcurrentReads} files at once)` : "You MUST read files one at a time, as multiple file reads are currently disabled"}
- You MUST obtain all necessary context before proceeding with changes
${
args.partialReadsEnabled
? `- You MUST use line ranges to read specific portions of large files, rather than reading entire files when not needed
${`- You MUST use line ranges to read specific portions of large files, rather than reading entire files when not needed
- You MUST combine adjacent line ranges (<10 lines apart)
- You MUST use multiple ranges for content separated by >10 lines
- You MUST include sufficient line context for planned modifications while keeping ranges minimal
`
: ""
}
`}
${isMultipleReadsEnabled ? `- When you need to read more than ${maxConcurrentReads} files, prioritize the most critical files first, then use subsequent read_file requests for additional files` : ""}`
}
179 changes: 179 additions & 0 deletions src/core/services/__tests__/fileReadCacheService.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
import { vi, describe, it, expect, beforeEach } from "vitest"
import {
processAndFilterReadRequest,
subtractRange,
subtractRanges,
ConversationMessage,
} from "../fileReadCacheService"
import { stat } from "fs/promises"

vi.mock("fs/promises", () => ({
stat: vi.fn(),
}))

const mockedStat = vi.mocked(stat)

describe("fileReadCacheService", () => {
describe("subtractRange", () => {
it("should return the original range if there is no overlap", () => {
const original = { start: 1, end: 10 }
const toRemove = { start: 11, end: 20 }
expect(subtractRange(original, toRemove)).toEqual([original])
})

it("should return an empty array if the range is completely removed", () => {
const original = { start: 1, end: 10 }
const toRemove = { start: 1, end: 10 }
expect(subtractRange(original, toRemove)).toEqual([])
})

it("should subtract from the beginning", () => {
const original = { start: 1, end: 10 }
const toRemove = { start: 1, end: 5 }
expect(subtractRange(original, toRemove)).toEqual([{ start: 6, end: 10 }])
})

it("should subtract from the end", () => {
const original = { start: 1, end: 10 }
const toRemove = { start: 6, end: 10 }
expect(subtractRange(original, toRemove)).toEqual([{ start: 1, end: 5 }])
})

it("should subtract from the middle, creating two new ranges", () => {
const original = { start: 1, end: 10 }
const toRemove = { start: 4, end: 6 }
expect(subtractRange(original, toRemove)).toEqual([
{ start: 1, end: 3 },
{ start: 7, end: 10 },
])
})
})

describe("subtractRanges", () => {
it("should subtract multiple ranges from a single original range", () => {
const originals = [{ start: 1, end: 20 }]
const toRemoves = [
{ start: 1, end: 5 },
{ start: 15, end: 20 },
]
expect(subtractRanges(originals, toRemoves)).toEqual([{ start: 6, end: 14 }])
})
})

describe("processAndFilterReadRequest", () => {
const MOCK_FILE_PATH = "/test/file.txt"
const CURRENT_MTIME = 1000

beforeEach(() => {
mockedStat.mockResolvedValue({ mtime: { getTime: () => CURRENT_MTIME } } as any)
})

it("should allow all when history is empty", async () => {
const requestedRanges = [{ start: 1, end: 10 }]
const result = await processAndFilterReadRequest(MOCK_FILE_PATH, requestedRanges, [])
expect(result.status).toBe("ALLOW_ALL")
expect(result.rangesToRead).toEqual(requestedRanges)
})

it("should reject all when a full cache hit occurs", async () => {
const requestedRanges = [{ start: 1, end: 10 }]
const conversationHistory: ConversationMessage[] = [
{
files: [
{
fileName: MOCK_FILE_PATH,
mtime: CURRENT_MTIME,
loadedRanges: [{ start: 1, end: 10 }],
},
],
},
]
const result = await processAndFilterReadRequest(MOCK_FILE_PATH, requestedRanges, conversationHistory)
expect(result.status).toBe("REJECT_ALL")
expect(result.rangesToRead).toEqual([])
})

it("should allow partial when a partial cache hit occurs", async () => {
const requestedRanges = [{ start: 1, end: 20 }]
const conversationHistory: ConversationMessage[] = [
{
files: [
{
fileName: MOCK_FILE_PATH,
mtime: CURRENT_MTIME,
loadedRanges: [{ start: 1, end: 10 }],
},
],
},
]
const result = await processAndFilterReadRequest(MOCK_FILE_PATH, requestedRanges, conversationHistory)
expect(result.status).toBe("ALLOW_PARTIAL")
expect(result.rangesToRead).toEqual([{ start: 11, end: 20 }])
})

it("should allow all when mtime is older in history", async () => {
const requestedRanges = [{ start: 1, end: 10 }]
const conversationHistory: ConversationMessage[] = [
{
files: [
{
fileName: MOCK_FILE_PATH,
mtime: CURRENT_MTIME - 100, // Older mtime
loadedRanges: [{ start: 1, end: 10 }],
},
],
},
]
const result = await processAndFilterReadRequest(MOCK_FILE_PATH, requestedRanges, conversationHistory)
expect(result.status).toBe("ALLOW_ALL")
expect(result.rangesToRead).toEqual(requestedRanges)
})

it("should allow all for a file not in history", async () => {
const requestedRanges = [{ start: 1, end: 10 }]
const conversationHistory: ConversationMessage[] = [
{
files: [
{
fileName: "/another/file.txt",
mtime: CURRENT_MTIME,
loadedRanges: [{ start: 1, end: 10 }],
},
],
},
]
const result = await processAndFilterReadRequest(MOCK_FILE_PATH, requestedRanges, conversationHistory)
expect(result.status).toBe("ALLOW_ALL")
expect(result.rangesToRead).toEqual(requestedRanges)
})

it("should correctly use the most recent valid history entry", async () => {
const requestedRanges = [{ start: 1, end: 20 }]
const conversationHistory: ConversationMessage[] = [
{
// Older, incorrect mtime
files: [
{
fileName: MOCK_FILE_PATH,
mtime: CURRENT_MTIME - 100,
loadedRanges: [{ start: 1, end: 20 }],
},
],
},
{
// Newer, correct mtime but only partial coverage
files: [
{
fileName: MOCK_FILE_PATH,
mtime: CURRENT_MTIME,
loadedRanges: [{ start: 1, end: 5 }],
},
],
},
]
const result = await processAndFilterReadRequest(MOCK_FILE_PATH, requestedRanges, conversationHistory)
expect(result.status).toBe("ALLOW_PARTIAL")
expect(result.rangesToRead).toEqual([{ start: 6, end: 20 }])
})
})
})
119 changes: 119 additions & 0 deletions src/core/services/fileReadCacheService.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import { stat } from "fs/promises"

export interface LineRange {
start: number
end: number
}

export interface FileMetadata {
fileName: string
mtime: number
loadedRanges: LineRange[]
}

export interface ConversationMessage {
files?: FileMetadata[]
}

export interface FilteredReadRequest {
status: "REJECT_ALL" | "ALLOW_PARTIAL" | "ALLOW_ALL"
rangesToRead: LineRange[]
}

/**
* Checks if two ranges overlap.
*/
function rangesOverlap(r1: LineRange, r2: LineRange): boolean {
return r1.start <= r2.end && r1.end >= r2.start
}

/**
* Subtracts one range from another.
* Returns an array of ranges that are in `original` but not in `toRemove`.
*/
export function subtractRange(original: LineRange, toRemove: LineRange): LineRange[] {
if (!rangesOverlap(original, toRemove)) {
return [original]
}

const result: LineRange[] = []

// Part of original before toRemove
if (original.start < toRemove.start) {
result.push({ start: original.start, end: toRemove.start - 1 })
}

// Part of original after toRemove
if (original.end > toRemove.end) {
result.push({ start: toRemove.end + 1, end: original.end })
}

return result
}

/**
* Subtracts a set of ranges from another set of ranges.
*/
export function subtractRanges(originals: LineRange[], toRemoves: LineRange[]): LineRange[] {
let remaining = [...originals]

for (const toRemove of toRemoves) {
remaining = remaining.flatMap((original) => subtractRange(original, toRemove))
}

return remaining
}

export async function processAndFilterReadRequest(
requestedFile: string,
requestedRanges: LineRange[],
conversationHistory: ConversationMessage[],
): Promise<FilteredReadRequest> {
let currentMtime: number
try {
currentMtime = (await stat(requestedFile)).mtime.getTime()
} catch (error) {
// File doesn't exist or other error, so we must read.
return {
status: "ALLOW_ALL",
rangesToRead: requestedRanges,
}
}

let rangesThatStillNeedToBeRead = [...requestedRanges]

for (let i = conversationHistory.length - 1; i >= 0; i--) {
const message = conversationHistory[i]
if (!message.files) {
continue
}

const relevantFileHistory = message.files.find((f) => f.fileName === requestedFile)

if (relevantFileHistory && relevantFileHistory.mtime >= currentMtime) {
rangesThatStillNeedToBeRead = subtractRanges(rangesThatStillNeedToBeRead, relevantFileHistory.loadedRanges)

if (rangesThatStillNeedToBeRead.length === 0) {
return {
status: "REJECT_ALL",
rangesToRead: [],
}
}
}
}

const originalRangesString = JSON.stringify(requestedRanges.sort((a, b) => a.start - b.start))
const finalRangesString = JSON.stringify(rangesThatStillNeedToBeRead.sort((a, b) => a.start - b.start))

if (originalRangesString === finalRangesString) {
return {
status: "ALLOW_ALL",
rangesToRead: requestedRanges,
}
}

return {
status: "ALLOW_PARTIAL",
rangesToRead: rangesThatStillNeedToBeRead,
}
}
Loading