Skip to content

Commit 8099b93

Browse files
committed
fix: respect maxReadFileLine setting for file mentions to prevent context exhaustion
- Modified extractTextFromFile to accept and respect maxReadFileLine parameter - Updated parseMentions and related functions to pass maxReadFileLine through the call chain - Modified Task.ts to retrieve maxReadFileLine from state and pass to processUserContentMentions - Added comprehensive tests for large file handling - Files are now truncated with informative messages when they exceed the line limit Fixes #6069
1 parent 5629199 commit 8099b93

File tree

5 files changed

+259
-5
lines changed

5 files changed

+259
-5
lines changed

src/core/mentions/index.ts

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ export async function parseMentions(
8080
fileContextTracker?: FileContextTracker,
8181
rooIgnoreController?: RooIgnoreController,
8282
showRooIgnoredFiles: boolean = true,
83+
maxReadFileLine?: number,
8384
): Promise<string> {
8485
const mentions: Set<string> = new Set()
8586
let parsedText = text.replace(mentionRegexGlobal, (match, mention) => {
@@ -147,7 +148,13 @@ export async function parseMentions(
147148
} else if (mention.startsWith("/")) {
148149
const mentionPath = mention.slice(1)
149150
try {
150-
const content = await getFileOrFolderContent(mentionPath, cwd, rooIgnoreController, showRooIgnoredFiles)
151+
const content = await getFileOrFolderContent(
152+
mentionPath,
153+
cwd,
154+
rooIgnoreController,
155+
showRooIgnoredFiles,
156+
maxReadFileLine,
157+
)
151158
if (mention.endsWith("/")) {
152159
parsedText += `\n\n<folder_content path="${mentionPath}">\n${content}\n</folder_content>`
153160
} else {
@@ -210,6 +217,7 @@ async function getFileOrFolderContent(
210217
cwd: string,
211218
rooIgnoreController?: any,
212219
showRooIgnoredFiles: boolean = true,
220+
maxReadFileLine?: number,
213221
): Promise<string> {
214222
const unescapedPath = unescapeSpaces(mentionPath)
215223
const absPath = path.resolve(cwd, unescapedPath)
@@ -222,7 +230,7 @@ async function getFileOrFolderContent(
222230
return `(File ${mentionPath} is ignored by .rooignore)`
223231
}
224232
try {
225-
const content = await extractTextFromFile(absPath)
233+
const content = await extractTextFromFile(absPath, maxReadFileLine)
226234
return content
227235
} catch (error) {
228236
return `(Failed to read contents of ${mentionPath}): ${error.message}`
@@ -262,7 +270,7 @@ async function getFileOrFolderContent(
262270
if (isBinary) {
263271
return undefined
264272
}
265-
const content = await extractTextFromFile(absoluteFilePath)
273+
const content = await extractTextFromFile(absoluteFilePath, maxReadFileLine)
266274
return `<file_content path="${filePath.toPosix()}">\n${content}\n</file_content>`
267275
} catch (error) {
268276
return undefined

src/core/mentions/processUserContentMentions.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,15 @@ export async function processUserContentMentions({
1313
fileContextTracker,
1414
rooIgnoreController,
1515
showRooIgnoredFiles = true,
16+
maxReadFileLine,
1617
}: {
1718
userContent: Anthropic.Messages.ContentBlockParam[]
1819
cwd: string
1920
urlContentFetcher: UrlContentFetcher
2021
fileContextTracker: FileContextTracker
2122
rooIgnoreController?: any
2223
showRooIgnoredFiles?: boolean
24+
maxReadFileLine?: number
2325
}) {
2426
// Process userContent array, which contains various block types:
2527
// TextBlockParam, ImageBlockParam, ToolUseBlockParam, and ToolResultBlockParam.
@@ -46,6 +48,7 @@ export async function processUserContentMentions({
4648
fileContextTracker,
4749
rooIgnoreController,
4850
showRooIgnoredFiles,
51+
maxReadFileLine,
4952
),
5053
}
5154
}
@@ -63,6 +66,7 @@ export async function processUserContentMentions({
6366
fileContextTracker,
6467
rooIgnoreController,
6568
showRooIgnoredFiles,
69+
maxReadFileLine,
6670
),
6771
}
6872
}
@@ -81,6 +85,7 @@ export async function processUserContentMentions({
8185
fileContextTracker,
8286
rooIgnoreController,
8387
showRooIgnoredFiles,
88+
maxReadFileLine,
8489
),
8590
}
8691
}

src/core/task/Task.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1225,7 +1225,7 @@ export class Task extends EventEmitter<ClineEvents> {
12251225
}),
12261226
)
12271227

1228-
const { showRooIgnoredFiles = true } = (await this.providerRef.deref()?.getState()) ?? {}
1228+
const { showRooIgnoredFiles = true, maxReadFileLine = -1 } = (await this.providerRef.deref()?.getState()) ?? {}
12291229

12301230
const parsedUserContent = await processUserContentMentions({
12311231
userContent,
@@ -1234,6 +1234,7 @@ export class Task extends EventEmitter<ClineEvents> {
12341234
fileContextTracker: this.fileContextTracker,
12351235
rooIgnoreController: this.rooIgnoreController,
12361236
showRooIgnoredFiles,
1237+
maxReadFileLine,
12371238
})
12381239

12391240
const environmentDetails = await getEnvironmentDetails(this, includeFileDetails)
Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
// npx vitest run integrations/misc/__tests__/extract-text-large-files.spec.ts
2+
3+
import { describe, it, expect, vi, beforeEach, Mock } from "vitest"
4+
import * as fs from "fs/promises"
5+
import { extractTextFromFile } from "../extract-text"
6+
import { countFileLines } from "../line-counter"
7+
import { readLines } from "../read-lines"
8+
import { isBinaryFile } from "isbinaryfile"
9+
10+
// Mock all dependencies
11+
vi.mock("fs/promises")
12+
vi.mock("../line-counter")
13+
vi.mock("../read-lines")
14+
vi.mock("isbinaryfile")
15+
16+
describe("extractTextFromFile - Large File Handling", () => {
17+
// Type the mocks
18+
const mockedFs = vi.mocked(fs)
19+
const mockedCountFileLines = vi.mocked(countFileLines)
20+
const mockedReadLines = vi.mocked(readLines)
21+
const mockedIsBinaryFile = vi.mocked(isBinaryFile)
22+
23+
beforeEach(() => {
24+
vi.clearAllMocks()
25+
// Set default mock behavior
26+
mockedFs.access.mockResolvedValue(undefined)
27+
mockedIsBinaryFile.mockResolvedValue(false)
28+
})
29+
30+
it("should truncate files that exceed maxReadFileLine limit", async () => {
31+
const largeFileContent = Array(150)
32+
.fill(null)
33+
.map((_, i) => `Line ${i + 1}: This is a test line with some content`)
34+
.join("\n")
35+
36+
mockedCountFileLines.mockResolvedValue(150)
37+
mockedReadLines.mockResolvedValue(
38+
Array(100)
39+
.fill(null)
40+
.map((_, i) => `Line ${i + 1}: This is a test line with some content`)
41+
.join("\n"),
42+
)
43+
44+
const result = await extractTextFromFile("/test/large-file.ts", 100)
45+
46+
// Should only include first 100 lines with line numbers
47+
expect(result).toContain(" 1 | Line 1: This is a test line with some content")
48+
expect(result).toContain("100 | Line 100: This is a test line with some content")
49+
expect(result).not.toContain("101 | Line 101: This is a test line with some content")
50+
51+
// Should include truncation message
52+
expect(result).toContain(
53+
"[File truncated: showing 100 of 150 total lines. The file is too large and may exhaust the context window if read in full.]",
54+
)
55+
})
56+
57+
it("should not truncate files within the maxReadFileLine limit", async () => {
58+
const smallFileContent = Array(50)
59+
.fill(null)
60+
.map((_, i) => `Line ${i + 1}: This is a test line`)
61+
.join("\n")
62+
63+
mockedCountFileLines.mockResolvedValue(50)
64+
mockedFs.readFile.mockResolvedValue(smallFileContent as any)
65+
66+
const result = await extractTextFromFile("/test/small-file.ts", 100)
67+
68+
// Should include all lines with line numbers
69+
expect(result).toContain(" 1 | Line 1: This is a test line")
70+
expect(result).toContain("50 | Line 50: This is a test line")
71+
72+
// Should not include truncation message
73+
expect(result).not.toContain("[File truncated:")
74+
})
75+
76+
it("should handle files with exactly maxReadFileLine lines", async () => {
77+
const exactFileContent = Array(100)
78+
.fill(null)
79+
.map((_, i) => `Line ${i + 1}`)
80+
.join("\n")
81+
82+
mockedCountFileLines.mockResolvedValue(100)
83+
mockedFs.readFile.mockResolvedValue(exactFileContent as any)
84+
85+
const result = await extractTextFromFile("/test/exact-file.ts", 100)
86+
87+
// Should include all lines with line numbers
88+
expect(result).toContain(" 1 | Line 1")
89+
expect(result).toContain("100 | Line 100")
90+
91+
// Should not include truncation message
92+
expect(result).not.toContain("[File truncated:")
93+
})
94+
95+
it("should handle undefined maxReadFileLine by not truncating", async () => {
96+
const largeFileContent = Array(200)
97+
.fill(null)
98+
.map((_, i) => `Line ${i + 1}`)
99+
.join("\n")
100+
101+
mockedFs.readFile.mockResolvedValue(largeFileContent as any)
102+
103+
const result = await extractTextFromFile("/test/large-file.ts", undefined)
104+
105+
// Should include all lines with line numbers when maxReadFileLine is undefined
106+
expect(result).toContain(" 1 | Line 1")
107+
expect(result).toContain("200 | Line 200")
108+
109+
// Should not include truncation message
110+
expect(result).not.toContain("[File truncated:")
111+
})
112+
113+
it("should handle empty files", async () => {
114+
mockedFs.readFile.mockResolvedValue("" as any)
115+
116+
const result = await extractTextFromFile("/test/empty-file.ts", 100)
117+
118+
expect(result).toBe("")
119+
expect(result).not.toContain("[File truncated:")
120+
})
121+
122+
it("should handle files with only newlines", async () => {
123+
const newlineOnlyContent = "\n\n\n\n\n"
124+
125+
mockedCountFileLines.mockResolvedValue(6) // 5 newlines = 6 lines
126+
mockedReadLines.mockResolvedValue("\n\n")
127+
128+
const result = await extractTextFromFile("/test/newline-file.ts", 3)
129+
130+
// Should truncate at line 3
131+
expect(result).toContain("[File truncated: showing 3 of 6 total lines")
132+
})
133+
134+
it("should handle very large files efficiently", async () => {
135+
// Simulate a 10,000 line file
136+
mockedCountFileLines.mockResolvedValue(10000)
137+
mockedReadLines.mockResolvedValue(
138+
Array(500)
139+
.fill(null)
140+
.map((_, i) => `Line ${i + 1}: Some content here`)
141+
.join("\n"),
142+
)
143+
144+
const result = await extractTextFromFile("/test/very-large-file.ts", 500)
145+
146+
// Should only include first 500 lines with line numbers
147+
expect(result).toContain(" 1 | Line 1: Some content here")
148+
expect(result).toContain("500 | Line 500: Some content here")
149+
expect(result).not.toContain("501 | Line 501: Some content here")
150+
151+
// Should show truncation message
152+
expect(result).toContain("[File truncated: showing 500 of 10000 total lines")
153+
})
154+
155+
it("should handle maxReadFileLine of 0 by not truncating", async () => {
156+
const fileContent = "Line 1\nLine 2\nLine 3"
157+
158+
mockedFs.readFile.mockResolvedValue(fileContent as any)
159+
160+
const result = await extractTextFromFile("/test/file.ts", 0)
161+
162+
// maxReadFileLine of 0 or negative means no limit
163+
expect(result).toContain("1 | Line 1")
164+
expect(result).toContain("2 | Line 2")
165+
expect(result).toContain("3 | Line 3")
166+
expect(result).not.toContain("[File truncated:")
167+
})
168+
169+
it("should handle negative maxReadFileLine by treating as undefined", async () => {
170+
const fileContent = "Line 1\nLine 2\nLine 3"
171+
172+
mockedFs.readFile.mockResolvedValue(fileContent as any)
173+
174+
const result = await extractTextFromFile("/test/file.ts", -1)
175+
176+
// Should include all content with line numbers when negative
177+
expect(result).toContain("1 | Line 1")
178+
expect(result).toContain("2 | Line 2")
179+
expect(result).toContain("3 | Line 3")
180+
expect(result).not.toContain("[File truncated:")
181+
})
182+
183+
it("should preserve file content structure when truncating", async () => {
184+
const structuredContent = [
185+
"function example() {",
186+
" const x = 1;",
187+
" const y = 2;",
188+
" return x + y;",
189+
"}",
190+
"",
191+
"// More code below",
192+
].join("\n")
193+
194+
mockedCountFileLines.mockResolvedValue(7)
195+
mockedReadLines.mockResolvedValue(["function example() {", " const x = 1;", " const y = 2;"].join("\n"))
196+
197+
const result = await extractTextFromFile("/test/structured.ts", 3)
198+
199+
// Should preserve the first 3 lines with line numbers
200+
expect(result).toContain("1 | function example() {")
201+
expect(result).toContain("2 | const x = 1;")
202+
expect(result).toContain("3 | const y = 2;")
203+
expect(result).not.toContain("4 | return x + y;")
204+
205+
// Should include truncation info
206+
expect(result).toContain("[File truncated: showing 3 of 7 total lines")
207+
})
208+
209+
it("should handle binary files by throwing an error", async () => {
210+
mockedIsBinaryFile.mockResolvedValue(true)
211+
212+
await expect(extractTextFromFile("/test/binary.bin", 100)).rejects.toThrow(
213+
"Cannot read text for file type: .bin",
214+
)
215+
})
216+
217+
it("should handle file not found errors", async () => {
218+
mockedFs.access.mockRejectedValue(new Error("ENOENT"))
219+
220+
await expect(extractTextFromFile("/test/nonexistent.ts", 100)).rejects.toThrow(
221+
"File not found: /test/nonexistent.ts",
222+
)
223+
})
224+
})

src/integrations/misc/extract-text.ts

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import mammoth from "mammoth"
55
import fs from "fs/promises"
66
import { isBinaryFile } from "isbinaryfile"
77
import { extractTextFromXLSX } from "./extract-text-from-xlsx"
8+
import { countFileLines } from "./line-counter"
9+
import { readLines } from "./read-lines"
810

911
async function extractTextFromPDF(filePath: string): Promise<string> {
1012
const dataBuffer = await fs.readFile(filePath)
@@ -48,7 +50,7 @@ export function getSupportedBinaryFormats(): string[] {
4850
return Object.keys(SUPPORTED_BINARY_FORMATS)
4951
}
5052

51-
export async function extractTextFromFile(filePath: string): Promise<string> {
53+
export async function extractTextFromFile(filePath: string, maxReadFileLine?: number): Promise<string> {
5254
try {
5355
await fs.access(filePath)
5456
} catch (error) {
@@ -67,6 +69,20 @@ export async function extractTextFromFile(filePath: string): Promise<string> {
6769
const isBinary = await isBinaryFile(filePath).catch(() => false)
6870

6971
if (!isBinary) {
72+
// Check if we need to apply line limit
73+
if (maxReadFileLine && maxReadFileLine > 0) {
74+
const totalLines = await countFileLines(filePath)
75+
if (totalLines > maxReadFileLine) {
76+
// Read only up to maxReadFileLine
77+
const content = await readLines(filePath, maxReadFileLine - 1, 0)
78+
const numberedContent = addLineNumbers(content)
79+
return (
80+
numberedContent +
81+
`\n\n[File truncated: showing ${maxReadFileLine} of ${totalLines} total lines. The file is too large and may exhaust the context window if read in full.]`
82+
)
83+
}
84+
}
85+
// Read the entire file if no limit or file is within limit
7086
return addLineNumbers(await fs.readFile(filePath, "utf8"))
7187
} else {
7288
throw new Error(`Cannot read text for file type: ${fileExtension}`)

0 commit comments

Comments
 (0)