Skip to content

Commit 83df51a

Browse files
roomotedaniel-lxs
authored andcommitted
fix: respect maxReadFileLine setting for file mentions to prevent context exhaustion
- Modified extractTextFromFile to accept and respect maxReadFileLine parameter - Updated parseMentions and related functions to pass maxReadFileLine through the call chain - Modified Task.ts to retrieve maxReadFileLine from state and pass to processUserContentMentions - Added comprehensive tests for large file handling - Files are now truncated with informative messages when they exceed the line limit Fixes #6069
1 parent d62a260 commit 83df51a

File tree

5 files changed

+259
-4
lines changed

5 files changed

+259
-4
lines changed

src/core/mentions/index.ts

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ export async function parseMentions(
8282
showRooIgnoredFiles: boolean = true,
8383
includeDiagnosticMessages: boolean = true,
8484
maxDiagnosticMessages: number = 50,
85+
maxReadFileLine?: number,
8586
): Promise<string> {
8687
const mentions: Set<string> = new Set()
8788
let parsedText = text.replace(mentionRegexGlobal, (match, mention) => {
@@ -149,7 +150,13 @@ export async function parseMentions(
149150
} else if (mention.startsWith("/")) {
150151
const mentionPath = mention.slice(1)
151152
try {
152-
const content = await getFileOrFolderContent(mentionPath, cwd, rooIgnoreController, showRooIgnoredFiles)
153+
const content = await getFileOrFolderContent(
154+
mentionPath,
155+
cwd,
156+
rooIgnoreController,
157+
showRooIgnoredFiles,
158+
maxReadFileLine,
159+
)
153160
if (mention.endsWith("/")) {
154161
parsedText += `\n\n<folder_content path="${mentionPath}">\n${content}\n</folder_content>`
155162
} else {
@@ -212,6 +219,7 @@ async function getFileOrFolderContent(
212219
cwd: string,
213220
rooIgnoreController?: any,
214221
showRooIgnoredFiles: boolean = true,
222+
maxReadFileLine?: number,
215223
): Promise<string> {
216224
const unescapedPath = unescapeSpaces(mentionPath)
217225
const absPath = path.resolve(cwd, unescapedPath)
@@ -224,7 +232,7 @@ async function getFileOrFolderContent(
224232
return `(File ${mentionPath} is ignored by .rooignore)`
225233
}
226234
try {
227-
const content = await extractTextFromFile(absPath)
235+
const content = await extractTextFromFile(absPath, maxReadFileLine)
228236
return content
229237
} catch (error) {
230238
return `(Failed to read contents of ${mentionPath}): ${error.message}`
@@ -264,7 +272,7 @@ async function getFileOrFolderContent(
264272
if (isBinary) {
265273
return undefined
266274
}
267-
const content = await extractTextFromFile(absoluteFilePath)
275+
const content = await extractTextFromFile(absoluteFilePath, maxReadFileLine)
268276
return `<file_content path="${filePath.toPosix()}">\n${content}\n</file_content>`
269277
} catch (error) {
270278
return undefined

src/core/mentions/processUserContentMentions.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ export async function processUserContentMentions({
1515
showRooIgnoredFiles = true,
1616
includeDiagnosticMessages = true,
1717
maxDiagnosticMessages = 50,
18+
maxReadFileLine,
1819
}: {
1920
userContent: Anthropic.Messages.ContentBlockParam[]
2021
cwd: string
@@ -24,6 +25,7 @@ export async function processUserContentMentions({
2425
showRooIgnoredFiles?: boolean
2526
includeDiagnosticMessages?: boolean
2627
maxDiagnosticMessages?: number
28+
maxReadFileLine?: number
2729
}) {
2830
// Process userContent array, which contains various block types:
2931
// TextBlockParam, ImageBlockParam, ToolUseBlockParam, and ToolResultBlockParam.
@@ -52,6 +54,7 @@ export async function processUserContentMentions({
5254
showRooIgnoredFiles,
5355
includeDiagnosticMessages,
5456
maxDiagnosticMessages,
57+
maxReadFileLine,
5558
),
5659
}
5760
}
@@ -71,6 +74,7 @@ export async function processUserContentMentions({
7174
showRooIgnoredFiles,
7275
includeDiagnosticMessages,
7376
maxDiagnosticMessages,
77+
maxReadFileLine,
7478
),
7579
}
7680
}
@@ -91,6 +95,7 @@ export async function processUserContentMentions({
9195
showRooIgnoredFiles,
9296
includeDiagnosticMessages,
9397
maxDiagnosticMessages,
98+
maxReadFileLine,
9499
),
95100
}
96101
}

src/core/task/Task.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1230,6 +1230,7 @@ export class Task extends EventEmitter<ClineEvents> {
12301230
showRooIgnoredFiles = true,
12311231
includeDiagnosticMessages = true,
12321232
maxDiagnosticMessages = 50,
1233+
maxReadFileLine = -1,
12331234
} = (await this.providerRef.deref()?.getState()) ?? {}
12341235

12351236
const parsedUserContent = await processUserContentMentions({
@@ -1241,6 +1242,7 @@ export class Task extends EventEmitter<ClineEvents> {
12411242
showRooIgnoredFiles,
12421243
includeDiagnosticMessages,
12431244
maxDiagnosticMessages,
1245+
maxReadFileLine,
12441246
})
12451247

12461248
const environmentDetails = await getEnvironmentDetails(this, includeFileDetails)
Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
// npx vitest run integrations/misc/__tests__/extract-text-large-files.spec.ts
2+
3+
import { describe, it, expect, vi, beforeEach, Mock } from "vitest"
4+
import * as fs from "fs/promises"
5+
import { extractTextFromFile } from "../extract-text"
6+
import { countFileLines } from "../line-counter"
7+
import { readLines } from "../read-lines"
8+
import { isBinaryFile } from "isbinaryfile"
9+
10+
// Mock all dependencies
11+
vi.mock("fs/promises")
12+
vi.mock("../line-counter")
13+
vi.mock("../read-lines")
14+
vi.mock("isbinaryfile")
15+
16+
describe("extractTextFromFile - Large File Handling", () => {
17+
// Type the mocks
18+
const mockedFs = vi.mocked(fs)
19+
const mockedCountFileLines = vi.mocked(countFileLines)
20+
const mockedReadLines = vi.mocked(readLines)
21+
const mockedIsBinaryFile = vi.mocked(isBinaryFile)
22+
23+
beforeEach(() => {
24+
vi.clearAllMocks()
25+
// Set default mock behavior
26+
mockedFs.access.mockResolvedValue(undefined)
27+
mockedIsBinaryFile.mockResolvedValue(false)
28+
})
29+
30+
it("should truncate files that exceed maxReadFileLine limit", async () => {
31+
const largeFileContent = Array(150)
32+
.fill(null)
33+
.map((_, i) => `Line ${i + 1}: This is a test line with some content`)
34+
.join("\n")
35+
36+
mockedCountFileLines.mockResolvedValue(150)
37+
mockedReadLines.mockResolvedValue(
38+
Array(100)
39+
.fill(null)
40+
.map((_, i) => `Line ${i + 1}: This is a test line with some content`)
41+
.join("\n"),
42+
)
43+
44+
const result = await extractTextFromFile("/test/large-file.ts", 100)
45+
46+
// Should only include first 100 lines with line numbers
47+
expect(result).toContain(" 1 | Line 1: This is a test line with some content")
48+
expect(result).toContain("100 | Line 100: This is a test line with some content")
49+
expect(result).not.toContain("101 | Line 101: This is a test line with some content")
50+
51+
// Should include truncation message
52+
expect(result).toContain(
53+
"[File truncated: showing 100 of 150 total lines. The file is too large and may exhaust the context window if read in full.]",
54+
)
55+
})
56+
57+
it("should not truncate files within the maxReadFileLine limit", async () => {
58+
const smallFileContent = Array(50)
59+
.fill(null)
60+
.map((_, i) => `Line ${i + 1}: This is a test line`)
61+
.join("\n")
62+
63+
mockedCountFileLines.mockResolvedValue(50)
64+
mockedFs.readFile.mockResolvedValue(smallFileContent as any)
65+
66+
const result = await extractTextFromFile("/test/small-file.ts", 100)
67+
68+
// Should include all lines with line numbers
69+
expect(result).toContain(" 1 | Line 1: This is a test line")
70+
expect(result).toContain("50 | Line 50: This is a test line")
71+
72+
// Should not include truncation message
73+
expect(result).not.toContain("[File truncated:")
74+
})
75+
76+
it("should handle files with exactly maxReadFileLine lines", async () => {
77+
const exactFileContent = Array(100)
78+
.fill(null)
79+
.map((_, i) => `Line ${i + 1}`)
80+
.join("\n")
81+
82+
mockedCountFileLines.mockResolvedValue(100)
83+
mockedFs.readFile.mockResolvedValue(exactFileContent as any)
84+
85+
const result = await extractTextFromFile("/test/exact-file.ts", 100)
86+
87+
// Should include all lines with line numbers
88+
expect(result).toContain(" 1 | Line 1")
89+
expect(result).toContain("100 | Line 100")
90+
91+
// Should not include truncation message
92+
expect(result).not.toContain("[File truncated:")
93+
})
94+
95+
it("should handle undefined maxReadFileLine by not truncating", async () => {
96+
const largeFileContent = Array(200)
97+
.fill(null)
98+
.map((_, i) => `Line ${i + 1}`)
99+
.join("\n")
100+
101+
mockedFs.readFile.mockResolvedValue(largeFileContent as any)
102+
103+
const result = await extractTextFromFile("/test/large-file.ts", undefined)
104+
105+
// Should include all lines with line numbers when maxReadFileLine is undefined
106+
expect(result).toContain(" 1 | Line 1")
107+
expect(result).toContain("200 | Line 200")
108+
109+
// Should not include truncation message
110+
expect(result).not.toContain("[File truncated:")
111+
})
112+
113+
it("should handle empty files", async () => {
114+
mockedFs.readFile.mockResolvedValue("" as any)
115+
116+
const result = await extractTextFromFile("/test/empty-file.ts", 100)
117+
118+
expect(result).toBe("")
119+
expect(result).not.toContain("[File truncated:")
120+
})
121+
122+
it("should handle files with only newlines", async () => {
123+
const newlineOnlyContent = "\n\n\n\n\n"
124+
125+
mockedCountFileLines.mockResolvedValue(6) // 5 newlines = 6 lines
126+
mockedReadLines.mockResolvedValue("\n\n")
127+
128+
const result = await extractTextFromFile("/test/newline-file.ts", 3)
129+
130+
// Should truncate at line 3
131+
expect(result).toContain("[File truncated: showing 3 of 6 total lines")
132+
})
133+
134+
it("should handle very large files efficiently", async () => {
135+
// Simulate a 10,000 line file
136+
mockedCountFileLines.mockResolvedValue(10000)
137+
mockedReadLines.mockResolvedValue(
138+
Array(500)
139+
.fill(null)
140+
.map((_, i) => `Line ${i + 1}: Some content here`)
141+
.join("\n"),
142+
)
143+
144+
const result = await extractTextFromFile("/test/very-large-file.ts", 500)
145+
146+
// Should only include first 500 lines with line numbers
147+
expect(result).toContain(" 1 | Line 1: Some content here")
148+
expect(result).toContain("500 | Line 500: Some content here")
149+
expect(result).not.toContain("501 | Line 501: Some content here")
150+
151+
// Should show truncation message
152+
expect(result).toContain("[File truncated: showing 500 of 10000 total lines")
153+
})
154+
155+
it("should handle maxReadFileLine of 0 by not truncating", async () => {
156+
const fileContent = "Line 1\nLine 2\nLine 3"
157+
158+
mockedFs.readFile.mockResolvedValue(fileContent as any)
159+
160+
const result = await extractTextFromFile("/test/file.ts", 0)
161+
162+
// maxReadFileLine of 0 or negative means no limit
163+
expect(result).toContain("1 | Line 1")
164+
expect(result).toContain("2 | Line 2")
165+
expect(result).toContain("3 | Line 3")
166+
expect(result).not.toContain("[File truncated:")
167+
})
168+
169+
it("should handle negative maxReadFileLine by treating as undefined", async () => {
170+
const fileContent = "Line 1\nLine 2\nLine 3"
171+
172+
mockedFs.readFile.mockResolvedValue(fileContent as any)
173+
174+
const result = await extractTextFromFile("/test/file.ts", -1)
175+
176+
// Should include all content with line numbers when negative
177+
expect(result).toContain("1 | Line 1")
178+
expect(result).toContain("2 | Line 2")
179+
expect(result).toContain("3 | Line 3")
180+
expect(result).not.toContain("[File truncated:")
181+
})
182+
183+
it("should preserve file content structure when truncating", async () => {
184+
const structuredContent = [
185+
"function example() {",
186+
" const x = 1;",
187+
" const y = 2;",
188+
" return x + y;",
189+
"}",
190+
"",
191+
"// More code below",
192+
].join("\n")
193+
194+
mockedCountFileLines.mockResolvedValue(7)
195+
mockedReadLines.mockResolvedValue(["function example() {", " const x = 1;", " const y = 2;"].join("\n"))
196+
197+
const result = await extractTextFromFile("/test/structured.ts", 3)
198+
199+
// Should preserve the first 3 lines with line numbers
200+
expect(result).toContain("1 | function example() {")
201+
expect(result).toContain("2 | const x = 1;")
202+
expect(result).toContain("3 | const y = 2;")
203+
expect(result).not.toContain("4 | return x + y;")
204+
205+
// Should include truncation info
206+
expect(result).toContain("[File truncated: showing 3 of 7 total lines")
207+
})
208+
209+
it("should handle binary files by throwing an error", async () => {
210+
mockedIsBinaryFile.mockResolvedValue(true)
211+
212+
await expect(extractTextFromFile("/test/binary.bin", 100)).rejects.toThrow(
213+
"Cannot read text for file type: .bin",
214+
)
215+
})
216+
217+
it("should handle file not found errors", async () => {
218+
mockedFs.access.mockRejectedValue(new Error("ENOENT"))
219+
220+
await expect(extractTextFromFile("/test/nonexistent.ts", 100)).rejects.toThrow(
221+
"File not found: /test/nonexistent.ts",
222+
)
223+
})
224+
})

src/integrations/misc/extract-text.ts

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import mammoth from "mammoth"
55
import fs from "fs/promises"
66
import { isBinaryFile } from "isbinaryfile"
77
import { extractTextFromXLSX } from "./extract-text-from-xlsx"
8+
import { countFileLines } from "./line-counter"
9+
import { readLines } from "./read-lines"
810

911
async function extractTextFromPDF(filePath: string): Promise<string> {
1012
const dataBuffer = await fs.readFile(filePath)
@@ -48,7 +50,7 @@ export function getSupportedBinaryFormats(): string[] {
4850
return Object.keys(SUPPORTED_BINARY_FORMATS)
4951
}
5052

51-
export async function extractTextFromFile(filePath: string): Promise<string> {
53+
export async function extractTextFromFile(filePath: string, maxReadFileLine?: number): Promise<string> {
5254
try {
5355
await fs.access(filePath)
5456
} catch (error) {
@@ -67,6 +69,20 @@ export async function extractTextFromFile(filePath: string): Promise<string> {
6769
const isBinary = await isBinaryFile(filePath).catch(() => false)
6870

6971
if (!isBinary) {
72+
// Check if we need to apply line limit
73+
if (maxReadFileLine && maxReadFileLine > 0) {
74+
const totalLines = await countFileLines(filePath)
75+
if (totalLines > maxReadFileLine) {
76+
// Read only up to maxReadFileLine
77+
const content = await readLines(filePath, maxReadFileLine - 1, 0)
78+
const numberedContent = addLineNumbers(content)
79+
return (
80+
numberedContent +
81+
`\n\n[File truncated: showing ${maxReadFileLine} of ${totalLines} total lines. The file is too large and may exhaust the context window if read in full.]`
82+
)
83+
}
84+
}
85+
// Read the entire file if no limit or file is within limit
7086
return addLineNumbers(await fs.readFile(filePath, "utf8"))
7187
} else {
7288
throw new Error(`Cannot read text for file type: ${fileExtension}`)

0 commit comments

Comments
 (0)