fix: respect maxReadFileLine setting for file mentions to prevent context exhaustion

roomote · daniel-lxs · commit 83df51a567ad · 2025-07-25T12:14:34.000-05:00
- Modified extractTextFromFile to accept and respect a maxReadFileLine parameter
- Updated parseMentions and related functions to pass maxReadFileLine through the call chain
- Modified Task.ts to retrieve maxReadFileLine from state and pass it to processUserContentMentions
- Added comprehensive tests for large file handling
- Files are now truncated with an informative message when they exceed the line limit

Fixes #6069
diff --git a/src/core/mentions/index.ts b/src/core/mentions/index.ts
@@ -82,6 +82,7 @@ export async function parseMentions(
 	showRooIgnoredFiles: boolean = true,
 	includeDiagnosticMessages: boolean = true,
 	maxDiagnosticMessages: number = 50,
+	maxReadFileLine?: number,
 ): Promise<string> {
 	const mentions: Set<string> = new Set()
 	let parsedText = text.replace(mentionRegexGlobal, (match, mention) => {
@@ -149,7 +150,13 @@ export async function parseMentions(
 		} else if (mention.startsWith("/")) {
 			const mentionPath = mention.slice(1)
 			try {
-				const content = await getFileOrFolderContent(mentionPath, cwd, rooIgnoreController, showRooIgnoredFiles)
+				const content = await getFileOrFolderContent(
+					mentionPath,
+					cwd,
+					rooIgnoreController,
+					showRooIgnoredFiles,
+					maxReadFileLine,
+				)
 				if (mention.endsWith("/")) {
 					parsedText += `\n\n<folder_content path="${mentionPath}">\n${content}\n</folder_content>`
 				} else {
@@ -212,6 +219,7 @@ async function getFileOrFolderContent(
 	cwd: string,
 	rooIgnoreController?: any,
 	showRooIgnoredFiles: boolean = true,
+	maxReadFileLine?: number,
 ): Promise<string> {
 	const unescapedPath = unescapeSpaces(mentionPath)
 	const absPath = path.resolve(cwd, unescapedPath)
@@ -224,7 +232,7 @@ async function getFileOrFolderContent(
 				return `(File ${mentionPath} is ignored by .rooignore)`
 			}
 			try {
-				const content = await extractTextFromFile(absPath)
+				const content = await extractTextFromFile(absPath, maxReadFileLine)
 				return content
 			} catch (error) {
 				return `(Failed to read contents of ${mentionPath}): ${error.message}`
@@ -264,7 +272,7 @@ async function getFileOrFolderContent(
 									if (isBinary) {
 										return undefined
 									}
-									const content = await extractTextFromFile(absoluteFilePath)
+									const content = await extractTextFromFile(absoluteFilePath, maxReadFileLine)
 									return `<file_content path="${filePath.toPosix()}">\n${content}\n</file_content>`
 								} catch (error) {
 									return undefined
diff --git a/src/core/mentions/processUserContentMentions.ts b/src/core/mentions/processUserContentMentions.ts
@@ -15,6 +15,7 @@ export async function processUserContentMentions({
 	showRooIgnoredFiles = true,
 	includeDiagnosticMessages = true,
 	maxDiagnosticMessages = 50,
+	maxReadFileLine,
 }: {
 	userContent: Anthropic.Messages.ContentBlockParam[]
 	cwd: string
@@ -24,6 +25,7 @@ export async function processUserContentMentions({
 	showRooIgnoredFiles?: boolean
 	includeDiagnosticMessages?: boolean
 	maxDiagnosticMessages?: number
+	maxReadFileLine?: number
 }) {
 	// Process userContent array, which contains various block types:
 	// TextBlockParam, ImageBlockParam, ToolUseBlockParam, and ToolResultBlockParam.
@@ -52,6 +54,7 @@ export async function processUserContentMentions({
 							showRooIgnoredFiles,
 							includeDiagnosticMessages,
 							maxDiagnosticMessages,
+							maxReadFileLine,
 						),
 					}
 				}
@@ -71,6 +74,7 @@ export async function processUserContentMentions({
 								showRooIgnoredFiles,
 								includeDiagnosticMessages,
 								maxDiagnosticMessages,
+								maxReadFileLine,
 							),
 						}
 					}
@@ -91,6 +95,7 @@ export async function processUserContentMentions({
 										showRooIgnoredFiles,
 										includeDiagnosticMessages,
 										maxDiagnosticMessages,
+										maxReadFileLine,
 									),
 								}
 							}
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
@@ -1230,6 +1230,7 @@ export class Task extends EventEmitter<ClineEvents> {
 			showRooIgnoredFiles = true,
 			includeDiagnosticMessages = true,
 			maxDiagnosticMessages = 50,
+			maxReadFileLine = -1,
 		} = (await this.providerRef.deref()?.getState()) ?? {}
 
 		const parsedUserContent = await processUserContentMentions({
@@ -1241,6 +1242,7 @@ export class Task extends EventEmitter<ClineEvents> {
 			showRooIgnoredFiles,
 			includeDiagnosticMessages,
 			maxDiagnosticMessages,
+			maxReadFileLine,
 		})
 
 		const environmentDetails = await getEnvironmentDetails(this, includeFileDetails)
diff --git a/src/integrations/misc/__tests__/extract-text-large-files.spec.ts b/src/integrations/misc/__tests__/extract-text-large-files.spec.ts
@@ -0,0 +1,224 @@
+// npx vitest run integrations/misc/__tests__/extract-text-large-files.spec.ts
+
+import { describe, it, expect, vi, beforeEach, Mock } from "vitest"
+import * as fs from "fs/promises"
+import { extractTextFromFile } from "../extract-text"
+import { countFileLines } from "../line-counter"
+import { readLines } from "../read-lines"
+import { isBinaryFile } from "isbinaryfile"
+
+// Mock all dependencies
+vi.mock("fs/promises")
+vi.mock("../line-counter")
+vi.mock("../read-lines")
+vi.mock("isbinaryfile")
+
+describe("extractTextFromFile - Large File Handling", () => {
+	// Typed handles to the modules mocked via the vi.mock() calls above
+	const mockedFs = vi.mocked(fs)
+	const mockedCountFileLines = vi.mocked(countFileLines)
+	const mockedReadLines = vi.mocked(readLines)
+	const mockedIsBinaryFile = vi.mocked(isBinaryFile)
+
+	beforeEach(() => {
+		vi.clearAllMocks()
+		// Defaults for every test: fs.access resolves (the existence check passes) and the file is reported as non-binary
+		mockedFs.access.mockResolvedValue(undefined)
+		mockedIsBinaryFile.mockResolvedValue(false)
+	})
+
+	it("should truncate files that exceed maxReadFileLine limit", async () => {
+		const largeFileContent = Array(150)
+			.fill(null)
+			.map((_, i) => `Line ${i + 1}: This is a test line with some content`)
+			.join("\n")
+
+		mockedCountFileLines.mockResolvedValue(150)
+		mockedReadLines.mockResolvedValue(
+			Array(100)
+				.fill(null)
+				.map((_, i) => `Line ${i + 1}: This is a test line with some content`)
+				.join("\n"),
+		)
+
+		const result = await extractTextFromFile("/test/large-file.ts", 100)
+
+		// Should only include the first 100 lines (what the readLines mock returns), with line numbers
+		expect(result).toContain("  1 | Line 1: This is a test line with some content")
+		expect(result).toContain("100 | Line 100: This is a test line with some content")
+		expect(result).not.toContain("101 | Line 101: This is a test line with some content")
+
+		// Should include the truncation message reporting shown vs. total line counts
+		expect(result).toContain(
+			"[File truncated: showing 100 of 150 total lines. The file is too large and may exhaust the context window if read in full.]",
+		)
+	})
+
+	it("should not truncate files within the maxReadFileLine limit", async () => {
+		const smallFileContent = Array(50)
+			.fill(null)
+			.map((_, i) => `Line ${i + 1}: This is a test line`)
+			.join("\n")
+
+		mockedCountFileLines.mockResolvedValue(50)
+		mockedFs.readFile.mockResolvedValue(smallFileContent as any)
+
+		const result = await extractTextFromFile("/test/small-file.ts", 100)
+
+		// Should include all lines with line numbers
+		expect(result).toContain(" 1 | Line 1: This is a test line")
+		expect(result).toContain("50 | Line 50: This is a test line")
+
+		// Should not include truncation message
+		expect(result).not.toContain("[File truncated:")
+	})
+
+	it("should handle files with exactly maxReadFileLine lines", async () => {
+		const exactFileContent = Array(100)
+			.fill(null)
+			.map((_, i) => `Line ${i + 1}`)
+			.join("\n")
+
+		mockedCountFileLines.mockResolvedValue(100)
+		mockedFs.readFile.mockResolvedValue(exactFileContent as any)
+
+		const result = await extractTextFromFile("/test/exact-file.ts", 100)
+
+		// Should include all lines with line numbers (the limit is only exceeded when totalLines > maxReadFileLine)
+		expect(result).toContain("  1 | Line 1")
+		expect(result).toContain("100 | Line 100")
+
+		// Should not include truncation message
+		expect(result).not.toContain("[File truncated:")
+	})
+
+	it("should handle undefined maxReadFileLine by not truncating", async () => {
+		const largeFileContent = Array(200)
+			.fill(null)
+			.map((_, i) => `Line ${i + 1}`)
+			.join("\n")
+
+		mockedFs.readFile.mockResolvedValue(largeFileContent as any)
+
+		const result = await extractTextFromFile("/test/large-file.ts", undefined)
+
+		// Should include all lines with line numbers when maxReadFileLine is undefined
+		expect(result).toContain("  1 | Line 1")
+		expect(result).toContain("200 | Line 200")
+
+		// Should not include truncation message
+		expect(result).not.toContain("[File truncated:")
+	})
+
+	it("should handle empty files", async () => {
+		mockedFs.readFile.mockResolvedValue("" as any)
+
+		const result = await extractTextFromFile("/test/empty-file.ts", 100)
+
+		expect(result).toBe("")
+		expect(result).not.toContain("[File truncated:")
+	})
+
+	it("should handle files with only newlines", async () => {
+		const newlineOnlyContent = "\n\n\n\n\n"
+
+		mockedCountFileLines.mockResolvedValue(6) // 5 newlines = 6 lines
+		mockedReadLines.mockResolvedValue("\n\n")
+
+		const result = await extractTextFromFile("/test/newline-file.ts", 3)
+
+		// Should report truncation to 3 of the 6 counted lines
+		expect(result).toContain("[File truncated: showing 3 of 6 total lines")
+	})
+
+	it("should handle very large files efficiently", async () => {
+		// Simulate a 10,000 line file: only the line count and the first 500 lines are mocked
+		mockedCountFileLines.mockResolvedValue(10000)
+		mockedReadLines.mockResolvedValue(
+			Array(500)
+				.fill(null)
+				.map((_, i) => `Line ${i + 1}: Some content here`)
+				.join("\n"),
+		)
+
+		const result = await extractTextFromFile("/test/very-large-file.ts", 500)
+
+		// Should only include first 500 lines with line numbers
+		expect(result).toContain("  1 | Line 1: Some content here")
+		expect(result).toContain("500 | Line 500: Some content here")
+		expect(result).not.toContain("501 | Line 501: Some content here")
+
+		// Should show truncation message
+		expect(result).toContain("[File truncated: showing 500 of 10000 total lines")
+	})
+
+	it("should handle maxReadFileLine of 0 by not truncating", async () => {
+		const fileContent = "Line 1\nLine 2\nLine 3"
+
+		mockedFs.readFile.mockResolvedValue(fileContent as any)
+
+		const result = await extractTextFromFile("/test/file.ts", 0)
+
+		// maxReadFileLine of 0 or negative means no limit, so the whole file is read via fs.readFile
+		expect(result).toContain("1 | Line 1")
+		expect(result).toContain("2 | Line 2")
+		expect(result).toContain("3 | Line 3")
+		expect(result).not.toContain("[File truncated:")
+	})
+
+	it("should handle negative maxReadFileLine by treating as undefined", async () => {
+		const fileContent = "Line 1\nLine 2\nLine 3"
+
+		mockedFs.readFile.mockResolvedValue(fileContent as any)
+
+		const result = await extractTextFromFile("/test/file.ts", -1)
+
+		// Should include all content with line numbers when the limit is negative
+		expect(result).toContain("1 | Line 1")
+		expect(result).toContain("2 | Line 2")
+		expect(result).toContain("3 | Line 3")
+		expect(result).not.toContain("[File truncated:")
+	})
+
+	it("should preserve file content structure when truncating", async () => {
+		const structuredContent = [
+			"function example() {",
+			"  const x = 1;",
+			"  const y = 2;",
+			"  return x + y;",
+			"}",
+			"",
+			"// More code below",
+		].join("\n")
+
+		mockedCountFileLines.mockResolvedValue(7)
+		mockedReadLines.mockResolvedValue(["function example() {", "  const x = 1;", "  const y = 2;"].join("\n"))
+
+		const result = await extractTextFromFile("/test/structured.ts", 3)
+
+		// Should preserve the first 3 lines (including their indentation) with line numbers
+		expect(result).toContain("1 | function example() {")
+		expect(result).toContain("2 |   const x = 1;")
+		expect(result).toContain("3 |   const y = 2;")
+		expect(result).not.toContain("4 |   return x + y;")
+
+		// Should include truncation info
+		expect(result).toContain("[File truncated: showing 3 of 7 total lines")
+	})
+
+	it("should handle binary files by throwing an error", async () => {
+		mockedIsBinaryFile.mockResolvedValue(true)
+
+		await expect(extractTextFromFile("/test/binary.bin", 100)).rejects.toThrow(
+			"Cannot read text for file type: .bin",
+		)
+	})
+
+	it("should handle file not found errors", async () => {
+		mockedFs.access.mockRejectedValue(new Error("ENOENT"))
+
+		await expect(extractTextFromFile("/test/nonexistent.ts", 100)).rejects.toThrow(
+			"File not found: /test/nonexistent.ts",
+		)
+	})
+})
diff --git a/src/integrations/misc/extract-text.ts b/src/integrations/misc/extract-text.ts
@@ -5,6 +5,8 @@ import mammoth from "mammoth"
 import fs from "fs/promises"
 import { isBinaryFile } from "isbinaryfile"
 import { extractTextFromXLSX } from "./extract-text-from-xlsx"
+import { countFileLines } from "./line-counter"
+import { readLines } from "./read-lines"
 
 async function extractTextFromPDF(filePath: string): Promise<string> {
 	const dataBuffer = await fs.readFile(filePath)
@@ -48,7 +50,7 @@ export function getSupportedBinaryFormats(): string[] {
 	return Object.keys(SUPPORTED_BINARY_FORMATS)
 }
 
-export async function extractTextFromFile(filePath: string): Promise<string> {
+export async function extractTextFromFile(filePath: string, maxReadFileLine?: number): Promise<string> {
 	try {
 		await fs.access(filePath)
 	} catch (error) {
@@ -67,6 +69,20 @@ export async function extractTextFromFile(filePath: string): Promise<string> {
 	const isBinary = await isBinaryFile(filePath).catch(() => false)
 
 	if (!isBinary) {
+		// Check if we need to apply line limit
+		if (maxReadFileLine && maxReadFileLine > 0) {
+			const totalLines = await countFileLines(filePath)
+			if (totalLines > maxReadFileLine) {
+				// Read only up to maxReadFileLine
+				const content = await readLines(filePath, maxReadFileLine - 1, 0)
+				const numberedContent = addLineNumbers(content)
+				return (
+					numberedContent +
+					`\n\n[File truncated: showing ${maxReadFileLine} of ${totalLines} total lines. The file is too large and may exhaust the context window if read in full.]`
+				)
+			}
+		}
+		// Read the entire file if no limit or file is within limit
 		return addLineNumbers(await fs.readFile(filePath, "utf8"))
 	} else {
 		throw new Error(`Cannot read text for file type: ${fileExtension}`)