RooCodeInc
diff --git a/‎packages/types/src/global-settings.ts‎
Lines changed: 2 additions & 0 deletions b/‎packages/types/src/global-settings.ts‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/core/mentions/index.ts‎
Lines changed: 9 additions & 2 deletions b/‎src/core/mentions/index.ts‎
Lines changed: 9 additions & 2 deletions
diff --git a/‎src/core/tools/readFileTool.ts‎
Lines changed: 7 additions & 4 deletions b/‎src/core/tools/readFileTool.ts‎
Lines changed: 7 additions & 4 deletions
diff --git a/‎src/core/webview/ClineProvider.ts‎
Lines changed: 3 additions & 0 deletions b/‎src/core/webview/ClineProvider.ts‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎src/core/webview/webviewMessageHandler.ts‎
Lines changed: 4 additions & 0 deletions b/‎src/core/webview/webviewMessageHandler.ts‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎src/integrations/misc/__tests__/extract-text-token-based.spec.ts‎
Lines changed: 190 additions & 0 deletions b/‎src/integrations/misc/__tests__/extract-text-token-based.spec.ts‎
Lines changed: 190 additions & 0 deletions
@@ -101,6 +101,7 @@ export const globalSettingsSchema = z.object({
 	maxWorkspaceFiles: z.number().optional(),
 	showRooIgnoredFiles: z.boolean().optional(),
 	maxReadFileLine: z.number().optional(),
+	maxReadFileTokens: z.number().optional(),
 
 	terminalOutputLineLimit: z.number().optional(),
 	terminalOutputCharacterLimit: z.number().optional(),
@@ -273,6 +274,7 @@ export const EVALS_SETTINGS: RooCodeSettings = {
 	maxWorkspaceFiles: 200,
 	showRooIgnoredFiles: true,
 	maxReadFileLine: -1, // -1 to enable full file reading.
+	maxReadFileTokens: -1, // -1 to enable full file reading.
 
 	includeDiagnosticMessages: true,
 	maxDiagnosticMessages: 50,
 
@@ -84,6 +84,7 @@ export async function parseMentions(
 	includeDiagnosticMessages: boolean = true,
 	maxDiagnosticMessages: number = 50,
 	maxReadFileLine?: number,
+	maxReadFileTokens?: number,
 ): Promise<string> {
 	const mentions: Set<string> = new Set()
 	const commandMentions: Set<string> = new Set()
@@ -166,6 +167,7 @@ export async function parseMentions(
 					rooIgnoreController,
 					showRooIgnoredFiles,
 					maxReadFileLine,
+					maxReadFileTokens,
 				)
 				if (mention.endsWith("/")) {
 					parsedText += `\n\n<folder_content path="${mentionPath}">\n${content}\n</folder_content>`
@@ -244,6 +246,7 @@ async function getFileOrFolderContent(
 	rooIgnoreController?: any,
 	showRooIgnoredFiles: boolean = true,
 	maxReadFileLine?: number,
+	maxReadFileTokens?: number,
 ): Promise<string> {
 	const unescapedPath = unescapeSpaces(mentionPath)
 	const absPath = path.resolve(cwd, unescapedPath)
@@ -256,7 +259,7 @@ async function getFileOrFolderContent(
 				return `(File ${mentionPath} is ignored by .rooignore)`
 			}
 			try {
-				const content = await extractTextFromFile(absPath, maxReadFileLine)
+				const content = await extractTextFromFile(absPath, maxReadFileLine, maxReadFileTokens)
 				return content
 			} catch (error) {
 				return `(Failed to read contents of ${mentionPath}): ${error.message}`
@@ -296,7 +299,11 @@ async function getFileOrFolderContent(
 									if (isBinary) {
 										return undefined
 									}
-									const content = await extractTextFromFile(absoluteFilePath, maxReadFileLine)
+									const content = await extractTextFromFile(
+										absoluteFilePath,
+										maxReadFileLine,
+										maxReadFileTokens,
+									)
 									return `<file_content path="${filePath.toPosix()}">\n${content}\n</file_content>`
 								} catch (error) {
 									return undefined
 
@@ -253,7 +253,8 @@ export async function readFileTool(
 
 		// Handle batch approval if there are multiple files to approve
 		if (filesToApprove.length > 1) {
-			const { maxReadFileLine = -1 } = (await cline.providerRef.deref()?.getState()) ?? {}
+			const { maxReadFileLine = -1, maxReadFileTokens = 10000 } =
+				(await cline.providerRef.deref()?.getState()) ?? {}
 
 			// Prepare batch file data
 			const batchFiles = filesToApprove.map((fileResult) => {
@@ -368,7 +369,8 @@ export async function readFileTool(
 			const relPath = fileResult.path
 			const fullPath = path.resolve(cline.cwd, relPath)
 			const isOutsideWorkspace = isPathOutsideWorkspace(fullPath)
-			const { maxReadFileLine = -1 } = (await cline.providerRef.deref()?.getState()) ?? {}
+			const { maxReadFileLine = -1, maxReadFileTokens = 10000 } =
+				(await cline.providerRef.deref()?.getState()) ?? {}
 
 			// Create line snippet for approval message
 			let lineSnippet = ""
@@ -429,7 +431,8 @@ export async function readFileTool(
 
 			const relPath = fileResult.path
 			const fullPath = path.resolve(cline.cwd, relPath)
-			const { maxReadFileLine = -1 } = (await cline.providerRef.deref()?.getState()) ?? {}
+			const { maxReadFileLine = -1, maxReadFileTokens = 10000 } =
+				(await cline.providerRef.deref()?.getState()) ?? {}
 
 			// Process approved files
 			try {
@@ -517,7 +520,7 @@ export async function readFileTool(
 				}
 
 				// Handle normal file read
-				const content = await extractTextFromFile(fullPath)
+				const content = await extractTextFromFile(fullPath, -1, maxReadFileTokens) // -1: no line limit
 				const lineRangeAttr = ` lines="1-${totalLines}"`
 				let xmlInfo = totalLines > 0 ? `<content${lineRangeAttr}>\n${content}</content>\n` : `<content/>`
 
 
@@ -1425,6 +1425,7 @@ export class ClineProvider
 			showRooIgnoredFiles,
 			language,
 			maxReadFileLine,
+			maxReadFileTokens,
 			terminalCompressProgressBar,
 			historyPreviewCollapsed,
 			cloudUserInfo,
@@ -1532,6 +1533,7 @@ export class ClineProvider
 			language: language ?? formatLanguage(vscode.env.language),
 			renderContext: this.renderContext,
 			maxReadFileLine: maxReadFileLine ?? -1,
+			maxReadFileTokens: maxReadFileTokens ?? 10000,
 			maxConcurrentFileReads: maxConcurrentFileReads ?? 5,
 			settingsImportedAt: this.settingsImportedAt,
 			terminalCompressProgressBar: terminalCompressProgressBar ?? true,
@@ -1702,6 +1704,7 @@ export class ClineProvider
 			telemetrySetting: stateValues.telemetrySetting || "unset",
 			showRooIgnoredFiles: stateValues.showRooIgnoredFiles ?? true,
 			maxReadFileLine: stateValues.maxReadFileLine ?? -1,
+			maxReadFileTokens: stateValues.maxReadFileTokens ?? 10000,
 			maxConcurrentFileReads: stateValues.maxConcurrentFileReads ?? 5,
 			historyPreviewCollapsed: stateValues.historyPreviewCollapsed ?? false,
 			cloudUserInfo,
 
@@ -1265,6 +1265,10 @@ export const webviewMessageHandler = async (
 			await updateGlobalState("maxReadFileLine", message.value)
 			await provider.postStateToWebview()
 			break
+		case "maxReadFileTokens":
+			await updateGlobalState("maxReadFileTokens", message.value)
+			await provider.postStateToWebview()
+			break
 		case "maxConcurrentFileReads":
 			const valueToSave = message.value // Capture the value intended for saving
 			await updateGlobalState("maxConcurrentFileReads", valueToSave)
 
@@ -0,0 +1,159 @@
+// npx vitest run integrations/misc/__tests__/extract-text-token-based.spec.ts
+
+import { describe, it, expect, vi, beforeEach, Mock } from "vitest"
+import * as fs from "fs/promises"
+import { Anthropic } from "@anthropic-ai/sdk"
+import { extractTextFromFile } from "../extract-text"
+import { countFileLines } from "../line-counter"
+import { readLines } from "../read-lines"
+import { isBinaryFile } from "isbinaryfile"
+import { countTokens } from "../../../utils/countTokens"
+
+// Mock all dependencies
+vi.mock("fs/promises")
+vi.mock("../line-counter")
+vi.mock("../read-lines")
+vi.mock("isbinaryfile")
+vi.mock("../../../utils/countTokens")
+
+describe("extractTextFromFile - Token-based Truncation", () => {
+	// Typed handles for the auto-mocked modules
+	const mockedFs = vi.mocked(fs)
+	const mockedCountFileLines = vi.mocked(countFileLines)
+	const mockedIsBinaryFile = vi.mocked(isBinaryFile)
+	const mockedCountTokens = vi.mocked(countTokens)
+
+	/** Builds `count` newline-joined lines of the form "Line <n>: <body>", numbered from 1. */
+	const makeLines = (count: number, body: string): string =>
+		Array.from({ length: count }, (_, i) => `Line ${i + 1}: ${body}`).join("\n")
+
+	/** Concatenates the text of every text block; blocks of any other type are ignored. */
+	const textOf = (content: Anthropic.Messages.ContentBlockParam[]): string =>
+		content
+			.filter((block): block is Anthropic.Messages.TextBlockParam => block.type === "text")
+			.map((block) => block.text)
+			.join("")
+
+	/** Makes the countTokens mock report `tokensPerLine` tokens for each line of text it receives. */
+	const mockTokensPerLine = (tokensPerLine: number): void => {
+		mockedCountTokens.mockImplementation(
+			async (content: Anthropic.Messages.ContentBlockParam[]) =>
+				textOf(content).split("\n").length * tokensPerLine,
+		)
+	}
+
+	/** Counts the lines of extracted output that start with a "<n> |" line-number prefix. */
+	const countNumberedLines = (output: string): number =>
+		output.split("\n").filter((line) => /^\s*\d+\s*\|/.test(line)).length
+
+	beforeEach(() => {
+		vi.clearAllMocks()
+		// Default mock behavior: the file is accessible and not binary
+		mockedFs.access.mockResolvedValue(undefined)
+		mockedIsBinaryFile.mockResolvedValue(false)
+
+		// Default token estimate: 1.5 tokens per whitespace-separated word, rounded down
+		mockedCountTokens.mockImplementation(async (content: Anthropic.Messages.ContentBlockParam[]) =>
+			Math.floor(textOf(content).split(/\s+/).length * 1.5),
+		)
+	})
+
+	it("should truncate files based on token count when maxReadFileTokens is provided", async () => {
+		const fileContent = makeLines(100, "This is a test line with some content that has multiple words")
+		mockedFs.readFile.mockResolvedValue(fileContent as any)
+
+		// 15 tokens per line, so 50 lines add up to the 750-token limit
+		mockTokensPerLine(15)
+
+		const result = await extractTextFromFile("/test/large-file.ts", -1, 750)
+
+		// Should truncate based on tokens, not lines
+		expect(result).toContain("1 | Line 1:")
+		expect(result).toContain("[File truncated")
+		expect(result).toMatch(/\d+ of ~?\d+ tokens/)
+	})
+
+	it("should not truncate when token count is within limit", async () => {
+		mockedFs.readFile.mockResolvedValue(makeLines(10, "Short content") as any)
+
+		// Mock token counting to stay well under the 10000-token limit passed below
+		mockedCountTokens.mockResolvedValue(100)
+
+		const result = await extractTextFromFile("/test/small-file.ts", -1, 10000)
+
+		// Should include all content
+		expect(result).toContain(" 1 | Line 1: Short content")
+		expect(result).toContain("10 | Line 10: Short content")
+		expect(result).not.toContain("[File truncated")
+	})
+
+	it("should prioritize token-based truncation over line-based when both limits are set", async () => {
+		const fileContent = makeLines(200, "This line has many words to increase token count significantly")
+		mockedCountFileLines.mockResolvedValue(200)
+		mockedFs.readFile.mockResolvedValue(fileContent as any)
+
+		// 20 tokens per line: 25 lines already reach the 500-token limit, well before line 100
+		mockTokensPerLine(20)
+
+		// maxReadFileLine=100, maxReadFileTokens=500
+		const result = await extractTextFromFile("/test/file.ts", 100, 500)
+
+		// Should truncate based on tokens (500), not lines (100)
+		expect(result).toContain("[File truncated")
+		expect(result).toMatch(/\d+ of ~?\d+ tokens/)
+
+		// Should have stopped before reaching line limit
+		expect(countNumberedLines(result)).toBeLessThan(100)
+	})
+
+	it("should handle maxReadFileTokens of 0 by throwing an error", async () => {
+		await expect(extractTextFromFile("/test/file.ts", -1, 0)).rejects.toThrow(
+			"Invalid maxReadFileTokens: 0. Must be a positive integer or -1 for unlimited.",
+		)
+	})
+
+	it("should handle negative maxReadFileTokens by throwing an error", async () => {
+		await expect(extractTextFromFile("/test/file.ts", -1, -100)).rejects.toThrow(
+			"Invalid maxReadFileTokens: -100. Must be a positive integer or -1 for unlimited.",
+		)
+	})
+
+	it("should work with both line and token limits disabled", async () => {
+		mockedFs.readFile.mockResolvedValue("Line 1\nLine 2\nLine 3" as any)
+
+		const result = await extractTextFromFile("/test/file.ts", -1, undefined)
+
+		// Should include all content
+		expect(result).toContain("1 | Line 1")
+		expect(result).toContain("2 | Line 2")
+		expect(result).toContain("3 | Line 3")
+		expect(result).not.toContain("[File truncated")
+	})
+
+	it("should handle empty files with token-based truncation", async () => {
+		mockedFs.readFile.mockResolvedValue("" as any)
+		mockedCountTokens.mockResolvedValue(0)
+
+		const result = await extractTextFromFile("/test/empty.ts", -1, 1000)
+
+		expect(result).toBe("")
+	})
+
+	it("should efficiently handle very large token counts", async () => {
+		// Simulate a file that would have millions of tokens
+		const hugeContent = makeLines(10000, Array(100).fill("word").join(" "))
+		mockedFs.readFile.mockResolvedValue(hugeContent as any)
+
+		// 150 tokens per line
+		mockTokensPerLine(150)
+
+		const result = await extractTextFromFile("/test/huge.ts", -1, 5000)
+
+		// Should truncate early based on tokens
+		expect(result).toContain("[File truncated")
+		expect(result).toMatch(/\d+ of ~?\d+ tokens/)
+
+		// Should have stopped processing early
+		expect(countNumberedLines(result)).toBeLessThan(50) // Should stop around 33 lines (5000/150)
+	})
+})