diff --git a/src/core/tools/__tests__/readFileTool.test.ts b/src/core/tools/__tests__/readFileTool.test.ts
index acdef9e62d..3ed5cbe3f1 100644
--- a/src/core/tools/__tests__/readFileTool.test.ts
+++ b/src/core/tools/__tests__/readFileTool.test.ts
@@ -44,16 +44,14 @@ const addLineNumbersMock = jest.fn().mockImplementation((text, startLine = 1) =>
return lines.map((line, i) => `${startLine + i} | ${line}`).join("\n")
})
-const extractTextFromFileMock = jest.fn().mockImplementation((_filePath) => {
- // Call addLineNumbersMock to register the call
- addLineNumbersMock(mockInputContent)
- return Promise.resolve(addLineNumbersMock(mockInputContent))
-})
+const extractTextFromFileMock = jest.fn()
+const getSupportedBinaryFormatsMock = jest.fn(() => [".pdf", ".docx", ".ipynb"])
// Now assign the mocks to the module
const extractTextModule = jest.requireMock("../../../integrations/misc/extract-text")
extractTextModule.extractTextFromFile = extractTextFromFileMock
extractTextModule.addLineNumbers = addLineNumbersMock
+extractTextModule.getSupportedBinaryFormats = getSupportedBinaryFormatsMock
jest.mock("../../ignore/RooIgnoreController", () => ({
RooIgnoreController: class {
@@ -128,6 +126,9 @@ describe("read_file tool with maxReadFileLine setting", () => {
mockCline.say = jest.fn().mockResolvedValue(undefined)
mockCline.ask = jest.fn().mockResolvedValue({ response: "yesButtonClicked" })
mockCline.presentAssistantMessage = jest.fn()
+ mockCline.handleError = jest.fn().mockResolvedValue(undefined)
+ mockCline.pushToolResult = jest.fn()
+ mockCline.removeClosingTag = jest.fn((tag, content) => content)
mockCline.fileContextTracker = {
trackFileContext: jest.fn().mockResolvedValue(undefined),
@@ -410,6 +411,13 @@ describe("read_file tool XML output structure", () => {
mockedPathResolve.mockReturnValue(absoluteFilePath)
mockedIsBinaryFile.mockResolvedValue(false)
+ // Set default implementation for extractTextFromFile
+ mockedExtractTextFromFile.mockImplementation((filePath) => {
+ // Call addLineNumbersMock to register the call
+ addLineNumbersMock(mockInputContent)
+ return Promise.resolve(addLineNumbersMock(mockInputContent))
+ })
+
mockInputContent = fileContent
// Setup mock provider with default maxReadFileLine
@@ -1126,34 +1134,101 @@ describe("read_file tool XML output structure", () => {
const textPath = "test/text.txt"
const binaryPath = "test/binary.pdf"
const numberedContent = "1 | Text file content"
+ const pdfContent = "1 | PDF content extracted"
+
+ // Mock path.resolve to return the expected paths
+ mockedPathResolve.mockImplementation((cwd, relPath) => `/${relPath}`)
// Mock binary file detection
- mockedIsBinaryFile.mockImplementationOnce(() => Promise.resolve(false))
- mockedIsBinaryFile.mockImplementationOnce(() => Promise.resolve(true))
+ mockedIsBinaryFile.mockImplementation((path) => {
+ if (path.includes("text.txt")) return Promise.resolve(false)
+ if (path.includes("binary.pdf")) return Promise.resolve(true)
+ return Promise.resolve(false)
+ })
+
+ mockedCountFileLines.mockImplementation((path) => {
+ return Promise.resolve(1)
+ })
- // Mock content based on file type
mockedExtractTextFromFile.mockImplementation((path) => {
- if (path.includes("binary")) {
- return Promise.resolve("")
+ if (path.includes("binary.pdf")) {
+ return Promise.resolve(pdfContent)
}
return Promise.resolve(numberedContent)
})
- mockedCountFileLines.mockImplementation((path) => {
- return Promise.resolve(path.includes("binary") ? 0 : 1)
- })
+
+ // Configure mocks for the test
mockProvider.getState.mockResolvedValue({ maxReadFileLine: -1 })
- // Execute
- const result = await executeReadFileTool(
- {
+ // Create standalone mock functions
+ const mockAskApproval = jest.fn().mockResolvedValue({ response: "yesButtonClicked" })
+ const mockHandleError = jest.fn().mockResolvedValue(undefined)
+ const mockPushToolResult = jest.fn()
+ const mockRemoveClosingTag = jest.fn((tag, content) => content)
+
+ // Create a tool use object directly
+ const toolUse: ReadFileToolUse = {
+ type: "tool_use",
+ name: "read_file",
+ params: {
args: `${textPath}${binaryPath}`,
},
- { totalLines: 1 },
+ partial: false,
+ }
+
+ // Call readFileTool directly
+ await readFileTool(
+ mockCline,
+ toolUse,
+ mockAskApproval,
+ mockHandleError,
+ mockPushToolResult,
+ mockRemoveClosingTag,
)
- // Verify
- expect(result).toBe(
- `\n${textPath}\n\n${numberedContent}\n\n${binaryPath}\nBinary file\n\n`,
+ // Check the result
+ expect(mockPushToolResult).toHaveBeenCalledWith(
+ `\n${textPath}\n\n${numberedContent}\n\n${binaryPath}\n\n${pdfContent}\n\n`,
+ )
+ })
+
+ it("should block unsupported binary files", async () => {
+ // Setup
+ const unsupportedBinaryPath = "test/binary.exe"
+
+ mockedIsBinaryFile.mockImplementation(() => Promise.resolve(true))
+ mockedCountFileLines.mockImplementation(() => Promise.resolve(1))
+ mockProvider.getState.mockResolvedValue({ maxReadFileLine: -1 })
+
+ // Create standalone mock functions
+ const mockAskApproval = jest.fn().mockResolvedValue({ response: "yesButtonClicked" })
+ const mockHandleError = jest.fn().mockResolvedValue(undefined)
+ const mockPushToolResult = jest.fn()
+ const mockRemoveClosingTag = jest.fn((tag, content) => content)
+
+ // Create a tool use object directly
+ const toolUse: ReadFileToolUse = {
+ type: "tool_use",
+ name: "read_file",
+ params: {
+ args: `${unsupportedBinaryPath}`,
+ },
+ partial: false,
+ }
+
+ // Call readFileTool directly
+ await readFileTool(
+ mockCline,
+ toolUse,
+ mockAskApproval,
+ mockHandleError,
+ mockPushToolResult,
+ mockRemoveClosingTag,
+ )
+
+ // Check the result
+ expect(mockPushToolResult).toHaveBeenCalledWith(
+ `\n${unsupportedBinaryPath}\nBinary file\n\n`,
)
})
})
@@ -1165,6 +1240,7 @@ describe("read_file tool XML output structure", () => {
const maxReadFileLine = -1
const totalLines = 0
mockedCountFileLines.mockResolvedValue(totalLines)
+ mockedIsBinaryFile.mockResolvedValue(false) // Ensure empty file is not detected as binary
// Execute
const result = await executeReadFileTool({}, { maxReadFileLine, totalLines })
diff --git a/src/core/tools/readFileTool.ts b/src/core/tools/readFileTool.ts
index 3bd79110cd..e49ac43d7b 100644
--- a/src/core/tools/readFileTool.ts
+++ b/src/core/tools/readFileTool.ts
@@ -11,7 +11,7 @@ import { isPathOutsideWorkspace } from "../../utils/pathUtils"
import { getReadablePath } from "../../utils/path"
import { countFileLines } from "../../integrations/misc/line-counter"
import { readLines } from "../../integrations/misc/read-lines"
-import { extractTextFromFile, addLineNumbers } from "../../integrations/misc/extract-text"
+import { extractTextFromFile, addLineNumbers, getSupportedBinaryFormats } from "../../integrations/misc/extract-text"
import { parseSourceCodeDefinitionsForFile } from "../../services/tree-sitter"
import { parseXml } from "../../utils/xml"
@@ -435,13 +435,19 @@ export async function readFileTool(
try {
const [totalLines, isBinary] = await Promise.all([countFileLines(fullPath), isBinaryFile(fullPath)])
- // Handle binary files
+ // Handle binary files (but allow specific file types that extractTextFromFile can handle)
if (isBinary) {
- updateFileResult(relPath, {
- notice: "Binary file",
- xmlContent: `${relPath}\nBinary file\n`,
- })
- continue
+ const fileExtension = path.extname(relPath).toLowerCase()
+ const supportedBinaryFormats = getSupportedBinaryFormats()
+
+ if (!supportedBinaryFormats.includes(fileExtension)) {
+ updateFileResult(relPath, {
+ notice: "Binary file",
+ xmlContent: `${relPath}\nBinary file\n`,
+ })
+ continue
+ }
+ // For supported binary formats (.pdf, .docx, .ipynb), continue to extractTextFromFile
}
// Handle range reads (bypass maxReadFileLine)
diff --git a/src/integrations/misc/extract-text.ts b/src/integrations/misc/extract-text.ts
index f2f1d094b6..bd8b9ce9d0 100644
--- a/src/integrations/misc/extract-text.ts
+++ b/src/integrations/misc/extract-text.ts
@@ -5,34 +5,6 @@ import mammoth from "mammoth"
import fs from "fs/promises"
import { isBinaryFile } from "isbinaryfile"
-export async function extractTextFromFile(filePath: string): Promise {
- try {
- await fs.access(filePath)
- } catch (error) {
- throw new Error(`File not found: ${filePath}`)
- }
-
- const fileExtension = path.extname(filePath).toLowerCase()
-
- switch (fileExtension) {
- case ".pdf":
- return extractTextFromPDF(filePath)
- case ".docx":
- return extractTextFromDOCX(filePath)
- case ".ipynb":
- return extractTextFromIPYNB(filePath)
- default: {
- const isBinary = await isBinaryFile(filePath).catch(() => false)
-
- if (!isBinary) {
- return addLineNumbers(await fs.readFile(filePath, "utf8"))
- } else {
- throw new Error(`Cannot read text for file type: ${fileExtension}`)
- }
- }
- }
-}
-
async function extractTextFromPDF(filePath: string): Promise {
const dataBuffer = await fs.readFile(filePath)
const data = await pdf(dataBuffer)
@@ -58,6 +30,47 @@ async function extractTextFromIPYNB(filePath: string): Promise {
return addLineNumbers(extractedText)
}
+/**
+ * Map of supported binary file formats to their extraction functions
+ */
+const SUPPORTED_BINARY_FORMATS = {
+ ".pdf": extractTextFromPDF,
+ ".docx": extractTextFromDOCX,
+ ".ipynb": extractTextFromIPYNB,
+} as const
+
+/**
+ * Returns the list of supported binary file formats that can be processed by extractTextFromFile
+ */
+export function getSupportedBinaryFormats(): string[] {
+ return Object.keys(SUPPORTED_BINARY_FORMATS)
+}
+
+export async function extractTextFromFile(filePath: string): Promise {
+ try {
+ await fs.access(filePath)
+ } catch (error) {
+ throw new Error(`File not found: ${filePath}`)
+ }
+
+ const fileExtension = path.extname(filePath).toLowerCase()
+
+ // Check if we have a specific extractor for this format
+ const extractor = SUPPORTED_BINARY_FORMATS[fileExtension as keyof typeof SUPPORTED_BINARY_FORMATS]
+ if (extractor) {
+ return extractor(filePath)
+ }
+
+ // Handle other files
+ const isBinary = await isBinaryFile(filePath).catch(() => false)
+
+ if (!isBinary) {
+ return addLineNumbers(await fs.readFile(filePath, "utf8"))
+ } else {
+ throw new Error(`Cannot read text for file type: ${fileExtension}`)
+ }
+}
+
export function addLineNumbers(content: string, startLine: number = 1): string {
// If content is empty, return empty string - empty files should not have line numbers
// If content is empty but startLine > 1, return "startLine | " because we know the file is not empty