Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 96 additions & 20 deletions src/core/tools/__tests__/readFileTool.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,14 @@ const addLineNumbersMock = jest.fn().mockImplementation((text, startLine = 1) =>
return lines.map((line, i) => `${startLine + i} | ${line}`).join("\n")
})

const extractTextFromFileMock = jest.fn().mockImplementation((_filePath) => {
// Call addLineNumbersMock to register the call
addLineNumbersMock(mockInputContent)
return Promise.resolve(addLineNumbersMock(mockInputContent))
})
const extractTextFromFileMock = jest.fn()
const getSupportedBinaryFormatsMock = jest.fn(() => [".pdf", ".docx", ".ipynb"])

// Now assign the mocks to the module
const extractTextModule = jest.requireMock("../../../integrations/misc/extract-text")
extractTextModule.extractTextFromFile = extractTextFromFileMock
extractTextModule.addLineNumbers = addLineNumbersMock
extractTextModule.getSupportedBinaryFormats = getSupportedBinaryFormatsMock

jest.mock("../../ignore/RooIgnoreController", () => ({
RooIgnoreController: class {
Expand Down Expand Up @@ -128,6 +126,9 @@ describe("read_file tool with maxReadFileLine setting", () => {
mockCline.say = jest.fn().mockResolvedValue(undefined)
mockCline.ask = jest.fn().mockResolvedValue({ response: "yesButtonClicked" })
mockCline.presentAssistantMessage = jest.fn()
mockCline.handleError = jest.fn().mockResolvedValue(undefined)
mockCline.pushToolResult = jest.fn()
mockCline.removeClosingTag = jest.fn((tag, content) => content)

mockCline.fileContextTracker = {
trackFileContext: jest.fn().mockResolvedValue(undefined),
Expand Down Expand Up @@ -410,6 +411,13 @@ describe("read_file tool XML output structure", () => {
mockedPathResolve.mockReturnValue(absoluteFilePath)
mockedIsBinaryFile.mockResolvedValue(false)

// Set default implementation for extractTextFromFile
mockedExtractTextFromFile.mockImplementation((filePath) => {
// Call addLineNumbersMock to register the call
addLineNumbersMock(mockInputContent)
return Promise.resolve(addLineNumbersMock(mockInputContent))
})

mockInputContent = fileContent

// Setup mock provider with default maxReadFileLine
Expand Down Expand Up @@ -1126,34 +1134,101 @@ describe("read_file tool XML output structure", () => {
const textPath = "test/text.txt"
const binaryPath = "test/binary.pdf"
const numberedContent = "1 | Text file content"
const pdfContent = "1 | PDF content extracted"

// Mock path.resolve to return the expected paths
mockedPathResolve.mockImplementation((cwd, relPath) => `/${relPath}`)

// Mock binary file detection
mockedIsBinaryFile.mockImplementationOnce(() => Promise.resolve(false))
mockedIsBinaryFile.mockImplementationOnce(() => Promise.resolve(true))
mockedIsBinaryFile.mockImplementation((path) => {
if (path.includes("text.txt")) return Promise.resolve(false)
if (path.includes("binary.pdf")) return Promise.resolve(true)
return Promise.resolve(false)
})

mockedCountFileLines.mockImplementation((path) => {
return Promise.resolve(1)
})

// Mock content based on file type
mockedExtractTextFromFile.mockImplementation((path) => {
if (path.includes("binary")) {
return Promise.resolve("")
if (path.includes("binary.pdf")) {
return Promise.resolve(pdfContent)
}
return Promise.resolve(numberedContent)
})
mockedCountFileLines.mockImplementation((path) => {
return Promise.resolve(path.includes("binary") ? 0 : 1)
})

// Configure mocks for the test
mockProvider.getState.mockResolvedValue({ maxReadFileLine: -1 })

// Execute
const result = await executeReadFileTool(
{
// Create standalone mock functions
const mockAskApproval = jest.fn().mockResolvedValue({ response: "yesButtonClicked" })
const mockHandleError = jest.fn().mockResolvedValue(undefined)
const mockPushToolResult = jest.fn()
const mockRemoveClosingTag = jest.fn((tag, content) => content)

// Create a tool use object directly
const toolUse: ReadFileToolUse = {
type: "tool_use",
name: "read_file",
params: {
args: `<file><path>${textPath}</path></file><file><path>${binaryPath}</path></file>`,
},
{ totalLines: 1 },
partial: false,
}

// Call readFileTool directly
await readFileTool(
mockCline,
toolUse,
mockAskApproval,
mockHandleError,
mockPushToolResult,
mockRemoveClosingTag,
)

// Verify
expect(result).toBe(
`<files>\n<file><path>${textPath}</path>\n<content lines="1-1">\n${numberedContent}</content>\n</file>\n<file><path>${binaryPath}</path>\n<notice>Binary file</notice>\n</file>\n</files>`,
// Check the result
expect(mockPushToolResult).toHaveBeenCalledWith(
`<files>\n<file><path>${textPath}</path>\n<content lines="1-1">\n${numberedContent}</content>\n</file>\n<file><path>${binaryPath}</path>\n<content lines="1-1">\n${pdfContent}</content>\n</file>\n</files>`,
)
})

it("should block unsupported binary files", async () => {
// Setup
const unsupportedBinaryPath = "test/binary.exe"

mockedIsBinaryFile.mockImplementation(() => Promise.resolve(true))
mockedCountFileLines.mockImplementation(() => Promise.resolve(1))
mockProvider.getState.mockResolvedValue({ maxReadFileLine: -1 })

// Create standalone mock functions
const mockAskApproval = jest.fn().mockResolvedValue({ response: "yesButtonClicked" })
const mockHandleError = jest.fn().mockResolvedValue(undefined)
const mockPushToolResult = jest.fn()
const mockRemoveClosingTag = jest.fn((tag, content) => content)

// Create a tool use object directly
const toolUse: ReadFileToolUse = {
type: "tool_use",
name: "read_file",
params: {
args: `<file><path>${unsupportedBinaryPath}</path></file>`,
},
partial: false,
}

// Call readFileTool directly
await readFileTool(
mockCline,
toolUse,
mockAskApproval,
mockHandleError,
mockPushToolResult,
mockRemoveClosingTag,
)

// Check the result
expect(mockPushToolResult).toHaveBeenCalledWith(
`<files>\n<file><path>${unsupportedBinaryPath}</path>\n<notice>Binary file</notice>\n</file>\n</files>`,
)
})
})
Expand All @@ -1165,6 +1240,7 @@ describe("read_file tool XML output structure", () => {
const maxReadFileLine = -1
const totalLines = 0
mockedCountFileLines.mockResolvedValue(totalLines)
mockedIsBinaryFile.mockResolvedValue(false) // Ensure empty file is not detected as binary

// Execute
const result = await executeReadFileTool({}, { maxReadFileLine, totalLines })
Expand Down
20 changes: 13 additions & 7 deletions src/core/tools/readFileTool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { isPathOutsideWorkspace } from "../../utils/pathUtils"
import { getReadablePath } from "../../utils/path"
import { countFileLines } from "../../integrations/misc/line-counter"
import { readLines } from "../../integrations/misc/read-lines"
import { extractTextFromFile, addLineNumbers } from "../../integrations/misc/extract-text"
import { extractTextFromFile, addLineNumbers, getSupportedBinaryFormats } from "../../integrations/misc/extract-text"
import { parseSourceCodeDefinitionsForFile } from "../../services/tree-sitter"
import { parseXml } from "../../utils/xml"

Expand Down Expand Up @@ -435,13 +435,19 @@ export async function readFileTool(
try {
const [totalLines, isBinary] = await Promise.all([countFileLines(fullPath), isBinaryFile(fullPath)])

// Handle binary files
// Handle binary files (but allow specific file types that extractTextFromFile can handle)
if (isBinary) {
updateFileResult(relPath, {
notice: "Binary file",
xmlContent: `<file><path>${relPath}</path>\n<notice>Binary file</notice>\n</file>`,
})
continue
const fileExtension = path.extname(relPath).toLowerCase()
const supportedBinaryFormats = getSupportedBinaryFormats()

if (!supportedBinaryFormats.includes(fileExtension)) {
updateFileResult(relPath, {
notice: "Binary file",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

User‐facing text is hardcoded (e.g. 'Binary file') when blocking unsupported binary files. Use the translation function t(...) so that the UI messages can be localized.

Suggested change
notice: "Binary file",
notice: t("tools:readFile.binaryFile"),

This comment was generated because it violated a code review rule: irule_C0ez7Rji6ANcGkkX.

xmlContent: `<file><path>${relPath}</path>\n<notice>Binary file</notice>\n</file>`,
})
continue
}
// For supported binary formats (.pdf, .docx, .ipynb), continue to extractTextFromFile
}

// Handle range reads (bypass maxReadFileLine)
Expand Down
69 changes: 41 additions & 28 deletions src/integrations/misc/extract-text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,34 +5,6 @@ import mammoth from "mammoth"
import fs from "fs/promises"
import { isBinaryFile } from "isbinaryfile"

export async function extractTextFromFile(filePath: string): Promise<string> {
try {
await fs.access(filePath)
} catch (error) {
throw new Error(`File not found: ${filePath}`)
}

const fileExtension = path.extname(filePath).toLowerCase()

switch (fileExtension) {
case ".pdf":
return extractTextFromPDF(filePath)
case ".docx":
return extractTextFromDOCX(filePath)
case ".ipynb":
return extractTextFromIPYNB(filePath)
default: {
const isBinary = await isBinaryFile(filePath).catch(() => false)

if (!isBinary) {
return addLineNumbers(await fs.readFile(filePath, "utf8"))
} else {
throw new Error(`Cannot read text for file type: ${fileExtension}`)
}
}
}
}

async function extractTextFromPDF(filePath: string): Promise<string> {
const dataBuffer = await fs.readFile(filePath)
const data = await pdf(dataBuffer)
Expand All @@ -58,6 +30,47 @@ async function extractTextFromIPYNB(filePath: string): Promise<string> {
return addLineNumbers(extractedText)
}

/**
* Map of supported binary file formats to their extraction functions
*/
const SUPPORTED_BINARY_FORMATS = {
".pdf": extractTextFromPDF,
".docx": extractTextFromDOCX,
".ipynb": extractTextFromIPYNB,
} as const

/**
* Returns the list of supported binary file formats that can be processed by extractTextFromFile
*/
export function getSupportedBinaryFormats(): string[] {
return Object.keys(SUPPORTED_BINARY_FORMATS)
}

export async function extractTextFromFile(filePath: string): Promise<string> {
try {
await fs.access(filePath)
} catch (error) {
throw new Error(`File not found: ${filePath}`)
}

const fileExtension = path.extname(filePath).toLowerCase()

// Check if we have a specific extractor for this format
const extractor = SUPPORTED_BINARY_FORMATS[fileExtension as keyof typeof SUPPORTED_BINARY_FORMATS]
if (extractor) {
return extractor(filePath)
}

// Handle other files
const isBinary = await isBinaryFile(filePath).catch(() => false)

if (!isBinary) {
return addLineNumbers(await fs.readFile(filePath, "utf8"))
} else {
throw new Error(`Cannot read text for file type: ${fileExtension}`)
}
}

export function addLineNumbers(content: string, startLine: number = 1): string {
// If content is empty, return empty string - empty files should not have line numbers
// If content is empty but startLine > 1, return "startLine | " because we know the file is not empty
Expand Down