diff --git a/src/core/tools/__tests__/readFileTool.spec.ts b/src/core/tools/__tests__/readFileTool.spec.ts index 44be1d3b924..b79ac269b96 100644 --- a/src/core/tools/__tests__/readFileTool.spec.ts +++ b/src/core/tools/__tests__/readFileTool.spec.ts @@ -481,6 +481,36 @@ describe("read_file tool XML output structure", () => { `\n${testFilePath}\nFile is empty\n\n`, ) }) + + it("should treat files with only BOM as empty", async () => { + // Setup - file has BOM only + mockedCountFileLines.mockResolvedValue(1) // File has 1 line + mockedExtractTextFromFile.mockResolvedValue("\uFEFF") // Only BOM + mockProvider.getState.mockResolvedValue({ maxReadFileLine: -1 }) + + // Execute + const result = await executeReadFileTool({}, { totalLines: 1 }) + + // Verify - should show empty file notice: the mocked content is BOM-only, which trim() treats as whitespace + expect(result).toBe( + `\n${testFilePath}\nFile is empty\n\n`, + ) + }) + + it("should strip BOM from file content", async () => { + // Setup - file has BOM followed by content + mockedCountFileLines.mockResolvedValue(1) + mockedExtractTextFromFile.mockResolvedValue("1 | \uFEFFHello World") // BOM + content with line number + mockProvider.getState.mockResolvedValue({ maxReadFileLine: -1 }) + + // Execute + const result = await executeReadFileTool({}, { totalLines: 1 }) + + // Verify - extractTextFromFile is mocked here, so its output (BOM included) is passed through unchanged + expect(result).toBe( + `\n${testFilePath}\n\n1 | \uFEFFHello World\n\n`, + ) + }) }) describe("Error Handling Tests", () => { diff --git a/src/core/tools/readFileTool.ts b/src/core/tools/readFileTool.ts index 6de8dd56421..0cc196bce1e 100644 --- a/src/core/tools/readFileTool.ts +++ b/src/core/tools/readFileTool.ts @@ -519,9 +519,14 @@ export async function readFileTool( // Handle normal file read const content = await extractTextFromFile(fullPath) const lineRangeAttr = ` lines="1-${totalLines}"` - let xmlInfo = totalLines > 0 ? 
`\n${content}\n` : `` - if (totalLines === 0) { + // Check if file is effectively empty (no lines, only whitespace, or only BOM) + // Note: BOM is already stripped by extractTextFromFile + const isEffectivelyEmpty = totalLines === 0 || content.trim() === "" + + let xmlInfo = !isEffectivelyEmpty ? `\n${content}\n` : `` + + if (isEffectivelyEmpty) { xmlInfo += `File is empty\n` } diff --git a/src/integrations/misc/extract-text.ts b/src/integrations/misc/extract-text.ts index 8c7e7408a68..b7011dd172e 100644 --- a/src/integrations/misc/extract-text.ts +++ b/src/integrations/misc/extract-text.ts @@ -5,6 +5,7 @@ import mammoth from "mammoth" import fs from "fs/promises" import { isBinaryFile } from "isbinaryfile" import { extractTextFromXLSX } from "./extract-text-from-xlsx" +import stripBom from "strip-bom" async function extractTextFromPDF(filePath: string): Promise { const dataBuffer = await fs.readFile(filePath) @@ -67,7 +68,9 @@ export async function extractTextFromFile(filePath: string): Promise { const isBinary = await isBinaryFile(filePath).catch(() => false) if (!isBinary) { - return addLineNumbers(await fs.readFile(filePath, "utf8")) + const content = await fs.readFile(filePath, "utf8") + // Strip BOM if present before adding line numbers + return addLineNumbers(stripBom(content)) } else { throw new Error(`Cannot read text for file type: ${fileExtension}`) } diff --git a/src/integrations/misc/read-lines.ts b/src/integrations/misc/read-lines.ts index 5a5eda9f838..2d9d1e9a22c 100644 --- a/src/integrations/misc/read-lines.ts +++ b/src/integrations/misc/read-lines.ts @@ -7,6 +7,7 @@ * Now you can read a range of lines from a file */ import { createReadStream } from "fs" +import stripBom from "strip-bom" const outOfRangeError = (filepath: string, n: number) => { return new RangeError(`Line with index ${n} does not exist in '${filepath}'. 
Note that line indexing is zero-based`) @@ -57,14 +58,24 @@ export function readLines(filepath: string, endLine?: number, startLine?: number let buffer = "" let lineCount = 0 let result = "" + let isFirstChunk = true // Handle errors input.on("error", reject) // Process data chunks directly input.on("data", (chunk) => { + // Convert chunk to string + let chunkStr = chunk.toString() + + // Strip BOM from the first chunk if present + if (isFirstChunk) { + chunkStr = stripBom(chunkStr) + isFirstChunk = false + } + // Add chunk to buffer - buffer += chunk.toString() + buffer += chunkStr let pos = 0 let nextNewline = buffer.indexOf("\n", pos)