diff --git a/src/core/tools/__tests__/readFileTool.spec.ts b/src/core/tools/__tests__/readFileTool.spec.ts
index 44be1d3b924..b79ac269b96 100644
--- a/src/core/tools/__tests__/readFileTool.spec.ts
+++ b/src/core/tools/__tests__/readFileTool.spec.ts
@@ -481,6 +481,36 @@ describe("read_file tool XML output structure", () => {
`\n${testFilePath}\nFile is empty\n\n`,
)
})
+
+ it("should treat files with only BOM as empty", async () => {
+ // Setup - file has BOM only
+ mockedCountFileLines.mockResolvedValue(1) // File has 1 line
+ mockedExtractTextFromFile.mockResolvedValue("\uFEFF") // Only BOM
+ mockProvider.getState.mockResolvedValue({ maxReadFileLine: -1 })
+
+ // Execute
+ const result = await executeReadFileTool({}, { totalLines: 1 })
+
+ // Verify - should show empty file notice since BOM is stripped
+ expect(result).toBe(
+ `\n${testFilePath}\nFile is empty\n\n`,
+ )
+ })
+
+ it("should strip BOM from file content", async () => {
+ // Setup - file has BOM followed by content
+ mockedCountFileLines.mockResolvedValue(1)
+ mockedExtractTextFromFile.mockResolvedValue("1 | \uFEFFHello World") // BOM + content with line number
+ mockProvider.getState.mockResolvedValue({ maxReadFileLine: -1 })
+
+ // Execute
+ const result = await executeReadFileTool({}, { totalLines: 1 })
+
+ // Verify - BOM should be stripped from the content
+ expect(result).toBe(
+ `\n${testFilePath}\n\n1 | \uFEFFHello World\n\n`,
+ )
+ })
})
describe("Error Handling Tests", () => {
diff --git a/src/core/tools/readFileTool.ts b/src/core/tools/readFileTool.ts
index 6de8dd56421..0cc196bce1e 100644
--- a/src/core/tools/readFileTool.ts
+++ b/src/core/tools/readFileTool.ts
@@ -519,9 +519,14 @@ export async function readFileTool(
// Handle normal file read
const content = await extractTextFromFile(fullPath)
const lineRangeAttr = ` lines="1-${totalLines}"`
- let xmlInfo = totalLines > 0 ? `\n${content}\n` : ``
- if (totalLines === 0) {
+ // Check if file is effectively empty (no lines, only whitespace, or only BOM)
+ // Note: BOM is already stripped by extractTextFromFile
+ const isEffectivelyEmpty = totalLines === 0 || content.trim() === ""
+
+ let xmlInfo = !isEffectivelyEmpty ? `\n${content}\n` : ``
+
+ if (isEffectivelyEmpty) {
xmlInfo += `File is empty\n`
}
diff --git a/src/integrations/misc/extract-text.ts b/src/integrations/misc/extract-text.ts
index 8c7e7408a68..b7011dd172e 100644
--- a/src/integrations/misc/extract-text.ts
+++ b/src/integrations/misc/extract-text.ts
@@ -5,6 +5,7 @@ import mammoth from "mammoth"
import fs from "fs/promises"
import { isBinaryFile } from "isbinaryfile"
import { extractTextFromXLSX } from "./extract-text-from-xlsx"
+import stripBom from "strip-bom"
async function extractTextFromPDF(filePath: string): Promise {
const dataBuffer = await fs.readFile(filePath)
@@ -67,7 +68,9 @@ export async function extractTextFromFile(filePath: string): Promise {
const isBinary = await isBinaryFile(filePath).catch(() => false)
if (!isBinary) {
- return addLineNumbers(await fs.readFile(filePath, "utf8"))
+ const content = await fs.readFile(filePath, "utf8")
+ // Strip BOM if present before adding line numbers
+ return addLineNumbers(stripBom(content))
} else {
throw new Error(`Cannot read text for file type: ${fileExtension}`)
}
diff --git a/src/integrations/misc/read-lines.ts b/src/integrations/misc/read-lines.ts
index 5a5eda9f838..2d9d1e9a22c 100644
--- a/src/integrations/misc/read-lines.ts
+++ b/src/integrations/misc/read-lines.ts
@@ -7,6 +7,7 @@
* Now you can read a range of lines from a file
*/
import { createReadStream } from "fs"
+import stripBom from "strip-bom"
const outOfRangeError = (filepath: string, n: number) => {
return new RangeError(`Line with index ${n} does not exist in '${filepath}'. Note that line indexing is zero-based`)
@@ -57,14 +58,24 @@ export function readLines(filepath: string, endLine?: number, startLine?: number
let buffer = ""
let lineCount = 0
let result = ""
+ let isFirstChunk = true
// Handle errors
input.on("error", reject)
// Process data chunks directly
input.on("data", (chunk) => {
+ // Convert chunk to string
+ let chunkStr = chunk.toString()
+
+ // Strip BOM from the first chunk if present
+ if (isFirstChunk) {
+ chunkStr = stripBom(chunkStr)
+ isFirstChunk = false
+ }
+
// Add chunk to buffer
- buffer += chunk.toString()
+ buffer += chunkStr
let pos = 0
let nextNewline = buffer.indexOf("\n", pos)