Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions src/core/tools/__tests__/readFileTool.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,36 @@ describe("read_file tool XML output structure", () => {
`<files>\n<file><path>${testFilePath}</path>\n<content/><notice>File is empty</notice>\n</file>\n</files>`,
)
})

it("should treat files with only BOM as empty", async () => {
	// Arrange: a one-line file whose extracted text is nothing but a byte-order mark.
	// The extractor is mocked here, so the BOM reaches the tool untouched; the tool is
	// expected to classify it as empty because String.prototype.trim() removes U+FEFF.
	const bomOnlyText = "\uFEFF"
	mockedCountFileLines.mockResolvedValue(1) // File has 1 line
	mockedExtractTextFromFile.mockResolvedValue(bomOnlyText)
	mockProvider.getState.mockResolvedValue({ maxReadFileLine: -1 })

	// Act
	const toolOutput = await executeReadFileTool({}, { totalLines: 1 })

	// Assert: same XML as for a genuinely empty file (self-closing content + notice).
	const expectedXml = `<files>\n<file><path>${testFilePath}</path>\n<content/><notice>File is empty</notice>\n</file>\n</files>`
	expect(toolOutput).toBe(expectedXml)
})

it("should strip BOM from file content", async () => {
	// Setup - extractTextFromFile is mocked in this test, so the BOM removal itself cannot
	// be exercised here. The mock therefore returns what the real extractor is expected to
	// produce for a BOM-prefixed file: line-numbered content with the BOM already removed.
	// NOTE(review): the stripping logic itself should be covered by the extract-text tests.
	mockedCountFileLines.mockResolvedValue(1)
	mockedExtractTextFromFile.mockResolvedValue("1 | Hello World") // BOM already stripped, with line number
	mockProvider.getState.mockResolvedValue({ maxReadFileLine: -1 })

	// Execute
	const result = await executeReadFileTool({}, { totalLines: 1 })

	// Verify - the tool output must not contain a BOM anywhere...
	expect(result).not.toContain("\uFEFF")
	// ...and the content must be emitted as a normal single-line file.
	expect(result).toBe(
		`<files>\n<file><path>${testFilePath}</path>\n<content lines="1-1">\n1 | Hello World</content>\n</file>\n</files>`,
	)
})
})

describe("Error Handling Tests", () => {
Expand Down
9 changes: 7 additions & 2 deletions src/core/tools/readFileTool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -519,9 +519,14 @@ export async function readFileTool(
// Handle normal file read
const content = await extractTextFromFile(fullPath)
const lineRangeAttr = ` lines="1-${totalLines}"`
let xmlInfo = totalLines > 0 ? `<content${lineRangeAttr}>\n${content}</content>\n` : `<content/>`

if (totalLines === 0) {
// Check if file is effectively empty (no lines, only whitespace, or only BOM)
// Note: BOM is already stripped by extractTextFromFile
const isEffectivelyEmpty = totalLines === 0 || content.trim() === ""

let xmlInfo = !isEffectivelyEmpty ? `<content${lineRangeAttr}>\n${content}</content>\n` : `<content/>`

if (isEffectivelyEmpty) {
xmlInfo += `<notice>File is empty</notice>\n`
}

Expand Down
5 changes: 4 additions & 1 deletion src/integrations/misc/extract-text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import mammoth from "mammoth"
import fs from "fs/promises"
import { isBinaryFile } from "isbinaryfile"
import { extractTextFromXLSX } from "./extract-text-from-xlsx"
import stripBom from "strip-bom"

async function extractTextFromPDF(filePath: string): Promise<string> {
const dataBuffer = await fs.readFile(filePath)
Expand Down Expand Up @@ -67,7 +68,9 @@ export async function extractTextFromFile(filePath: string): Promise<string> {
const isBinary = await isBinaryFile(filePath).catch(() => false)

if (!isBinary) {
return addLineNumbers(await fs.readFile(filePath, "utf8"))
const content = await fs.readFile(filePath, "utf8")
// Strip BOM if present before adding line numbers
return addLineNumbers(stripBom(content))
} else {
throw new Error(`Cannot read text for file type: ${fileExtension}`)
}
Expand Down
13 changes: 12 additions & 1 deletion src/integrations/misc/read-lines.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
* Now you can read a range of lines from a file
*/
import { createReadStream } from "fs"
import stripBom from "strip-bom"

const outOfRangeError = (filepath: string, n: number) => {
return new RangeError(`Line with index ${n} does not exist in '${filepath}'. Note that line indexing is zero-based`)
Expand Down Expand Up @@ -57,14 +58,24 @@ export function readLines(filepath: string, endLine?: number, startLine?: number
let buffer = ""
let lineCount = 0
let result = ""
let isFirstChunk = true

// Handle errors
input.on("error", reject)

// Process data chunks directly
input.on("data", (chunk) => {
// Convert chunk to string
let chunkStr = chunk.toString()

// Strip BOM from the first chunk if present
if (isFirstChunk) {
chunkStr = stripBom(chunkStr)
isFirstChunk = false
}

// Add chunk to buffer
buffer += chunk.toString()
buffer += chunkStr

let pos = 0
let nextNewline = buffer.indexOf("\n", pos)
Expand Down