Skip to content

Commit ebd1b18

Browse files
authored
fix: better binary file detection (sst#2025)
1 parent de17648 commit ebd1b18

File tree

1 file changed

+52
-6
lines changed

1 file changed

+52
-6
lines changed

packages/opencode/src/tool/read.ts

Lines changed: 52 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ export const ReadTool = Tool.define("read", {
5353
const offset = params.offset || 0
5454
const isImage = isImageFile(filepath)
5555
if (isImage) throw new Error(`This is an image file of type: ${isImage}\nUse a different tool to process images`)
56-
const isBinary = await isBinaryFile(file)
56+
const isBinary = await isBinaryFile(filepath, file)
5757
if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`)
5858
const lines = await file.text().then((text) => text.split("\n"))
5959
const raw = lines.slice(offset, offset + limit).map((line) => {
@@ -105,13 +105,59 @@ function isImageFile(filePath: string): string | false {
105105
}
106106
}
107107

108-
async function isBinaryFile(file: Bun.BunFile): Promise<boolean> {
108+
async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
109+
const ext = path.extname(filepath).toLowerCase()
110+
// binary check for common non-text extensions
111+
switch (ext) {
112+
case ".zip":
113+
case ".tar":
114+
case ".gz":
115+
case ".exe":
116+
case ".dll":
117+
case ".so":
118+
case ".class":
119+
case ".jar":
120+
case ".war":
121+
case ".7z":
122+
case ".doc":
123+
case ".docx":
124+
case ".xls":
125+
case ".xlsx":
126+
case ".ppt":
127+
case ".pptx":
128+
case ".odt":
129+
case ".ods":
130+
case ".odp":
131+
case ".bin":
132+
case ".dat":
133+
case ".obj":
134+
case ".o":
135+
case ".a":
136+
case ".lib":
137+
case ".wasm":
138+
case ".pyc":
139+
case ".pyo":
140+
return true
141+
default:
142+
break
143+
}
144+
145+
const stat = await file.stat()
146+
const fileSize = stat.size
147+
if (fileSize === 0) return false
148+
149+
const bufferSize = Math.min(4096, fileSize)
109150
const buffer = await file.arrayBuffer()
110-
const bytes = new Uint8Array(buffer.slice(0, 512)) // Check first 512 bytes
151+
if (buffer.byteLength === 0) return false
152+
const bytes = new Uint8Array(buffer.slice(0, bufferSize))
111153

154+
let nonPrintableCount = 0
112155
for (let i = 0; i < bytes.length; i++) {
113-
if (bytes[i] === 0) return true // Null byte indicates binary
156+
if (bytes[i] === 0) return true
157+
if (bytes[i] < 9 || (bytes[i] > 13 && bytes[i] < 32)) {
158+
nonPrintableCount++
159+
}
114160
}
115-
116-
return false
161+
// If >30% non-printable characters, consider it binary
162+
return nonPrintableCount / bytes.length > 0.3
117163
}

0 commit comments

Comments
 (0)