@@ -53,7 +53,7 @@ export const ReadTool = Tool.define("read", {
53
53
const offset = params . offset || 0
54
54
const isImage = isImageFile ( filepath )
55
55
if ( isImage ) throw new Error ( `This is an image file of type: ${ isImage } \nUse a different tool to process images` )
56
- const isBinary = await isBinaryFile ( file )
56
+ const isBinary = await isBinaryFile ( filepath , file )
57
57
if ( isBinary ) throw new Error ( `Cannot read binary file: ${ filepath } ` )
58
58
const lines = await file . text ( ) . then ( ( text ) => text . split ( "\n" ) )
59
59
const raw = lines . slice ( offset , offset + limit ) . map ( ( line ) => {
@@ -105,13 +105,59 @@ function isImageFile(filePath: string): string | false {
105
105
}
106
106
}
107
107
108
/** Lower-cased file extensions that are classified as binary without inspecting the contents. */
const BINARY_EXTENSIONS: ReadonlySet<string> = new Set([
  ".zip",
  ".tar",
  ".gz",
  ".exe",
  ".dll",
  ".so",
  ".class",
  ".jar",
  ".war",
  ".7z",
  ".doc",
  ".docx",
  ".xls",
  ".xlsx",
  ".ppt",
  ".pptx",
  ".odt",
  ".ods",
  ".odp",
  ".bin",
  ".dat",
  ".obj",
  ".o",
  ".a",
  ".lib",
  ".wasm",
  ".pyc",
  ".pyo",
])

/** Maximum number of leading bytes sampled when sniffing a file's contents. */
const BINARY_SNIFF_BYTES = 4096

/** Fraction of control bytes in the sample above which the file is classified as binary. */
const NON_PRINTABLE_THRESHOLD = 0.3

/**
 * Heuristically decides whether a file should be treated as binary.
 *
 * The decision is made in three steps:
 *  1. If the (case-insensitive) extension of `filepath` is in
 *     {@link BINARY_EXTENSIONS}, the file is binary; its contents are not read.
 *  2. An empty file is never binary.
 *  3. Otherwise the first {@link BINARY_SNIFF_BYTES} bytes are sampled: any NUL
 *     byte means binary; failing that, the file is binary when more than
 *     {@link NON_PRINTABLE_THRESHOLD} of the sampled bytes are control
 *     characters other than tab, LF, VT, FF and CR (bytes 9-13).
 *
 * @param filepath - Path of the file; only its extension is examined.
 * @param file - Handle used to stat the file and read its leading bytes.
 * @returns `true` when the file looks binary, `false` otherwise.
 */
async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
  const ext = path.extname(filepath).toLowerCase()
  if (BINARY_EXTENSIONS.has(ext)) return true

  const { size } = await file.stat()
  if (size === 0) return false

  // Read only the sampled prefix instead of the whole file, so that rejecting
  // a very large binary does not require loading all of it into memory.
  const sampleSize = Math.min(BINARY_SNIFF_BYTES, size)
  const bytes = new Uint8Array(await file.slice(0, sampleSize).arrayBuffer())
  // The file may have been truncated between stat() and the read; this guard
  // also keeps the ratio below from dividing by zero.
  if (bytes.length === 0) return false

  let nonPrintableCount = 0
  for (const byte of bytes) {
    // A NUL byte is taken as conclusive evidence of binary content.
    if (byte === 0) return true
    // Control characters other than the whitespace range 9-13.
    if (byte < 9 || (byte > 13 && byte < 32)) nonPrintableCount++
  }

  return nonPrintableCount / bytes.length > NON_PRINTABLE_THRESHOLD
}
0 commit comments