@@ -7,6 +7,23 @@ const TEXT_VALIDATION_SAMPLE_SIZE = 4096 // Check first 4KB for binary indicator
77const VALID_BYTE_RATIO = 0.95 // Reduced from 0.99 to 0.95
88const CONTROL_CHAR_THRESHOLD = 0.05 // Increased from 0.01 to 0.05
99
/**
 * Magic-number prefixes of well-known binary formats, mapped to a descriptive
 * MIME-like label.
 *
 * Keys are byte arrays, so the Map is keyed by array identity: it is meant to
 * be iterated and prefix-matched against the start of a file, not queried
 * with `get()`.
 *
 * Several of these formats start with printable ASCII, so signatures include
 * the first non-ambiguous bytes of the header where the format defines them;
 * otherwise ordinary text that happens to start with "GIF" or "Rar!" would be
 * misclassified as binary.
 */
const BINARY_SIGNATURES = new Map<readonly number[], string>([
	// Images
	[[0xff, 0xd8, 0xff], "image/jpeg"], // JPEG
	[[0x89, 0x50, 0x4e, 0x47], "image/png"], // PNG
	[[0x47, 0x49, 0x46, 0x38], "image/gif"], // GIF ("GIF8", shared by GIF87a and GIF89a)
	// Archives
	// ZIP local file header. Office Open XML documents (DOCX, XLSX, PPTX, ...)
	// are ZIP containers and share this exact prefix, so they are covered here;
	// a second entry with the same bytes could never be reached.
	[[0x50, 0x4b, 0x03, 0x04], "application/zip"], // ZIP
	[[0x52, 0x61, 0x72, 0x21, 0x1a, 0x07], "application/rar"], // RAR ("Rar!" 0x1A 0x07, shared by RAR4 and RAR5)
	// PDFs
	[[0x25, 0x50, 0x44, 0x46], "application/pdf"], // PDF ("%PDF")
	// Executables
	[[0x4d, 0x5a], "application/exe"], // EXE ("MZ" DOS/PE header)
	[[0x7f, 0x45, 0x4c, 0x46], "application/elf"], // ELF
])
26+
1027export async function isTextFile ( filePath : string , maxSize : number ) : Promise < boolean > {
1128 const stats = await vscode . workspace . fs . stat ( vscode . Uri . file ( filePath ) )
1229 if ( stats . type === vscode . FileType . Directory ) return false
@@ -16,15 +33,10 @@ export async function isTextFile(filePath: string, maxSize: number): Promise<boo
1633 const fileContent = await vscode . workspace . fs . readFile ( vscode . Uri . file ( filePath ) )
1734 const buffer = Buffer . from ( fileContent )
1835
19- // 1. Check for known binary types using file-type
20- const type = await fileTypeFromBuffer ( buffer )
21- if ( type ) {
22- // Reject if detected as binary type (non-text)
23- const isBinary = ! type . mime . startsWith ( "text/" ) && ! [ "application/json" , "application/xml" ] . includes ( type . mime )
24- if ( isBinary ) {
25- console . log ( `File ${ filePath } rejected: binary MIME type ${ type . mime } ` )
26- return false
27- }
36+ // 1. Check for binary file signatures
37+ if ( hasBinarySignature ( buffer ) ) {
38+ console . log ( `File ${ filePath } rejected: matches binary signature` )
39+ return false
2840 }
2941
3042 // 2. Validate UTF-8 encoding
@@ -54,6 +66,24 @@ export async function isTextFile(filePath: string, maxSize: number): Promise<boo
5466 return isValid
5567}
5668
/**
 * Reports whether `buffer` begins with one of the given magic-number
 * signatures.
 *
 * @param buffer - File contents (or at least the leading bytes of the file).
 * @param signatures - Byte prefixes to test against; defaults to the keys of
 *   `BINARY_SIGNATURES`. Each signature is compared byte-for-byte with the
 *   start of `buffer`.
 * @returns `true` as soon as any signature matches the start of the buffer,
 *   `false` if none does (including when the buffer is empty).
 */
function hasBinarySignature(
	buffer: Buffer,
	signatures: Iterable<readonly number[]> = BINARY_SIGNATURES.keys(),
): boolean {
	for (const signature of signatures) {
		// A signature longer than the buffer cannot match, and an empty one
		// would vacuously match every file, so both are skipped. This also
		// makes a separate minimum-buffer-length guard unnecessary.
		if (signature.length === 0 || signature.length > buffer.length) continue

		if (signature.every((byte, offset) => buffer[offset] === byte)) return true
	}
	return false
}
86+
5787function analyzeBytes ( buffer : Buffer ) : { controlCount : number ; validCount : number ; totalSampled : number } {
5888 let controlCount = 0
5989 let validCount = 0
0 commit comments