@@ -14,6 +14,7 @@ import (
1414 "path/filepath"
1515 "regexp"
1616 "runtime"
17+ "slices"
1718 "strings"
1819 "sync"
1920
@@ -74,6 +75,7 @@ var supportedKind = map[string]string{
7475 "html" : "" ,
7576 "java" : "text/x-java" ,
7677 "js" : "application/javascript" ,
78+ "json" : "" ,
7779 "ko" : "application/x-object" ,
7880 "lnk" : "application/x-ms-shortcut" ,
7981 "lua" : "text/x-lua" ,
@@ -99,7 +101,10 @@ var supportedKind = map[string]string{
99101 "sh" : "text/x-shellscript" ,
100102 "so" : "application/x-sharedlib" ,
101103 "ts" : "application/typescript" ,
104+ "txt" : "" ,
102105 "upx" : "application/x-upx" ,
106+ "vbs" : "text/x-vbscript" ,
107+ "vim" : "text/x-vim" ,
103108 "yaml" : "" ,
104109 "yara" : "" ,
105110 "yml" : "" ,
@@ -116,9 +121,41 @@ var (
116121 initializeOnce sync.Once
117122 versionRegex = regexp .MustCompile (`\d+\.\d+\.\d+$` )
118123 // Magic byte constants for common file signatures.
119- elfMagic = []byte {0x7f , 'E' , 'L' , 'F' }
120- gzipMagic = []byte {0x1f , 0x8b }
121- ZMagic = []byte {0x78 , 0x5E }
124+ elfMagic = []byte {0x7f , 'E' , 'L' , 'F' }
125+ gzipMagic = []byte {0x1f , 0x8b }
126+ ZMagic = []byte {0x78 , 0x5E }
127+ shellShebangs = [][]byte {
128+ []byte ("#!/bin/ash" ),
129+ []byte ("#!/bin/bash" ),
130+ []byte ("#!/bin/dash" ),
131+ []byte ("#!/bin/fish" ),
132+ []byte ("#!/bin/ksh" ),
133+ []byte ("#!/bin/sh" ),
134+ []byte ("#!/bin/zsh" ),
135+ []byte ("#!/usr/bin/env bash" ),
136+ []byte ("#!/usr/bin/env sh" ),
137+ []byte ("#!/usr/bin/env zsh" ),
138+ }
139+ shellPatterns = [][]byte {
140+ []byte ("; then\n " ),
141+ []byte ("; do\n " ),
142+ []byte ("esac" ),
143+ []byte ("fi\n " ),
144+ []byte ("done\n " ),
145+ []byte ("$((" ),
146+ []byte ("$(" ),
147+ []byte ("${" ),
148+ []byte ("<<EOF" ),
149+ []byte ("<<-EOF" ),
150+ []byte ("<<'EOF'" ),
151+ []byte ("|| exit" ),
152+ []byte ("&& exit" ),
153+ []byte ("set -e" ),
154+ []byte ("set -x" ),
155+ []byte ("set -u" ),
156+ []byte ("set -o " ),
157+ []byte ("export PATH" ),
158+ }
122159)
123160
124161// IsSupportedArchive returns whether a path can be processed by our archive extractor.
@@ -203,12 +240,15 @@ func makeFileType(path string, ext string, mime string) *FileType {
203240 ext = strings .TrimPrefix (ext , "." )
204241
205242 // Archives are supported
243+ if _ , ok := ArchiveMap [ext ]; ok {
244+ return & FileType {Ext : ext , MIME : mime }
245+ }
206246 if _ , ok := ArchiveMap [GetExt (path )]; ok {
207247 return & FileType {Ext : ext , MIME : mime }
208248 }
209249
210250 // typically, JSON and YAML files are data files only scanned via --all, but we want to support the NPM ecosystem
211- if strings .HasSuffix (path , "package.json" ) || strings .HasSuffix (path , "package-lock.json" ) {
251+ if strings .HasSuffix (path , "package.json" ) || strings .HasSuffix (path , "package-lock.json" ) || strings . Contains ( path , ".js.map" ) {
212252 return & FileType {
213253 Ext : ext ,
214254 MIME : "application/json" ,
@@ -248,6 +288,36 @@ func makeFileType(path string, ext string, mime string) *FileType {
248288 return nil
249289}
250290
291+ // isLikelyShellScript determines if file content is likely a shell script
292+ // and focuses on multiple criteria to reduce false-positives.
293+ func isLikelyShellScript (fc []byte , path string ) bool {
294+ if slices .ContainsFunc (shellShebangs , func (shebang []byte ) bool {
295+ return bytes .HasPrefix (fc , shebang )
296+ }) {
297+ return true
298+ }
299+
300+ if strings .HasSuffix (path , "profile" ) ||
301+ strings .HasSuffix (path , ".bashrc" ) ||
302+ strings .HasSuffix (path , ".bash_profile" ) ||
303+ strings .HasSuffix (path , ".zshrc" ) ||
304+ strings .HasSuffix (path , ".zsh_profile" ) {
305+ return true
306+ }
307+
308+ matches := 0
309+ for _ , pattern := range shellPatterns {
310+ if bytes .Contains (fc , pattern ) {
311+ matches ++
312+ if matches >= 2 {
313+ return true
314+ }
315+ }
316+ }
317+
318+ return false
319+ }
320+
251321// File detects what kind of program this file might be.
252322func File (ctx context.Context , path string ) (* FileType , error ) {
253323 // Follow symlinks and return cleanly if the target does not exist
@@ -293,7 +363,8 @@ func File(ctx context.Context, path string) (*FileType, error) {
293363 // default strategy: mimetype (no limit for improved magic type detection)
294364 mimetype .SetLimit (0 ) // a limit of 0 means the whole input file will be used
295365 mtype := mimetype .Detect (fc )
296- if ft := makeFileType (path , mtype .Extension (), mtype .String ()); ft != nil {
366+ ext , mime := mtype .Extension (), mtype .String ()
367+ if ft := makeFileType (path , ext , mime ); ft != nil {
297368 return ft , nil
298369 }
299370
@@ -302,6 +373,17 @@ func File(ctx context.Context, path string) (*FileType, error) {
302373 return mtype , nil
303374 }
304375
376+ pathExt := strings .TrimPrefix (GetExt (path ), "." )
377+
378+ if _ , pathExtKnown := supportedKind [pathExt ]; pathExtKnown {
379+ return nil , nil
380+ }
381+
382+ if mime == "application/octet-stream" && len (pathExt ) >= 2 {
383+ return nil , nil
384+ }
385+
386+ // Content-based detection for files with no recognized extension or mimetype
305387 switch {
306388 case bytes .HasPrefix (fc , elfMagic ):
307389 return Path (".elf" ), nil
@@ -311,18 +393,7 @@ func File(ctx context.Context, path string) (*FileType, error) {
311393 return Path (".py" ), nil
312394 case bytes .Contains (fc , []byte (" = require(" )):
313395 return Path (".js" ), nil
314- case bytes .HasPrefix (fc , []byte ("#!/bin/ash" )) ||
315- bytes .HasPrefix (fc , []byte ("#!/bin/bash" )) ||
316- bytes .HasPrefix (fc , []byte ("#!/bin/fish" )) ||
317- bytes .HasPrefix (fc , []byte ("#!/bin/sh" )) ||
318- bytes .HasPrefix (fc , []byte ("#!/bin/zsh" )) ||
319- bytes .Contains (fc , []byte ("if [" )) ||
320- bytes .Contains (fc , []byte ("if !" )) ||
321- bytes .Contains (fc , []byte ("echo " )) ||
322- bytes .Contains (fc , []byte ("grep " )) ||
323- bytes .Contains (fc , []byte ("; then" )) ||
324- bytes .Contains (fc , []byte ("export " )) ||
325- strings .HasSuffix (path , "profile" ):
396+ case isLikelyShellScript (fc , path ):
326397 return Path (".sh" ), nil
327398 case bytes .HasPrefix (fc , []byte ("#!" )):
328399 return Path (".script" ), nil
@@ -347,7 +418,7 @@ func initializeHeaderPool() {
347418
348419// Path returns a filetype based strictly on file path.
349420func Path (path string ) * FileType {
350- ext := strings .ReplaceAll ( filepath . Ext (path ), "." , " " )
421+ ext := strings .TrimPrefix ( GetExt (path ), "." )
351422 mime := supportedKind [ext ]
352423 return makeFileType (path , ext , mime )
353424}
0 commit comments