33
44class FileDetector
55{
6+ private const string NO_EXTENSION_KEY = '_%any%_ ' ;
7+
68 public bool $ FilterEvidenceMatches = true ;
79
810 /** @var string[] */
911 public array $ Map = [];
1012
11- /** @var string[] */
13+ /** @var array< string, string[]> */
1214 public array $ Regexes = [];
1315
1416 /**
@@ -34,11 +36,7 @@ public function __construct( ?array $Rulesets, ?string $Path )
3436
3537 foreach ( $ Rulesets as $ Type => $ Rules )
3638 {
37- $ Regexes =
38- [
39- 0 => [],
40- 1 => [],
41- ];
39+ $ Regexes = [];
4240
4341 foreach ( $ Rules as $ Name => $ RuleRegexes )
4442 {
@@ -52,33 +50,72 @@ public function __construct( ?array $Rulesets, ?string $Path )
5250 $ this ->Map [ $ MarkIndex ] = "{$ Type }. {$ Name }" ;
5351
5452 $ Regex = strtolower ( $ Regex );
53+ $ HasSimpleExtension = preg_match ( '/ \\\.(?:(?<Extension>\w+)|\(\?:(?<MultiExtension>[\w\|]+)\))\$$/ ' , $ Regex , $ SimpleExtension ) === 1 ;
54+ $ HasCommonPrefix = false ;
5555
56+ // Regexes that match start of the file (root, or a folder) will be put into a separate regex
5657 if ( str_starts_with ( $ Regex , $ CommonFolderPrefix ) )
5758 {
58- $ Regexes [ 0 ][] = substr ( $ Regex , strlen ( $ CommonFolderPrefix ) ) . '(*: ' . $ MarkIndex . ') ' ;
59+ $ HasCommonPrefix = true ;
60+ $ Regex = substr ( $ Regex , strlen ( $ CommonFolderPrefix ) ) . '(*: ' . $ MarkIndex . ') ' ;
5961 }
6062 else
6163 {
62- $ Regexes [ 1 ][] = $ Regex . '(*: ' . $ MarkIndex . ') ' ;
64+ $ Regex .= '(*: ' . $ MarkIndex . ') ' ;
65+ }
66+
67+ // Regexes that end with a file extension will be put into an array based on the extension
68+ // to reduce the amount of regexes needed to match for each file path
69+ if ( $ HasSimpleExtension )
70+ {
71+ // If regex ends with "\.dll$" then it's a single extension,
72+ // If regex ends with "\.(?:dylib|dll)$" then it's multi.
73+ $ Extensions = empty ( $ SimpleExtension [ 'MultiExtension ' ] ) ? [ $ SimpleExtension [ 'Extension ' ] ] : explode ( '| ' , $ SimpleExtension [ 'MultiExtension ' ] );
74+
75+ foreach ( $ Extensions as $ Extension )
76+ {
77+ if ( $ HasCommonPrefix )
78+ {
79+ $ Regexes [ $ Extension ][ 0 ][] = $ Regex ;
80+ }
81+ else
82+ {
83+ $ Regexes [ $ Extension ][ 1 ][] = $ Regex ;
84+ }
85+ }
86+ }
87+ else if ( $ HasCommonPrefix )
88+ {
89+ $ Regexes [ self ::NO_EXTENSION_KEY ][ 0 ][] = $ Regex ;
90+ }
91+ else
92+ {
93+ $ Regexes [ self ::NO_EXTENSION_KEY ][ 1 ][] = $ Regex ;
6394 }
6495
6596 $ MarkIndex ++;
6697 }
6798 }
6899
69- if ( ! empty ( $ Regexes[ 0 ] ) )
100+ foreach ( $ Regexes as $ Extension => $ RegexesForExtension )
70101 {
71- sort ( $ Regexes [ 0 ] );
72- $ this -> Regexes [] = ' ~ ' . $ CommonFolderPrefix . ' (?: ' . implode ( ' | ' , $ Regexes [ 0 ] ) . ' )~i ' ;
73- }
102+ if ( ! empty ( $ RegexesForExtension [ 0 ] ) )
103+ {
104+ sort ( $ RegexesForExtension [ 0 ] );
74105
75- if ( !empty ( $ Regexes [ 1 ] ) )
76- {
77- sort ( $ Regexes [ 1 ] );
106+ $ this ->Regexes [ $ Extension ][] = '~ ' . $ CommonFolderPrefix . '(?: ' . implode ( '| ' , $ RegexesForExtension [ 0 ] ) . ')~i ' ;
107+ }
108+
109+ if ( !empty ( $ RegexesForExtension [ 1 ] ) )
110+ {
111+ sort ( $ RegexesForExtension [ 1 ] );
78112
79- $ this ->Regexes [] = '~ ' . implode ( '| ' , $ Regexes [ 1 ] ) . '~i ' ;
113+ $ this ->Regexes [ $ Extension ][] = '~ ' . implode ( '| ' , $ RegexesForExtension [ 1 ] ) . '~i ' ;
114+ }
80115 }
81116 }
117+
118+ ksort ( $ this ->Regexes );
82119 }
83120
84121 /**
@@ -92,17 +129,20 @@ public function GetMatchedFiles( array $Files ) : array
92129
93130 foreach ( $ Files as $ Path )
94131 {
95- foreach ( $ this ->Regexes as $ Regex )
132+ foreach ( $ this ->Regexes as $ RegexesForExtension )
96133 {
97- if ( preg_match ( $ Regex , $ Path , $ RegexMatches ) === 1 )
134+ foreach ( $ RegexesForExtension as $ Regex )
98135 {
99- $ Match = $ this ->Map [ $ RegexMatches [ 'MARK ' ] ];
136+ if ( preg_match ( $ Regex , $ Path , $ RegexMatches ) === 1 )
137+ {
138+ $ Match = $ this ->Map [ $ RegexMatches [ 'MARK ' ] ];
100139
101- $ Matches [] =
102- [
103- 'File ' => $ Path ,
104- 'Match ' => $ Match ,
105- ];
140+ $ Matches [] =
141+ [
142+ 'File ' => $ Path ,
143+ 'Match ' => $ Match ,
144+ ];
145+ }
106146 }
107147 }
108148 }
@@ -121,7 +161,15 @@ public function GetMatchesForFileList( array $Files ) : array
121161
122162 foreach ( $ Files as $ Path )
123163 {
124- foreach ( $ this ->Regexes as $ Regex )
164+ $ RegexesToTry = $ this ->Regexes [ self ::NO_EXTENSION_KEY ];
165+ $ Extension = strtolower ( pathinfo ( $ Path , PATHINFO_EXTENSION ) );
166+
167+ if ( isset ( $ this ->Regexes [ $ Extension ] ) )
168+ {
169+ $ RegexesToTry = [ ...$ this ->Regexes [ $ Extension ], ...$ RegexesToTry ];
170+ }
171+
172+ foreach ( $ RegexesToTry as $ Regex )
125173 {
126174 if ( preg_match ( $ Regex , $ Path , $ RegexMatches ) === 1 )
127175 {
0 commit comments