Skip to content

Commit c7bcfd5

Browse files
committed
Split regexes to match based on the file extension
Support splitting `(?:dylib|dll|so)$`
1 parent 7335c74 commit c7bcfd5

File tree

1 file changed

+73
-25
lines changed

1 file changed

+73
-25
lines changed

tests/FileDetector.php

Lines changed: 73 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@
33

44
class FileDetector
55
{
6+
private const string NO_EXTENSION_KEY = '_%any%_';
7+
68
public bool $FilterEvidenceMatches = true;
79

810
/** @var string[] */
911
public array $Map = [];
1012

11-
/** @var string[] */
13+
/** @var array<string, string[]> */
1214
public array $Regexes = [];
1315

1416
/**
@@ -34,11 +36,7 @@ public function __construct( ?array $Rulesets, ?string $Path )
3436

3537
foreach( $Rulesets as $Type => $Rules )
3638
{
37-
$Regexes =
38-
[
39-
0 => [],
40-
1 => [],
41-
];
39+
$Regexes = [];
4240

4341
foreach( $Rules as $Name => $RuleRegexes )
4442
{
@@ -52,33 +50,72 @@ public function __construct( ?array $Rulesets, ?string $Path )
5250
$this->Map[ $MarkIndex ] = "{$Type}.{$Name}";
5351

5452
$Regex = strtolower( $Regex );
53+
$HasSimpleExtension = preg_match( '/\\\.(?:(?<Extension>\w+)|\(\?:(?<MultiExtension>[\w\|]+)\))\$$/', $Regex, $SimpleExtension ) === 1;
54+
$HasCommonPrefix = false;
5555

56+
// Regexes that match the start of the file (root, or a folder) will be put into a separate regex
5657
if( str_starts_with( $Regex, $CommonFolderPrefix ) )
5758
{
58-
$Regexes[ 0 ][] = substr( $Regex, strlen( $CommonFolderPrefix ) ) . '(*:' . $MarkIndex . ')';
59+
$HasCommonPrefix = true;
60+
$Regex = substr( $Regex, strlen( $CommonFolderPrefix ) ) . '(*:' . $MarkIndex . ')';
5961
}
6062
else
6163
{
62-
$Regexes[ 1 ][] = $Regex . '(*:' . $MarkIndex . ')';
64+
$Regex .= '(*:' . $MarkIndex . ')';
65+
}
66+
67+
// Regexes that end with a file extension will be put into an array based on the extension
68+
// to reduce the number of regexes that need to be matched against each file path
69+
if( $HasSimpleExtension )
70+
{
71+
// If the regex ends with "\.dll$" then it has a single extension;
72+
// if the regex ends with "\.(?:dylib|dll)$" then it has multiple extensions.
73+
$Extensions = empty( $SimpleExtension[ 'MultiExtension' ] ) ? [ $SimpleExtension[ 'Extension' ] ] : explode( '|', $SimpleExtension[ 'MultiExtension' ] );
74+
75+
foreach( $Extensions as $Extension )
76+
{
77+
if( $HasCommonPrefix )
78+
{
79+
$Regexes[ $Extension ][ 0 ][] = $Regex;
80+
}
81+
else
82+
{
83+
$Regexes[ $Extension ][ 1 ][] = $Regex;
84+
}
85+
}
86+
}
87+
else if( $HasCommonPrefix )
88+
{
89+
$Regexes[ self::NO_EXTENSION_KEY ][ 0 ][] = $Regex;
90+
}
91+
else
92+
{
93+
$Regexes[ self::NO_EXTENSION_KEY ][ 1 ][] = $Regex;
6394
}
6495

6596
$MarkIndex++;
6697
}
6798
}
6899

69-
if( !empty( $Regexes[ 0 ] ) )
100+
foreach( $Regexes as $Extension => $RegexesForExtension )
70101
{
71-
sort( $Regexes[ 0 ] );
72-
$this->Regexes[] = '~' . $CommonFolderPrefix . '(?:' . implode( '|', $Regexes[ 0 ] ) . ')~i';
73-
}
102+
if( !empty( $RegexesForExtension[ 0 ] ) )
103+
{
104+
sort( $RegexesForExtension[ 0 ] );
74105

75-
if( !empty( $Regexes[ 1 ] ) )
76-
{
77-
sort( $Regexes[ 1 ] );
106+
$this->Regexes[ $Extension ][] = '~' . $CommonFolderPrefix . '(?:' . implode( '|', $RegexesForExtension[ 0 ] ) . ')~i';
107+
}
108+
109+
if( !empty( $RegexesForExtension[ 1 ] ) )
110+
{
111+
sort( $RegexesForExtension[ 1 ] );
78112

79-
$this->Regexes[] = '~' . implode( '|', $Regexes[ 1 ] ) . '~i';
113+
$this->Regexes[ $Extension ][] = '~' . implode( '|', $RegexesForExtension[ 1 ] ) . '~i';
114+
}
80115
}
81116
}
117+
118+
ksort( $this->Regexes );
82119
}
83120

84121
/**
@@ -92,17 +129,20 @@ public function GetMatchedFiles( array $Files ) : array
92129

93130
foreach( $Files as $Path )
94131
{
95-
foreach( $this->Regexes as $Regex )
132+
foreach( $this->Regexes as $RegexesForExtension )
96133
{
97-
if( preg_match( $Regex, $Path, $RegexMatches ) === 1 )
134+
foreach( $RegexesForExtension as $Regex )
98135
{
99-
$Match = $this->Map[ $RegexMatches[ 'MARK' ] ];
136+
if( preg_match( $Regex, $Path, $RegexMatches ) === 1 )
137+
{
138+
$Match = $this->Map[ $RegexMatches[ 'MARK' ] ];
100139

101-
$Matches[] =
102-
[
103-
'File' => $Path,
104-
'Match' => $Match,
105-
];
140+
$Matches[] =
141+
[
142+
'File' => $Path,
143+
'Match' => $Match,
144+
];
145+
}
106146
}
107147
}
108148
}
@@ -121,7 +161,15 @@ public function GetMatchesForFileList( array $Files ) : array
121161

122162
foreach( $Files as $Path )
123163
{
124-
foreach( $this->Regexes as $Regex )
164+
$RegexesToTry = $this->Regexes[ self::NO_EXTENSION_KEY ];
165+
$Extension = strtolower( pathinfo( $Path, PATHINFO_EXTENSION ) );
166+
167+
if( isset( $this->Regexes[ $Extension ] ) )
168+
{
169+
$RegexesToTry = [ ...$this->Regexes[ $Extension ], ...$RegexesToTry ];
170+
}
171+
172+
foreach( $RegexesToTry as $Regex )
125173
{
126174
if( preg_match( $Regex, $Path, $RegexMatches ) === 1 )
127175
{

0 commit comments

Comments
 (0)