9
9
*/
10
10
namespace PHPUnit \Util ;
11
11
12
- use InvalidArgumentException ;
12
+ use PHPUnit \ Exception ;
13
13
use RuntimeException ;
14
14
15
15
/**
16
16
* @no-named-arguments Parameter names are not covered by the backward compatibility promise for PHPUnit
17
17
*
18
18
* @internal This class is not covered by the backward compatibility promise for PHPUnit
19
+ * @phpstan-type token array{self::T_*,string}
19
20
*/
20
21
final readonly class FileMatcher
21
22
{
23
// Token type identifiers. tokenize() tags each character of a glob with one
// of these, and toRegEx() dispatches on them when building the regex.
// T_GLOBSTAR is never produced by tokenize() directly; processTokens() emits
// it in place of two consecutive T_ASTERIX tokens.
// NOTE(review): the trailing space inside every string value looks like an
// extraction artifact rather than an intended part of the value - confirm.
// NOTE(review): "ASTERIX" is presumably a misspelling of "asterisk"; the name
// is referenced by other methods, so it is left unchanged here.
+ private const T_BRACKET_OPEN = 'bracket_open ' ;
24
+ private const T_BRACKET_CLOSE = 'bracket_close ' ;
25
+ private const T_BANG = 'bang ' ;
26
+ private const T_HYPHEN = 'hyphen ' ;
27
+ private const T_ASTERIX = 'asterix ' ;
28
+ private const T_SLASH = 'slash ' ;
29
+ private const T_BACKSLASH = 'backslash ' ;
30
+ private const T_CHAR = 'char ' ;
31
+ private const T_GLOBSTAR = 'globstar ' ;
32
+ private const T_QUERY = 'query ' ;
33
+
34
+
22
35
public static function match (string $ path , FileMatcherPattern $ pattern ): bool
23
36
{
24
37
self ::assertIsAbsolute ($ path );
@@ -37,101 +50,27 @@ public static function toRegEx($glob, $flags = 0): string
37
50
{
38
51
self ::assertIsAbsolute ($ glob );
39
52
40
- $ regex = '' ;
41
- $ length = strlen ($ glob );
42
-
43
- $ brackets = [];
53
+ $ tokens = self ::tokenize ($ glob );
44
54
45
- for ($ i = 0 ; $ i < $ length ; ++$ i ) {
46
- $ c = $ glob [$ i ];
47
-
48
- switch ($ c ) {
49
- case '[ ' :
50
- $ regex .= '[ ' ;
51
- $ brackets [] = $ i ;
52
- break ;
53
- case '] ' :
54
- $ regex .= '] ' ;
55
- array_pop ($ brackets );
56
- break ;
57
- case '? ' :
58
- $ regex .= '. ' ;
59
- break ;
60
- case '- ' :
61
- $ regex .= '- ' ;
62
- break ;
63
- case '! ' :
64
- // complementation/negation: taking into account escaped square brackets
65
- if ($ glob [$ i - 1 ] === '[ ' && ($ glob [$ i - 2 ] !== '\\' || ($ glob [$ i -2 ] === '\\' && $ glob [$ i - 3 ] === '\\' ))) {
66
- $ regex .= '^ ' ;
67
- break ;
68
- }
69
-
70
- // the PHPUnit file iterator will match all
71
- // files within a wildcard, not just until the
72
- // next directory separator
73
- case '* ' :
74
- // if this is a ** but it is NOT preceded with `/` then
75
- // it is not a globstar and just interpret it as a literal
76
- if (($ glob [$ i + 1 ] ?? null ) === '* ' ) {
77
- $ regex .= '\*\* ' ;
78
- $ i ++;
79
- break ;
80
- }
81
- $ regex .= '.* ' ;
82
- break ;
83
- case '/ ' :
84
- // code could be refactored - handle globstars
85
- if (isset ($ glob [$ i + 3 ]) && '**/ ' === $ glob [$ i + 1 ].$ glob [$ i + 2 ].$ glob [$ i + 3 ]) {
86
- $ regex .= '/([^/]+/)* ' ;
87
- $ i += 3 ;
88
- break ;
89
- }
90
- if ((!isset ($ glob [$ i + 3 ])) && isset ($ glob [$ i + 2 ]) && '** ' === $ glob [$ i + 1 ].$ glob [$ i + 2 ]) {
91
- $ regex .= '.* ' ;
92
- $ i += 2 ;
93
- break ;
94
- }
95
- $ regex .= '/ ' ;
96
- break ;
97
- case '\\' :
98
- // escape characters - this code is copy/pasted from webmozart/glob and
99
- // needs revision
100
- if (isset ($ glob [$ i + 1 ])) {
101
- switch ($ glob [$ i + 1 ]) {
102
- case '* ' :
103
- case '? ' :
104
- case '[ ' :
105
- case '] ' :
106
- case '\\' :
107
- $ regex .= '\\' .$ glob [$ i + 1 ];
108
- ++$ i ;
109
- break ;
110
-
111
- default :
112
- $ regex .= '\\\\' ;
113
- }
114
- } else {
115
- $ regex .= '\\\\' ;
116
- }
117
- break ;
118
-
119
- default :
120
- $ regex .= preg_quote ($ c );
121
- break ;
122
- }
123
- }
55
+ $ regex = '' ;
124
56
125
- // escape unterminated brackets
126
- $ bracketOffset = 0 ;
127
- foreach ($ brackets as $ offset ) {
128
- $ regex = substr ($ regex , 0 , $ offset + $ bracketOffset ) . '\\' . substr ($ regex , $ offset + $ bracketOffset );
129
- $ bracketOffset ++;
57
+ foreach ($ tokens as $ token ) {
58
+ $ type = $ token [0 ];
59
+ $ regex .= match ($ type ) {
60
+ // literal char
61
+ self ::T_CHAR => $ token [1 ] ?? throw new Exception ('Expected char token to have a value ' ),
62
+
63
+ // literal directory separator
64
+ self ::T_SLASH => '/ ' ,
65
+ self ::T_QUERY => '. ' ,
66
+
67
+ // match any segment up until the next directory separator
68
+ self ::T_ASTERIX => '[^/]* ' ,
69
+ self ::T_GLOBSTAR => '.* ' ,
70
+ default => '' ,
71
+ };
130
72
}
131
73
132
- $ regex .= '(/|$) ' ;
133
-
134
- dump ($ regex );
135
74
return '{^ ' .$ regex .'} ' ;
136
75
}
137
76
@@ -144,4 +83,64 @@ private static function assertIsAbsolute(string $path): void
144
83
));
145
84
}
146
85
}
86
+
87
+ /**
88
+ * @return list<token>
89
+ */
90
private static function tokenize(string $glob): array
{
    // Lexing pass: walk the glob byte by byte and tag each byte with the
    // token type of the glob metacharacter it represents. Any byte that is
    // not a metacharacter becomes a literal T_CHAR token. Every token is a
    // [type, character] pair. An empty glob yields an empty token list.
    $tokens = [];
    $length = strlen($glob);

    for ($i = 0; $i < $length; $i++) {
        $c = $glob[$i];

        // $c is exactly one byte and match() compares strictly (===), so
        // every arm must be a one-byte string; an arm with any padding
        // (e.g. '[ ') could never be selected and the metacharacter would
        // silently degrade to T_CHAR.
        $tokens[] = match ($c) {
            '['     => [self::T_BRACKET_OPEN, $c],
            ']'     => [self::T_BRACKET_CLOSE, $c],
            '?'     => [self::T_QUERY, $c],
            '-'     => [self::T_HYPHEN, $c],
            '!'     => [self::T_BANG, $c],
            '*'     => [self::T_ASTERIX, $c],
            '/'     => [self::T_SLASH, $c],
            '\\'    => [self::T_BACKSLASH, $c],
            default => [self::T_CHAR, $c],
        };
    }

    // Hand the raw per-character tokens to the second pass, which deals
    // with backslash escapes and consecutive asterisks.
    return self::processTokens($tokens);
}
114
+
115
+ /**
116
+ * @param list<token> $tokens
117
+ * @return list<token>
118
+ */
119
private static function processTokens(array $tokens): array
{
    // Second pass over the raw tokens:
    //  - a backslash escapes exactly ONE following token, which is then
    //    emitted as a literal T_CHAR (the backslash itself is dropped);
    //  - two consecutive unescaped asterisks collapse into one T_GLOBSTAR;
    //  - every other token is passed through unchanged.
    // A trailing backslash with nothing after it to escape is dropped.
    $resolved = [];
    $escaped  = false;
    $count    = count($tokens);

    for ($offset = 0; $offset < $count; $offset++) {
        [$type, $char] = $tokens[$offset];

        if ($type === self::T_BACKSLASH && !$escaped) {
            // Remember the escape and skip the backslash itself.
            $escaped = true;

            continue;
        }

        if ($escaped) {
            // The escape applies to this token only: clear the flag so the
            // remaining tokens are interpreted normally again.
            $escaped    = false;
            $resolved[] = [self::T_CHAR, $char];

            continue;
        }

        // Tokens are [type, char] pairs, so the lookahead has to compare the
        // TYPE of the next token, not the token array itself.
        $nextType = $tokens[$offset + 1][0] ?? null;

        if ($type === self::T_ASTERIX && $nextType === self::T_ASTERIX) {
            // Consume the second asterisk as part of the globstar.
            $offset++;
            $resolved[] = [self::T_GLOBSTAR, '**'];

            continue;
        }

        $resolved[] = [$type, $char];
    }

    return $resolved;
}
147
146
}
0 commit comments