1212use function array_key_last ;
1313use function array_pop ;
1414use function count ;
15+ use function ctype_alpha ;
1516use function preg_match ;
1617use function preg_quote ;
1718use function sprintf ;
3940 private const T_GREEDY_GLOBSTAR = 'greedy_globstar ' ;
4041 private const T_QUERY = 'query ' ;
4142 private const T_GLOBSTAR = 'globstar ' ;
43+ private const T_COLON = 'colon ' ;
44+ private const T_CHAR_CLASS = 'char_class ' ;
4245
4346 public static function match (string $ path , FileMatcherPattern $ pattern ): bool
4447 {
@@ -80,6 +83,7 @@ public static function toRegEx($glob, $flags = 0): string
8083 self ::T_BRACKET_OPEN => '[ ' ,
8184 self ::T_BRACKET_CLOSE => '] ' ,
8285 self ::T_HYPHEN => '- ' ,
86+ self ::T_CHAR_CLASS => '[: ' . $ token [1 ] . ':] ' ,
8387 default => '' ,
8488 };
8589 }
@@ -121,6 +125,7 @@ private static function tokenize(string $glob): array
121125 '* ' => [self ::T_ASTERIX , $ c ],
122126 '/ ' => [self ::T_SLASH , $ c ],
123127 '\\' => [self ::T_BACKSLASH , $ c ],
128+ ': ' => [self ::T_COLON , $ c ],
124129 default => [self ::T_CHAR , $ c ],
125130 };
126131 }
@@ -135,13 +140,14 @@ private static function tokenize(string $glob): array
135140 */
136141 private static function processTokens (array $ tokens ): array
137142 {
138- $ resolved = [];
139- $ escaped = false ;
143+ $ resolved = [];
144+ $ escaped = false ;
140145 $ bracketOpen = false ;
141- $ brackets = [];
146+ $ brackets = [];
142147
143148 for ($ offset = 0 ; $ offset < count ($ tokens ); $ offset ++) {
144149 [$ type , $ char ] = $ tokens [$ offset ];
150+ $ nextType = $ tokens [$ offset + 1 ][0 ] ?? null ;
145151
146152 if ($ type === self ::T_BACKSLASH && false === $ escaped ) {
147153 $ escaped = true ;
@@ -205,27 +211,50 @@ private static function processTokens(array $tokens): array
205211 continue ;
206212 }
207213
208- if ($ type === self ::T_BRACKET_OPEN && $ tokens [ $ offset + 1 ][ 0 ] === self ::T_BRACKET_CLOSE ) {
214+ if ($ type === self ::T_BRACKET_OPEN && $ nextType === self ::T_BRACKET_CLOSE ) {
209215 $ bracketOpen = true ;
210- $ resolved [] = [self ::T_BRACKET_OPEN , '[ ' ];
211- $ brackets [] = array_key_last ($ resolved );
212- $ resolved [] = [self ::T_CHAR , '] ' ];
213- $ offset += 1 ;
216+ $ resolved [] = [self ::T_BRACKET_OPEN , '[ ' ];
217+ $ brackets [] = array_key_last ($ resolved );
218+ $ resolved [] = [self ::T_CHAR , '] ' ];
219+ $ offset++ ;
214220
215221 continue ;
216222 }
217223
224+ if ($ bracketOpen && $ type === self ::T_BRACKET_OPEN && $ nextType === self ::T_COLON ) {
225+ // this looks like a named [:character:] class
226+ $ class = '' ;
227+ $ offset += 2 ;
228+
229+ // parse the character class name
230+ while (ctype_alpha ($ tokens [$ offset ][1 ])) {
231+ $ class .= $ tokens [$ offset ++][1 ];
232+ }
233+
234+ // if followed by a `:` then it's a character class
235+ if ($ tokens [$ offset ][0 ] === self ::T_COLON ) {
236+ $ offset ++;
237+ $ resolved [] = [self ::T_CHAR_CLASS , $ class ];
238+
239+ continue ;
240+ }
241+
242+ // otherwise it's a harmless literal
243+ $ resolved [] = [self ::T_CHAR , ': ' . $ class ];
244+ }
245+
218246 if ($ bracketOpen === true && $ type === self ::T_BRACKET_OPEN ) {
219247 // if bracket is already open, interpret everything as a
220248 // literal char
221249 $ resolved [] = [self ::T_CHAR , $ char ];
250+
222251 continue ;
223252 }
224253
225254 if ($ bracketOpen === false && $ type === self ::T_BRACKET_OPEN ) {
226255 $ bracketOpen = true ;
227- $ resolved [] = [$ type , $ char ];
228- $ brackets [] = array_key_last ($ resolved );
256+ $ resolved [] = [$ type , $ char ];
257+ $ brackets [] = array_key_last ($ resolved );
229258
230259 continue ;
231260 }
0 commit comments