Skip to content

Commit ffdaf58

Browse files
committed
Tokenizing
1 parent 0803322 commit ffdaf58

File tree

1 file changed

+91
-92
lines changed

1 file changed

+91
-92
lines changed

src/Util/FileMatcher.php

Lines changed: 91 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,29 @@
99
*/
1010
namespace PHPUnit\Util;
1111

12-
use InvalidArgumentException;
12+
use PHPUnit\Exception;
1313
use RuntimeException;
1414

1515
/**
1616
* @no-named-arguments Parameter names are not covered by the backward compatibility promise for PHPUnit
1717
*
1818
* @internal This class is not covered by the backward compatibility promise for PHPUnit
19+
* @phpstan-type token array{self::T_*,string}
1920
*/
2021
final readonly class FileMatcher
2122
{
23+
private const T_BRACKET_OPEN = 'bracket_open';
24+
private const T_BRACKET_CLOSE = 'bracket_close';
25+
private const T_BANG = 'bang';
26+
private const T_HYPHEN = 'hyphen';
27+
private const T_ASTERIX = 'asterix';
28+
private const T_SLASH = 'slash';
29+
private const T_BACKSLASH = 'backslash';
30+
private const T_CHAR = 'char';
31+
private const T_GLOBSTAR = 'globstar';
32+
private const T_QUERY = 'query';
33+
34+
2235
public static function match(string $path, FileMatcherPattern $pattern): bool
2336
{
2437
self::assertIsAbsolute($path);
@@ -37,101 +50,27 @@ public static function toRegEx($glob, $flags = 0): string
3750
{
3851
self::assertIsAbsolute($glob);
3952

40-
$regex = '';
41-
$length = strlen($glob);
42-
43-
$brackets = [];
53+
$tokens = self::tokenize($glob);
4454

45-
for ($i = 0; $i < $length; ++$i) {
46-
$c = $glob[$i];
47-
48-
switch ($c) {
49-
case '[':
50-
$regex .= '[';
51-
$brackets[] = $i;
52-
break;
53-
case ']':
54-
$regex .= ']';
55-
array_pop($brackets);
56-
break;
57-
case '?':
58-
$regex .= '.';
59-
break;
60-
case '-':
61-
$regex .= '-';
62-
break;
63-
case '!':
64-
// complementation/negation: taking into account escaped square brackets
65-
if ($glob[$i - 1] === '[' && ($glob[$i - 2] !== '\\' || ($glob[$i -2] === '\\' && $glob[$i - 3] === '\\'))) {
66-
$regex .= '^';
67-
break;
68-
}
69-
70-
// the PHPUnit file iterator will match all
71-
// files within a wildcard, not just until the
72-
// next directory separator
73-
case '*':
74-
// if this is a ** but it is NOT preceded with `/` then
75-
// it is not a globstar and just interpret it as a literal
76-
if (($glob[$i + 1] ?? null) === '*') {
77-
$regex .= '\*\*';
78-
$i++;
79-
break;
80-
}
81-
$regex .= '.*';
82-
break;
83-
case '/':
84-
// code could be refactored - handle globstars
85-
if (isset($glob[$i + 3]) && '**/' === $glob[$i + 1].$glob[$i + 2].$glob[$i + 3]) {
86-
$regex .= '/([^/]+/)*';
87-
$i += 3;
88-
break;
89-
}
90-
if ((!isset($glob[$i + 3])) && isset($glob[$i + 2]) && '**' === $glob[$i + 1].$glob[$i + 2]) {
91-
$regex .= '.*';
92-
$i += 2;
93-
break;
94-
}
95-
$regex .= '/';
96-
break;
97-
case '\\':
98-
// escape characters - this code is copy/pasted from webmozart/glob and
99-
// needs revision
100-
if (isset($glob[$i + 1])) {
101-
switch ($glob[$i + 1]) {
102-
case '*':
103-
case '?':
104-
case '[':
105-
case ']':
106-
case '\\':
107-
$regex .= '\\'.$glob[$i + 1];
108-
++$i;
109-
break;
110-
111-
default:
112-
$regex .= '\\\\';
113-
}
114-
} else {
115-
$regex .= '\\\\';
116-
}
117-
break;
118-
119-
default:
120-
$regex .= preg_quote($c);
121-
break;
122-
}
123-
}
55+
$regex = '';
12456

125-
// escape unterminated brackets
126-
$bracketOffset = 0;
127-
foreach ($brackets as $offset) {
128-
$regex = substr($regex, 0, $offset + $bracketOffset) . '\\' . substr($regex, $offset + $bracketOffset);
129-
$bracketOffset++;
57+
foreach ($tokens as $token) {
58+
$type = $token[0];
59+
$regex .= match ($type) {
60+
// literal char
61+
self::T_CHAR => $token[1] ?? throw new Exception('Expected char token to have a value'),
62+
63+
// literal directory separator
64+
self::T_SLASH => '/',
65+
self::T_QUERY => '.',
66+
67+
// match any segment up until the next directory separator
68+
self::T_ASTERIX => '[^/]*',
69+
self::T_GLOBSTAR => '.*',
70+
default => '',
71+
};
13072
}
13173

132-
$regex .= '(/|$)';
133-
134-
dump($regex);
13574
return '{^'.$regex.'}';
13675
}
13776

@@ -144,4 +83,64 @@ private static function assertIsAbsolute(string $path): void
14483
));
14584
}
14685
}
86+
87+
/**
 * Split a glob into one token per byte and hand the result to
 * processTokens() for escape and globstar resolution.
 *
 * Each raw token is a [type, character] pair. Characters that have no
 * dedicated token type are classified as T_CHAR.
 *
 * @return list<token>
 */
private static function tokenize(string $glob): array
{
    // glob meta characters and the token type each one maps to
    $typeByChar = [
        '['  => self::T_BRACKET_OPEN,
        ']'  => self::T_BRACKET_CLOSE,
        '?'  => self::T_QUERY,
        '-'  => self::T_HYPHEN,
        '!'  => self::T_BANG,
        '*'  => self::T_ASTERIX,
        '/'  => self::T_SLASH,
        '\\' => self::T_BACKSLASH,
    ];

    $raw = [];
    $end = strlen($glob);
    $pos = 0;

    while ($pos < $end) {
        $char  = $glob[$pos];
        $raw[] = [$typeByChar[$char] ?? self::T_CHAR, $char];
        $pos++;
    }

    return self::processTokens($raw);
}
114+
115+
/**
 * Resolve escape sequences and globstars in a raw token stream.
 *
 * - A backslash marks the token that follows it as escaped; the escaped
 *   token is emitted as a literal T_CHAR carrying its original character
 *   and the backslash itself is not emitted. A backslash that is the last
 *   token has nothing to escape and produces no output token.
 * - Two consecutive unescaped asterisk tokens are merged into a single
 *   T_GLOBSTAR token.
 * - Every other token is passed through unchanged.
 *
 * @param list<token> $tokens
 *
 * @return list<token>
 */
private static function processTokens(array $tokens): array
{
    $resolved = [];
    $escaped  = false;
    $total    = count($tokens);

    for ($offset = 0; $offset < $total; $offset++) {
        [$type, $char] = $tokens[$offset];

        if ($type === self::T_BACKSLASH && false === $escaped) {
            $escaped = true;

            continue;
        }

        if ($escaped === true) {
            $resolved[] = [self::T_CHAR, $char];

            // the escape applies to exactly one token; without this reset
            // every token after the first backslash would become a literal
            $escaped = false;

            continue;
        }

        // look at the *type* of the next token: comparing the whole token
        // (an array) against the type constant can never be identical
        if ($type === self::T_ASTERIX && ($tokens[$offset + 1][0] ?? null) === self::T_ASTERIX) {
            // consume the second asterisk as part of the globstar
            $offset++;
            $resolved[] = [self::T_GLOBSTAR, '**'];

            continue;
        }

        $resolved[] = [$type, $char];
    }

    return $resolved;
}
147146
}

0 commit comments

Comments
 (0)