diff --git a/resources/RegexGrammar.pp b/resources/RegexGrammar.pp index ba174feb29..3f49912a36 100644 --- a/resources/RegexGrammar.pp +++ b/resources/RegexGrammar.pp @@ -135,7 +135,7 @@ alternation() alternation: - concatenation() ( ::alternation:: concatenation() #alternation )* + concatenation()? ( concatenation()? #alternation )* concatenation: ( internal_options() | assertion() | quantification() | condition() ) @@ -154,8 +154,8 @@ | ::assertion_reference_:: alternation() #assertioncondition ) - ::_capturing:: concatenation()? - ( ::alternation:: concatenation()? )? + ::_capturing:: + alternation() ::_capturing:: assertion: @@ -165,7 +165,8 @@ | ::lookbehind_:: #lookbehind | ::negative_lookbehind_:: #negativelookbehind ) - alternation() ::_capturing:: + alternation() + ::_capturing:: quantification: ( class() | simple() ) ( quantifier() #quantification )? @@ -208,7 +209,8 @@ | ::atomic_group_:: #atomicgroup | ::capturing_:: ) - alternation() ::_capturing:: + alternation() + ::_capturing:: non_capturing_internal_options: diff --git a/src/Command/IgnoredRegexValidator.php b/src/Command/IgnoredRegexValidator.php index 613779b2e6..4340e0bd99 100644 --- a/src/Command/IgnoredRegexValidator.php +++ b/src/Command/IgnoredRegexValidator.php @@ -12,8 +12,6 @@ use PHPStan\Type\ObjectType; use PHPStan\Type\VerbosityLevel; use function count; -use function str_contains; -use function str_starts_with; use function strrpos; use function substr; @@ -34,19 +32,17 @@ public function validate(string $regex): IgnoredRegexValidatorResult try { /** @var TreeNode $ast */ $ast = $this->parser->parse($regex); - } catch (Exception $e) { - if (str_starts_with($e->getMessage(), 'Unexpected token "|" (alternation) at line 1')) { - return new IgnoredRegexValidatorResult([], false, true, '||', '\|\|'); - } - if ( - str_contains($regex, '()') - && str_starts_with($e->getMessage(), 'Unexpected token ")" (_capturing) at line 1') - ) { - return new IgnoredRegexValidatorResult([], false, true, '()', '\(\)'); - } + } catch (Exception) { return new IgnoredRegexValidatorResult([], false, false); } + if (Strings::match($regex, '~(?getIgnoredTypes($ast), $this->hasAnchorsInTheMiddle($ast), diff --git a/src/Type/Regex/RegexGroupParser.php b/src/Type/Regex/RegexGroupParser.php index a8a9755e8a..69eb455eaf 100644 --- a/src/Type/Regex/RegexGroupParser.php +++ b/src/Type/Regex/RegexGroupParser.php @@ -20,6 +20,7 @@ use PHPStan\Type\StringType; use PHPStan\Type\Type; use PHPStan\Type\TypeCombinator; +use function array_values; use function count; use function in_array; use function is_int; @@ -84,6 +85,9 @@ public function parseGroups(string $regex): ?array return null; } + $this->updateAlternationAstRemoveVerticalBarsAndAddEmptyToken($ast); + $this->updateCapturingAstAddEmptyToken($ast); + $captureOnlyNamed = false; if ($this->phpVersion->supportsPregCaptureOnlyNamedGroups()) { $captureOnlyNamed = str_contains($modifiers, 'n'); @@ -104,6 +108,51 @@ public function parseGroups(string $regex): ?array return [$astWalkResult->getCapturingGroups(), $astWalkResult->getMarkVerbs()]; } + private function createEmptyTokenTreeNode(TreeNode $parentAst): TreeNode + { + return new TreeNode('token', ['token' => 'literal', 'value' => '', 'namespace' => 'default'], [], $parentAst); + } + + private function updateAlternationAstRemoveVerticalBarsAndAddEmptyToken(TreeNode $ast): void + { + $children = $ast->getChildren(); + + foreach ($children as $i => $child) { + $this->updateAlternationAstRemoveVerticalBarsAndAddEmptyToken($child); + + if ($ast->getId() !== '#alternation' || $child->getValueToken() !== 'alternation') { + continue; + } + + unset($children[$i]); + + if ($i !== 0 + && isset($children[$i + 1]) + && $children[$i + 1]->getValueToken() !== 'alternation') { + continue; + } + + $children[$i] = $this->createEmptyTokenTreeNode($ast); + } + + $ast->setChildren(array_values($children)); + } + + private function updateCapturingAstAddEmptyToken(TreeNode $ast): void + { + foreach ($ast->getChildren() as $child) { + $this->updateCapturingAstAddEmptyToken($child); + } + + if ($ast->getId() !== '#capturing' || $ast->getChildren() !== []) { + return; + } + + $emptyAlternationAst = new TreeNode('#alternation', null, [], $ast); + $emptyAlternationAst->setChildren([$this->createEmptyTokenTreeNode($emptyAlternationAst)]); + $ast->setChildren([$emptyAlternationAst]); + } + private function walkRegexAst( TreeNode $ast, ?RegexAlternation $alternation, diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 701dc1f049..8861e9f036 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -901,6 +901,48 @@ function bugUnescapedDashAfterRange (string $string): void } } +function bugEmptySubexpression (string $string): void { + if (preg_match('//', $string, $matches)) { + assertType("array{string}", $matches); // could be array{''} + } + + if (preg_match('/()/', $string, $matches)) { + assertType("array{string, ''}", $matches); // could be array{'', ''} + } + + if (preg_match('/|/', $string, $matches)) { + assertType("array{string}", $matches); // could be array{''} + } + + if (preg_match('~|(a)~', $string, $matches)) { + assertType("array{0: string, 1?: 'a'}", $matches); + } + + if (preg_match('~(a)|~', $string, $matches)) { + assertType("array{0: string, 1?: 'a'}", $matches); + } + + if (preg_match('~(a)||(b)~', $string, $matches)) { + assertType("array{0: string, 1?: 'a'}|array{string, '', 'b'}", $matches); + } + + if (preg_match('~(|(a))~', $string, $matches)) { + assertType("array{0: string, 1: ''|'a', 2?: 'a'}", $matches); + } + + if (preg_match('~((a)|)~', $string, $matches)) { + assertType("array{0: string, 1: ''|'a', 2?: 'a'}", $matches); + } + + if (preg_match('~((a)||(b))~', $string, $matches)) { + assertType("array{0: string, 1: ''|'a'|'b', 2?: ''|'a', 3?: 'b'}", $matches); + } + + if (preg_match('~((a)|()|(b))~', $string, $matches)) { + assertType("array{0: string, 1: ''|'a'|'b', 2?: ''|'a', 3?: '', 4?: 'b'}", $matches); + } +} + function bug11744(string $string): void { if (!preg_match('~^((/[a-z]+)?)~', $string, $matches)) { diff --git a/tests/PHPStan/Command/IgnoredRegexValidatorTest.php b/tests/PHPStan/Command/IgnoredRegexValidatorTest.php index 98a6dc58cb..39902aa01f 100644 --- a/tests/PHPStan/Command/IgnoredRegexValidatorTest.php +++ b/tests/PHPStan/Command/IgnoredRegexValidatorTest.php @@ -100,12 +100,48 @@ public function dataValidate(): array false, false, ], + [ + '~(a\()~', + [], + false, + false, + ], + [ + '~b\\\()~', + [], + false, + true, + ], + [ + '~(c\\\\\()~', + [], + false, + false, + ], [ '~Result of || is always true.~', [], false, true, ], + [ + '~a\||~', + [], + false, + false, + ], + [ + '~b\\\||~', + [], + false, + true, + ], + [ + '~c\\\\\||~', + [], + false, + false, + ], [ '#Method PragmaRX\Notified\Data\Repositories\Notified::firstOrCreateByEvent() should return PragmaRX\Notified\Data\Models\Notified but returns Illuminate\Database\Eloquent\Model|null#', [],