From c55994ef870ed8f7f0508a51f1fc455f3b8bc3b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Vo=C5=99=C3=AD=C5=A1ek?= Date: Fri, 27 Sep 2024 15:35:52 +0200 Subject: [PATCH 1/8] add empty subexpression grammar and tests --- resources/RegexGrammar.pp | 12 +++--- .../Analyser/nsrt/preg_match_shapes.php | 42 +++++++++++++++++++ 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/resources/RegexGrammar.pp b/resources/RegexGrammar.pp index ba174feb29..3f49912a36 100644 --- a/resources/RegexGrammar.pp +++ b/resources/RegexGrammar.pp @@ -135,7 +135,7 @@ alternation() alternation: - concatenation() ( ::alternation:: concatenation() #alternation )* + concatenation()? ( concatenation()? #alternation )* concatenation: ( internal_options() | assertion() | quantification() | condition() ) @@ -154,8 +154,8 @@ | ::assertion_reference_:: alternation() #assertioncondition ) - ::_capturing:: concatenation()? - ( ::alternation:: concatenation()? )? + ::_capturing:: + alternation() ::_capturing:: assertion: @@ -165,7 +165,8 @@ | ::lookbehind_:: #lookbehind | ::negative_lookbehind_:: #negativelookbehind ) - alternation() ::_capturing:: + alternation() + ::_capturing:: quantification: ( class() | simple() ) ( quantifier() #quantification )? @@ -208,7 +209,8 @@ | ::atomic_group_:: #atomicgroup | ::capturing_:: ) - alternation() ::_capturing:: + alternation() + ::_capturing:: non_capturing_internal_options: diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 701dc1f049..053360ba07 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -901,6 +901,48 @@ function bugUnescapedDashAfterRange (string $string): void } } +function bugEmptySubexpression (string $string): void { + if (preg_match('//', $string, $matches)) { + assertType("array{string}", $matches); // could be array{''} + } + + if (preg_match('/()/', $string, $matches)) { + assertType("array{string, string}", $matches); // could be array{'', ''} + } + + if (preg_match('/|/', $string, $matches)) { + assertType("array{string}", $matches); // could be array{''} + } + + if (preg_match('~|(a)~', $string, $matches)) { + assertType("array{0: string, 1?: 'a'}", $matches); + } + + if (preg_match('~(a)|~', $string, $matches)) { + assertType("array{0: string, 1?: 'a'}", $matches); + } + + if (preg_match('~(a)||(b)~', $string, $matches)) { + assertType("array{0: string, 1?: 'a'}|array{string, '', 'b'}", $matches); + } + + if (preg_match('~(|(a))~', $string, $matches)) { + assertType("array{0: string, 1: ''|'a', 2?: 'a'}", $matches); + } + + if (preg_match('~((a)|)~', $string, $matches)) { + assertType("array{0: string, 1: ''|'a', 2?: 'a'}", $matches); + } + + if (preg_match('~((a)||(b))~', $string, $matches)) { + assertType("array{0: string, 1: ''|'a'|'b', 2?: ''|'a', 3?: 'b'}", $matches); + } + + if (preg_match('~((a)|()|(b))~', $string, $matches)) { + assertType("array{0: string, 1: string, 2?: ''|'a', 3?: string, 4?: 'b'}", $matches); + } +} + function bug11744(string $string): void { if (!preg_match('~^((/[a-z]+)?)~', $string, $matches)) { From 29cc941ca91549786d2a89037a44f46f8b639bc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Vo=C5=99=C3=AD=C5=A1ek?= Date: Fri, 27 Sep 2024 23:02:26 +0200 Subject: [PATCH 2/8] skip alternation (`|`) tokens --- src/Type/Regex/RegexGroupParser.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Type/Regex/RegexGroupParser.php b/src/Type/Regex/RegexGroupParser.php index a8a9755e8a..2138dddd93 100644 --- a/src/Type/Regex/RegexGroupParser.php +++ b/src/Type/Regex/RegexGroupParser.php @@ -302,6 +302,10 @@ private function createGroupType(TreeNode $group, bool $maybeConstant, string $p if ($rootAlternation !== null) { $types = []; foreach ($rootAlternation->getChildren() as $alternative) { + if ($alternative->getValueToken() === 'alternation') { + continue; + } + $types[] = $this->createGroupType($alternative, $maybeConstant, $patternModifiers); } From 103ea0cd313f082320903249ec1e9eb6d39c9afb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Vo=C5=99=C3=AD=C5=A1ek?= Date: Sat, 28 Sep 2024 00:39:19 +0200 Subject: [PATCH 3/8] Revert "skip alternation (`|`) tokens" --- src/Type/Regex/RegexGroupParser.php | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Type/Regex/RegexGroupParser.php b/src/Type/Regex/RegexGroupParser.php index 2138dddd93..a8a9755e8a 100644 --- a/src/Type/Regex/RegexGroupParser.php +++ b/src/Type/Regex/RegexGroupParser.php @@ -302,10 +302,6 @@ private function createGroupType(TreeNode $group, bool $maybeConstant, string $p if ($rootAlternation !== null) { $types = []; foreach ($rootAlternation->getChildren() as $alternative) { - if ($alternative->getValueToken() === 'alternation') { - continue; - } - $types[] = $this->createGroupType($alternative, $maybeConstant, $patternModifiers); } From 993be4c5a0add7190b50f9ed67d40b7ad1b62a39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Vo=C5=99=C3=AD=C5=A1ek?= Date: Sat, 28 Sep 2024 00:14:48 +0200 Subject: [PATCH 4/8] update AST with empty tokens for alternations --- src/Type/Regex/RegexGroupParser.php | 28 +++++++++++++++++++ .../Command/IgnoredRegexValidatorTest.php | 8 ++++-- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/Type/Regex/RegexGroupParser.php b/src/Type/Regex/RegexGroupParser.php index a8a9755e8a..abb0b417db 100644 --- a/src/Type/Regex/RegexGroupParser.php +++ b/src/Type/Regex/RegexGroupParser.php @@ -20,6 +20,7 @@ use PHPStan\Type\StringType; use PHPStan\Type\Type; use PHPStan\Type\TypeCombinator; +use function array_values; use function count; use function in_array; use function is_int; @@ -84,6 +85,8 @@ public function parseGroups(string $regex): ?array return null; } + $this->updateAlternationAstRemoveVerticalBarsAndAddEmptyToken($ast); + $captureOnlyNamed = false; if ($this->phpVersion->supportsPregCaptureOnlyNamedGroups()) { $captureOnlyNamed = str_contains($modifiers, 'n'); @@ -104,6 +107,31 @@ public function parseGroups(string $regex): ?array return [$astWalkResult->getCapturingGroups(), $astWalkResult->getMarkVerbs()]; } + private function updateAlternationAstRemoveVerticalBarsAndAddEmptyToken(TreeNode $ast): void + { + $children = $ast->getChildren(); + + foreach ($children as $i => $child) { + $this->updateAlternationAstRemoveVerticalBarsAndAddEmptyToken($child); + + if ($ast->getId() !== '#alternation' || $child->getValueToken() !== 'alternation') { + continue; + } + + unset($children[$i]); + + if ($i !== 0 + && isset($children[$i + 1]) + && $children[$i + 1]->getValueToken() !== 'alternation') { + continue; + } + + $children[$i] = new TreeNode('token', ['token' => 'literal', 'value' => '', 'namespace' => 'default'], [], $ast); + } + + $ast->setChildren(array_values($children)); + } + private function walkRegexAst( TreeNode $ast, ?RegexAlternation $alternation, diff --git a/tests/PHPStan/Command/IgnoredRegexValidatorTest.php b/tests/PHPStan/Command/IgnoredRegexValidatorTest.php index 98a6dc58cb..4dd4a78435 100644 --- a/tests/PHPStan/Command/IgnoredRegexValidatorTest.php +++ b/tests/PHPStan/Command/IgnoredRegexValidatorTest.php @@ -104,13 +104,15 @@ public function dataValidate(): array '~Result of || is always true.~', [], false, - true, + false, ], [ '#Method PragmaRX\Notified\Data\Repositories\Notified::firstOrCreateByEvent() should return PragmaRX\Notified\Data\Models\Notified but returns Illuminate\Database\Eloquent\Model|null#', - [], + [ + 'null' => 'null', + ], + false, false, - true, ], ]; } From 0c222fb65601992492020bb34af8fe65fb2b0134 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Vo=C5=99=C3=AD=C5=A1ek?= Date: Sat, 28 Sep 2024 01:01:06 +0200 Subject: [PATCH 5/8] move create empty token into private method --- src/Type/Regex/RegexGroupParser.php | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Type/Regex/RegexGroupParser.php b/src/Type/Regex/RegexGroupParser.php index abb0b417db..89d3da9d7f 100644 --- a/src/Type/Regex/RegexGroupParser.php +++ b/src/Type/Regex/RegexGroupParser.php @@ -107,6 +107,11 @@ public function parseGroups(string $regex): ?array return [$astWalkResult->getCapturingGroups(), $astWalkResult->getMarkVerbs()]; } + private function createEmptyTokenTreeNode(TreeNode $parentAst): TreeNode + { + return new TreeNode('token', ['token' => 'literal', 'value' => '', 'namespace' => 'default'], [], $parentAst); + } + private function updateAlternationAstRemoveVerticalBarsAndAddEmptyToken(TreeNode $ast): void { $children = $ast->getChildren(); @@ -126,7 +131,7 @@ private function updateAlternationAstRemoveVerticalBarsAndAddEmptyToken(TreeNode continue; } - $children[$i] = new TreeNode('token', ['token' => 'literal', 'value' => '', 'namespace' => 'default'], [], $ast); + $children[$i] = $this->createEmptyTokenTreeNode($ast); } $ast->setChildren(array_values($children)); From 5281a8c5d8d9d64147bbb8250bd351b5e5fa0731 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Vo=C5=99=C3=AD=C5=A1ek?= Date: Sat, 28 Sep 2024 01:17:37 +0200 Subject: [PATCH 6/8] fix empty token parse in capturing group --- src/Type/Regex/RegexGroupParser.php | 16 ++++++++++++++++ .../PHPStan/Analyser/nsrt/preg_match_shapes.php | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/Type/Regex/RegexGroupParser.php b/src/Type/Regex/RegexGroupParser.php index 89d3da9d7f..69eb455eaf 100644 --- a/src/Type/Regex/RegexGroupParser.php +++ b/src/Type/Regex/RegexGroupParser.php @@ -86,6 +86,7 @@ public function parseGroups(string $regex): ?array } $this->updateAlternationAstRemoveVerticalBarsAndAddEmptyToken($ast); + $this->updateCapturingAstAddEmptyToken($ast); $captureOnlyNamed = false; if ($this->phpVersion->supportsPregCaptureOnlyNamedGroups()) { @@ -137,6 +138,21 @@ private function updateAlternationAstRemoveVerticalBarsAndAddEmptyToken(TreeNode $ast->setChildren(array_values($children)); } + private function updateCapturingAstAddEmptyToken(TreeNode $ast): void + { + foreach ($ast->getChildren() as $child) { + $this->updateCapturingAstAddEmptyToken($child); + } + + if ($ast->getId() !== '#capturing' || $ast->getChildren() !== []) { + return; + } + + $emptyAlternationAst = new TreeNode('#alternation', null, [], $ast); + $emptyAlternationAst->setChildren([$this->createEmptyTokenTreeNode($emptyAlternationAst)]); + $ast->setChildren([$emptyAlternationAst]); + } + private function walkRegexAst( TreeNode $ast, ?RegexAlternation $alternation, diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 053360ba07..8861e9f036 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -907,7 +907,7 @@ function bugEmptySubexpression (string $string): void { } if (preg_match('/()/', $string, $matches)) { - assertType("array{string, string}", $matches); // could be array{'', ''} + assertType("array{string, ''}", $matches); // could be array{'', ''} } if (preg_match('/|/', $string, $matches)) { @@ -939,7 +939,7 @@ function bugEmptySubexpression (string $string): void { } if (preg_match('~((a)|()|(b))~', $string, $matches)) { - assertType("array{0: string, 1: string, 2?: ''|'a', 3?: string, 4?: 'b'}", $matches); + assertType("array{0: string, 1: ''|'a'|'b', 2?: ''|'a', 3?: '', 4?: 'b'}", $matches); } } From 0a8a6b24b793dee2cd9c39070b530073f43a1e3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Vo=C5=99=C3=AD=C5=A1ek?= Date: Wed, 9 Oct 2024 06:56:36 +0200 Subject: [PATCH 7/8] keep || and () exceptions for ignore rules validations --- src/Command/IgnoredRegexValidator.php | 19 ++++++++----------- .../Command/IgnoredRegexValidatorTest.php | 8 +++----- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/src/Command/IgnoredRegexValidator.php b/src/Command/IgnoredRegexValidator.php index 613779b2e6..6daecb33f0 100644 --- a/src/Command/IgnoredRegexValidator.php +++ b/src/Command/IgnoredRegexValidator.php @@ -13,7 +13,6 @@ use PHPStan\Type\VerbosityLevel; use function count; use function str_contains; -use function str_starts_with; use function strrpos; use function substr; @@ -34,19 +33,17 @@ public function validate(string $regex): IgnoredRegexValidatorResult try { /** @var TreeNode $ast */ $ast = $this->parser->parse($regex); - } catch (Exception $e) { - if (str_starts_with($e->getMessage(), 'Unexpected token "|" (alternation) at line 1')) { - return new IgnoredRegexValidatorResult([], false, true, '||', '\|\|'); - } - if ( - str_contains($regex, '()') - && str_starts_with($e->getMessage(), 'Unexpected token ")" (_capturing) at line 1') - ) { - return new IgnoredRegexValidatorResult([], false, true, '()', '\(\)'); - } + } catch (Exception) { return new IgnoredRegexValidatorResult([], false, false); } + if (str_contains($regex, '||')) { + return new IgnoredRegexValidatorResult([], false, true, '||', '\|\|'); + } + if (str_contains($regex, '()')) { + return new IgnoredRegexValidatorResult([], false, true, '()', '\(\)'); + } + return new IgnoredRegexValidatorResult( $this->getIgnoredTypes($ast), $this->hasAnchorsInTheMiddle($ast), diff --git a/tests/PHPStan/Command/IgnoredRegexValidatorTest.php b/tests/PHPStan/Command/IgnoredRegexValidatorTest.php index 4dd4a78435..98a6dc58cb 100644 --- a/tests/PHPStan/Command/IgnoredRegexValidatorTest.php +++ b/tests/PHPStan/Command/IgnoredRegexValidatorTest.php @@ -104,15 +104,13 @@ public function dataValidate(): array '~Result of || is always true.~', [], false, - false, + true, ], [ '#Method PragmaRX\Notified\Data\Repositories\Notified::firstOrCreateByEvent() should return PragmaRX\Notified\Data\Models\Notified but returns Illuminate\Database\Eloquent\Model|null#', - [ - 'null' => 'null', - ], - false, + [], false, + true, ], ]; } From f40aae6aebbe7a487704edd6c40b695a2d39ad98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Vo=C5=99=C3=AD=C5=A1ek?= Date: Fri, 11 Oct 2024 11:56:55 +0200 Subject: [PATCH 8/8] allow first "|" and "(" to be escaped --- src/Command/IgnoredRegexValidator.php | 5 ++- .../Command/IgnoredRegexValidatorTest.php | 36 +++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/src/Command/IgnoredRegexValidator.php b/src/Command/IgnoredRegexValidator.php index 6daecb33f0..4340e0bd99 100644 --- a/src/Command/IgnoredRegexValidator.php +++ b/src/Command/IgnoredRegexValidator.php @@ -12,7 +12,6 @@ use PHPStan\Type\ObjectType; use PHPStan\Type\VerbosityLevel; use function count; -use function str_contains; use function strrpos; use function substr; @@ -37,10 +36,10 @@ public function validate(string $regex): IgnoredRegexValidatorResult return new IgnoredRegexValidatorResult([], false, false); } - if (str_contains($regex, '||')) { + if (Strings::match($regex, '~(?