From 30b63e3442e9edaf43f456a75d1cd4c1360520bb Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sat, 3 Aug 2024 16:01:07 +0200 Subject: [PATCH 1/8] Support literal strings in RegexGroupParser --- src/Type/Php/RegexGroupParser.php | 31 +++++++++++++------ .../Analyser/nsrt/preg_match_shapes.php | 28 ++++++++++++----- 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/src/Type/Php/RegexGroupParser.php b/src/Type/Php/RegexGroupParser.php index c6782c16be..e48519fc9a 100644 --- a/src/Type/Php/RegexGroupParser.php +++ b/src/Type/Php/RegexGroupParser.php @@ -21,6 +21,7 @@ use PHPStan\Type\TypeCombinator; use function array_key_exists; use function count; +use function implode; use function in_array; use function is_int; use function rtrim; @@ -264,9 +265,13 @@ private function createGroupType(TreeNode $group): Type $isNonEmpty = TrinaryLogic::createMaybe(); $isNumeric = TrinaryLogic::createMaybe(); $inOptionalQuantification = false; + $onlyLiterals = []; - $this->walkGroupAst($group, $isNonEmpty, $isNumeric, $inOptionalQuantification); + $this->walkGroupAst($group, $isNonEmpty, $isNumeric, $inOptionalQuantification, $onlyLiterals); + if ($onlyLiterals !== null && $onlyLiterals !== []) { + return new ConstantStringType(implode('', $onlyLiterals)); + } if ($isNumeric->yes()) { $result = new IntersectionType([new StringType(), new AccessoryNumericStringType()]); if (!$isNonEmpty->yes()) { @@ -280,7 +285,7 @@ private function createGroupType(TreeNode $group): Type return new StringType(); } - private function walkGroupAst(TreeNode $ast, TrinaryLogic &$isNonEmpty, TrinaryLogic &$isNumeric, bool &$inOptionalQuantification): void + private function walkGroupAst(TreeNode $ast, TrinaryLogic &$isNonEmpty, TrinaryLogic &$isNumeric, bool &$inOptionalQuantification, ?array &$onlyLiterals): void { $children = $ast->getChildren(); @@ -289,9 +294,8 @@ private function walkGroupAst(TreeNode $ast, TrinaryLogic &$isNonEmpty, TrinaryL && count($children) > 0 ) { $isNonEmpty = TrinaryLogic::createYes(); - } - - if ($ast->getId() === '#quantification') { + $onlyLiterals = null; + } elseif ($ast->getId() === '#quantification') { [$min] = $this->getQuantificationRange($ast); if ($min === 0) { @@ -301,10 +305,10 @@ private function walkGroupAst(TreeNode $ast, TrinaryLogic &$isNonEmpty, TrinaryL $isNonEmpty = TrinaryLogic::createYes(); $inOptionalQuantification = false; } - } - if ($ast->getId() === 'token') { - $literalValue = $this->getLiteralValue($ast); + $onlyLiterals = null; + } elseif ($ast->getId() === 'token') { + $literalValue = $this->getLiteralValue($ast, $onlyLiterals); if ($literalValue !== null) { if (Strings::match($literalValue, '/^\d+$/') === null) { $isNumeric = TrinaryLogic::createNo(); @@ -315,7 +319,13 @@ private function walkGroupAst(TreeNode $ast, TrinaryLogic &$isNonEmpty, TrinaryL if (!$inOptionalQuantification) { $isNonEmpty = TrinaryLogic::createYes(); } + } else { + $onlyLiterals = null; } + } elseif (!in_array($ast->getId(), ['#capturing'], true)) { + $onlyLiterals = null; + } else { + $x = 1; } // [^0-9] should not parse as numeric-string, and [^list-everything-but-numbers] is technically @@ -331,11 +341,12 @@ private function walkGroupAst(TreeNode $ast, TrinaryLogic &$isNonEmpty, TrinaryL $isNonEmpty, $isNumeric, $inOptionalQuantification, + $onlyLiterals, ); } } - private function getLiteralValue(TreeNode $node): ?string + private function getLiteralValue(TreeNode $node, ?array &$onlyLiterals): ?string { if ($node->getId() !== 'token') { return null; @@ -348,6 +359,8 @@ private function getLiteralValue(TreeNode $node): ?string if (in_array($token, ['literal', 'escaped_end_class'], true)) { if (strlen($node->getValueValue()) > 1 && $value[0] === '\\') { return substr($value, 1); + } elseif ($token === 'literal' && $onlyLiterals !== null && !in_array($value, ['.'], true)) { + $onlyLiterals[] = $value; } return $value; diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index af57075078..5034357e57 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -44,9 +44,9 @@ function doMatch(string $s): void { assertType('array{}|array{0: string, 1: non-empty-string, 2: string, 3: non-empty-string, 4?: non-empty-string}', $matches); if (preg_match('/(a)(?b)*(c)(d)*/', $s, $matches)) { - assertType('array{0: string, 1: non-empty-string, name: string, 2: string, 3: non-empty-string, 4?: non-empty-string}', $matches); + assertType("array{0: string, 1: 'a', name: string, 2: string, 3: 'c', 4?: 'd'}", $matches); } - assertType('array{}|array{0: string, 1: non-empty-string, name: string, 2: string, 3: non-empty-string, 4?: non-empty-string}', $matches); + assertType("array{}|array{0: string, 1: 'a', name: string, 2: string, 3: 'c', 4?: 'd'}", $matches); if (preg_match('/(a)(b)*(c)(?d)*/', $s, $matches)) { assertType('array{0: string, 1: non-empty-string, 2: string, 3: non-empty-string, name?: non-empty-string, 4?: non-empty-string}', $matches); @@ -233,13 +233,13 @@ function testUnionPattern(string $s): void function doFoo(string $row): void { if (preg_match('~^(a(b))$~', $row, $matches) === 1) { - assertType('array{string, non-empty-string, non-empty-string}', $matches); + assertType("array{string, non-empty-string, 'b'}", $matches); } if (preg_match('~^(a(b)?)$~', $row, $matches) === 1) { assertType('array{0: string, 1: non-empty-string, 2?: non-empty-string}', $matches); } if (preg_match('~^(a(b)?)?$~', $row, $matches) === 1) { - assertType('array{0: string, 1?: non-empty-string, 2?: non-empty-string}', $matches); + assertType("array{0: string, 1?: non-empty-string, 2?: 'b'}", $matches); } } @@ -390,11 +390,11 @@ function unmatchedAsNullWithMandatoryGroup(string $s): void { function (string $s): void { if (preg_match('{' . preg_quote('xxx') . '(z)}', $s, $matches)) { - assertType('array{string, non-empty-string}', $matches); + assertType("array{string, 'z'}", $matches); } else { assertType('array{}', $matches); } - assertType('array{}|array{string, non-empty-string}', $matches); + assertType("array{}|array{string, 'z'}", $matches); }; function (string $s): void { @@ -417,11 +417,11 @@ function (string $s): void { function (string $s): void { if (preg_match('{' . preg_quote($s) . '(z)' . preg_quote($s) . '(?:abc)(def)?}', $s, $matches)) { - assertType('array{0: string, 1: non-empty-string, 2?: non-empty-string}', $matches); + assertType("array{0: string, 1: 'z', 2?: non-empty-string", $matches); } else { assertType('array{}', $matches); } - assertType('array{}|array{0: string, 1: non-empty-string, 2?: non-empty-string}', $matches); + assertType("array{}|array{0: string, 1: 'z', 2?: non-empty-string}", $matches); }; function (string $s, $mixed): void { @@ -546,3 +546,15 @@ public function test2(string $str): void } } } + +function (string $s): void { + if (rand(0,1)) { + $p = '/Price: (£)(abc)/i'; + } else { + $p = '/Price: (\d)(b)/i'; + } + + if (preg_match($p, $s, $matches)) { + assertType("array{string, '£', 'abc'}|array{string, numeric-string, 'b'}", $matches); + } +}; From b307034f5a2ec4eff626133da5728f7d3abc0df2 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sat, 3 Aug 2024 16:24:33 +0200 Subject: [PATCH 2/8] fix --- src/Type/Php/RegexGroupParser.php | 37 ++++++++--- tests/PHPStan/Analyser/nsrt/bug-11311.php | 8 +-- tests/PHPStan/Analyser/nsrt/bug11384.php | 2 +- .../Analyser/nsrt/preg_match_all_shapes.php | 22 +++---- .../Analyser/nsrt/preg_match_shapes.php | 62 +++++++++---------- .../Analyser/nsrt/preg_match_shapes_php80.php | 4 +- .../nsrt/preg_replace_callback_shapes.php | 6 +- 7 files changed, 79 insertions(+), 62 deletions(-) diff --git a/src/Type/Php/RegexGroupParser.php b/src/Type/Php/RegexGroupParser.php index e48519fc9a..3cb5c8605d 100644 --- a/src/Type/Php/RegexGroupParser.php +++ b/src/Type/Php/RegexGroupParser.php @@ -90,6 +90,7 @@ public function parseGroups(string $regex): ?array $groupCombinations, $markVerbs, $captureOnlyNamed, + false, ); return [$capturingGroups, $groupCombinations, $markVerbs]; @@ -112,6 +113,7 @@ private function walkRegexAst( array &$groupCombinations, array &$markVerbs, bool $captureOnlyNamed, + bool $repeatedMoreThenOnce, ): void { $group = null; @@ -122,7 +124,7 @@ private function walkRegexAst( $inAlternation ? $alternationId : null, $inOptionalQuantification, $parentGroup, - $this->createGroupType($ast), + $this->createGroupType($ast, $repeatedMoreThenOnce), ); $parentGroup = $group; } elseif ($ast->getId() === '#namedcapturing') { @@ -133,7 +135,7 @@ private function walkRegexAst( $inAlternation ? $alternationId : null, $inOptionalQuantification, $parentGroup, - $this->createGroupType($ast), + $this->createGroupType($ast, $repeatedMoreThenOnce), ); $parentGroup = $group; } elseif ($ast->getId() === '#noncapturing') { @@ -156,11 +158,15 @@ private function walkRegexAst( $inOptionalQuantification = false; if ($ast->getId() === '#quantification') { - [$min] = $this->getQuantificationRange($ast); + [$min, $max] = $this->getQuantificationRange($ast); if ($min === 0) { $inOptionalQuantification = true; } + + if ($max === null || $max > 1) { + $repeatedMoreThenOnce = true; + } } if ($ast->getId() === '#alternation') { @@ -201,6 +207,7 @@ private function walkRegexAst( $groupCombinations, $markVerbs, $captureOnlyNamed, + $repeatedMoreThenOnce, ); if ($ast->getId() !== '#alternation') { @@ -260,7 +267,7 @@ private function getQuantificationRange(TreeNode $node): array return [$min, $max]; } - private function createGroupType(TreeNode $group): Type + private function createGroupType(TreeNode $group, bool $repeatedMoreThenOnce): Type { $isNonEmpty = TrinaryLogic::createMaybe(); $isNumeric = TrinaryLogic::createMaybe(); @@ -269,9 +276,10 @@ private function createGroupType(TreeNode $group): Type $this->walkGroupAst($group, $isNonEmpty, $isNumeric, $inOptionalQuantification, $onlyLiterals); - if ($onlyLiterals !== null && $onlyLiterals !== []) { + if (!$repeatedMoreThenOnce && $onlyLiterals !== null && $onlyLiterals !== []) { return new ConstantStringType(implode('', $onlyLiterals)); } + if ($isNumeric->yes()) { $result = new IntersectionType([new StringType(), new AccessoryNumericStringType()]); if (!$isNonEmpty->yes()) { @@ -285,6 +293,9 @@ private function createGroupType(TreeNode $group): Type return new StringType(); } + /** + * @param array|null $onlyLiterals + */ private function walkGroupAst(TreeNode $ast, TrinaryLogic &$isNonEmpty, TrinaryLogic &$isNumeric, bool &$inOptionalQuantification, ?array &$onlyLiterals): void { $children = $ast->getChildren(); @@ -294,7 +305,6 @@ private function walkGroupAst(TreeNode $ast, TrinaryLogic &$isNonEmpty, TrinaryL && count($children) > 0 ) { $isNonEmpty = TrinaryLogic::createYes(); - $onlyLiterals = null; } elseif ($ast->getId() === '#quantification') { [$min] = $this->getQuantificationRange($ast); @@ -319,10 +329,10 @@ private function walkGroupAst(TreeNode $ast, TrinaryLogic &$isNonEmpty, TrinaryL if (!$inOptionalQuantification) { $isNonEmpty = TrinaryLogic::createYes(); } - } else { + } elseif (!in_array($ast->getValueToken(), ['capturing_name'], true)) { $onlyLiterals = null; } - } elseif (!in_array($ast->getId(), ['#capturing'], true)) { + } elseif (!in_array($ast->getId(), ['#capturing', '#namedcapturing'], true)) { $onlyLiterals = null; } else { $x = 1; @@ -346,6 +356,9 @@ private function walkGroupAst(TreeNode $ast, TrinaryLogic &$isNonEmpty, TrinaryL } } + /** + * @param array|null $onlyLiterals + */ private function getLiteralValue(TreeNode $node, ?array &$onlyLiterals): ?string { if ($node->getId() !== 'token') { @@ -357,9 +370,13 @@ private function getLiteralValue(TreeNode $node, ?array &$onlyLiterals): ?string $value = $node->getValueValue(); if (in_array($token, ['literal', 'escaped_end_class'], true)) { - if (strlen($node->getValueValue()) > 1 && $value[0] === '\\') { + if (strlen($value) > 1 && $value[0] === '\\') { return substr($value, 1); - } elseif ($token === 'literal' && $onlyLiterals !== null && !in_array($value, ['.'], true)) { + } elseif ( + $token === 'literal' + && $onlyLiterals !== null + && !in_array($value, ['.'], true) + ) { $onlyLiterals[] = $value; } diff --git a/tests/PHPStan/Analyser/nsrt/bug-11311.php b/tests/PHPStan/Analyser/nsrt/bug-11311.php index 40cbb49557..0dfb7cc4b4 100644 --- a/tests/PHPStan/Analyser/nsrt/bug-11311.php +++ b/tests/PHPStan/Analyser/nsrt/bug-11311.php @@ -14,9 +14,9 @@ function doFoo(string $s) { function doUnmatchedAsNull(string $s): void { if (preg_match('/(foo)?(bar)?(baz)?/', $s, $matches, PREG_UNMATCHED_AS_NULL)) { - assertType('array{string, non-empty-string|null, non-empty-string|null, non-empty-string|null}', $matches); + assertType("array{string, 'foo'|null, 'bar'|null, 'baz'|null}", $matches); } - assertType('array{}|array{string, non-empty-string|null, non-empty-string|null, non-empty-string|null}', $matches); + assertType("array{}|array{string, 'foo'|null, 'bar'|null, 'baz'|null}", $matches); } // see https://3v4l.org/VeDob @@ -70,13 +70,13 @@ function bug11331c(string $url):void { class UnmatchedAsNullWithTopLevelAlternation { function doFoo(string $s): void { if (preg_match('/Price: (?:(£)|(€))\d+/', $s, $matches, PREG_UNMATCHED_AS_NULL)) { - assertType('array{string, non-empty-string|null, non-empty-string|null}', $matches); // could be array{0: string, 1: null, 2: non-empty-string}|array{0: string, 1: non-empty-string, 2: null} + assertType("array{string, '£'|null, '€'|null}", $matches); // could be tagged union } } function doBar(string $s): void { if (preg_match('/Price: (?:(£)|(€))?\d+/', $s, $matches, PREG_UNMATCHED_AS_NULL)) { - assertType('array{string, non-empty-string|null, non-empty-string|null}', $matches); + assertType("array{string, '£'|null, '€'|null}", $matches); // could be tagged union } } } diff --git a/tests/PHPStan/Analyser/nsrt/bug11384.php b/tests/PHPStan/Analyser/nsrt/bug11384.php index 3996587786..12020de0b9 100644 --- a/tests/PHPStan/Analyser/nsrt/bug11384.php +++ b/tests/PHPStan/Analyser/nsrt/bug11384.php @@ -14,7 +14,7 @@ class HelloWorld public function sayHello(string $s): void { if (preg_match('{(' . Bar::VAL . ')}', $s, $m)) { - assertType('array{string, numeric-string}', $m); + assertType("array{string, '3'}", $m); } } } diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_all_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_all_shapes.php index 626f519a4c..9575402af7 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_all_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_all_shapes.php @@ -71,65 +71,65 @@ function (string $size): void { function (string $size): void { preg_match_all('/a(b)(\d+)?/', $size, $matches, PREG_SET_ORDER); - assertType("list", $matches); + assertType("list", $matches); }; function (string $size): void { if (preg_match_all('/ab(?P\d+)(?Pab)?/', $size, $matches)) { - assertType("array{0: list, num: list, 1: list, suffix: list, 2: list}", $matches); + assertType("array{0: list, num: list, 1: list, suffix: list<''|'ab'>, 2: list<''|'ab'>}", $matches); } }; function (string $size): void { if (preg_match_all('/ab(?P\d+)(?Pab)?/', $size, $matches, PREG_UNMATCHED_AS_NULL)) { - assertType("array{0: list, num: list, 1: list, suffix: list, 2: list}", $matches); + assertType("array{0: list, num: list, 1: list, suffix: list<'ab'|null>, 2: list<'ab'|null>}", $matches); } }; function (string $size): void { if (preg_match_all('/ab(?P\d+)(?Pab)?/', $size, $matches, PREG_SET_ORDER)) { - assertType("list", $matches); + assertType("list", $matches); } }; function (string $size): void { if (preg_match_all('/ab(?P\d+)(?Pab)?/', $size, $matches, PREG_PATTERN_ORDER)) { - assertType("array{0: list, num: list, 1: list, suffix: list, 2: list}", $matches); + assertType("array{0: list, num: list, 1: list, suffix: list<''|'ab'>, 2: list<''|'ab'>}", $matches); } }; function (string $size): void { if (preg_match_all('/ab(?P\d+)(?Pab)?/', $size, $matches, PREG_UNMATCHED_AS_NULL|PREG_SET_ORDER)) { - assertType("list", $matches); + assertType("list", $matches); } }; function (string $size): void { if (preg_match_all('/ab(?P\d+)(?Pab)?/', $size, $matches, PREG_UNMATCHED_AS_NULL|PREG_PATTERN_ORDER)) { - assertType("array{0: list, num: list, 1: list, suffix: list, 2: list}", $matches); + assertType("array{0: list, num: list, 1: list, suffix: list<'ab'|null>, 2: list<'ab'|null>}", $matches); } }; function (string $size): void { if (preg_match_all('/ab(?P\d+)(?Pab)?/', $size, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE)) { - assertType("list}, num: array{numeric-string, int<0, max>}, 1: array{numeric-string, int<0, max>}, suffix?: array{non-empty-string, int<0, max>}, 2?: array{non-empty-string, int<0, max>}}>", $matches); + assertType("list}, num: array{numeric-string, int<0, max>}, 1: array{numeric-string, int<0, max>}, suffix?: array{'ab', int<0, max>}, 2?: array{'ab', int<0, max>}}>", $matches); } }; function (string $size): void { if (preg_match_all('/ab(?P\d+)(?Pab)?/', $size, $matches, PREG_PATTERN_ORDER|PREG_OFFSET_CAPTURE)) { - assertType("array{0: list}>, num: list}>, 1: list}>, suffix: list<''|array{non-empty-string, int<0, max>}>, 2: list<''|array{non-empty-string, int<0, max>}>}", $matches); + assertType("array{0: list}>, num: list}>, 1: list}>, suffix: list<''|array{'ab', int<0, max>}>, 2: list<''|array{'ab', int<0, max>}>}", $matches); } }; function (string $size): void { if (preg_match_all('/ab(?P\d+)(?Pab)?/', $size, $matches, PREG_UNMATCHED_AS_NULL|PREG_SET_ORDER|PREG_OFFSET_CAPTURE)) { - assertType("list}, num: array{numeric-string|null, int<-1, max>}, 1: array{numeric-string|null, int<-1, max>}, suffix: array{non-empty-string|null, int<-1, max>}, 2: array{non-empty-string|null, int<-1, max>}}>", $matches); + assertType("list}, num: array{numeric-string|null, int<-1, max>}, 1: array{numeric-string|null, int<-1, max>}, suffix: array{'ab'|null, int<-1, max>}, 2: array{'ab'|null, int<-1, max>}}>", $matches); } }; function (string $size): void { if (preg_match_all('/ab(?P\d+)(?Pab)?/', $size, $matches, PREG_UNMATCHED_AS_NULL|PREG_PATTERN_ORDER|PREG_OFFSET_CAPTURE)) { - assertType("array{0: list}>, num: list}>, 1: list}>, suffix: list}>, 2: list}>}", $matches); + assertType("array{0: list}>, num: list}>, 1: list}>, suffix: list}>, 2: list}>}", $matches); } }; diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 5034357e57..cda127aaed 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -39,19 +39,19 @@ function doMatch(string $s): void { assertType('array{}|array{string, non-empty-string}', $matches); if (preg_match('/(a)(b)*(c)(d)*/', $s, $matches)) { - assertType('array{0: string, 1: non-empty-string, 2: string, 3: non-empty-string, 4?: non-empty-string}', $matches); + assertType("array{0: string, 1: 'a', 2: string, 3: 'c', 4?: non-empty-string}", $matches); } - assertType('array{}|array{0: string, 1: non-empty-string, 2: string, 3: non-empty-string, 4?: non-empty-string}', $matches); + assertType("array{}|array{0: string, 1: 'a', 2: string, 3: 'c', 4?: non-empty-string}", $matches); if (preg_match('/(a)(?b)*(c)(d)*/', $s, $matches)) { - assertType("array{0: string, 1: 'a', name: string, 2: string, 3: 'c', 4?: 'd'}", $matches); + assertType("array{0: string, 1: 'a', name: string, 2: string, 3: 'c', 4?: non-empty-string}", $matches); } - assertType("array{}|array{0: string, 1: 'a', name: string, 2: string, 3: 'c', 4?: 'd'}", $matches); + assertType("array{}|array{0: string, 1: 'a', name: string, 2: string, 3: 'c', 4?: non-empty-string}", $matches); if (preg_match('/(a)(b)*(c)(?d)*/', $s, $matches)) { - assertType('array{0: string, 1: non-empty-string, 2: string, 3: non-empty-string, name?: non-empty-string, 4?: non-empty-string}', $matches); + assertType("array{0: string, 1: 'a', 2: string, 3: 'c', name?: non-empty-string, 4?: non-empty-string}", $matches); } - assertType('array{}|array{0: string, 1: non-empty-string, 2: string, 3: non-empty-string, name?: non-empty-string, 4?: non-empty-string}', $matches); + assertType("array{}|array{0: string, 1: 'a', 2: string, 3: 'c', name?: non-empty-string, 4?: non-empty-string}", $matches); if (preg_match('/(a|b)|(?:c)/', $s, $matches)) { assertType('array{0: string, 1?: non-empty-string}', $matches); @@ -59,34 +59,34 @@ function doMatch(string $s): void { assertType('array{}|array{0: string, 1?: non-empty-string}', $matches); if (preg_match('/(foo)(bar)(baz)+/', $s, $matches)) { - assertType('array{string, non-empty-string, non-empty-string, non-empty-string}', $matches); + assertType("array{string, 'foo', 'bar', non-empty-string}", $matches); } - assertType('array{}|array{string, non-empty-string, non-empty-string, non-empty-string}', $matches); + assertType("array{}|array{string, 'foo', 'bar', non-empty-string}", $matches); if (preg_match('/(foo)(bar)(baz)*/', $s, $matches)) { - assertType('array{0: string, 1: non-empty-string, 2: non-empty-string, 3?: non-empty-string}', $matches); + assertType("array{0: string, 1: 'foo', 2: 'bar', 3?: non-empty-string}", $matches); } - assertType('array{}|array{0: string, 1: non-empty-string, 2: non-empty-string, 3?: non-empty-string}', $matches); + assertType("array{}|array{0: string, 1: 'foo', 2: 'bar', 3?: non-empty-string}", $matches); if (preg_match('/(foo)(bar)(baz)?/', $s, $matches)) { - assertType('array{0: string, 1: non-empty-string, 2: non-empty-string, 3?: non-empty-string}', $matches); + assertType("array{0: string, 1: 'foo', 2: 'bar', 3?: 'baz'}", $matches); } - assertType('array{}|array{0: string, 1: non-empty-string, 2: non-empty-string, 3?: non-empty-string}', $matches); + assertType("array{}|array{0: string, 1: 'foo', 2: 'bar', 3?: 'baz'}", $matches); if (preg_match('/(foo)(bar)(baz){0,3}/', $s, $matches)) { - assertType('array{0: string, 1: non-empty-string, 2: non-empty-string, 3?: non-empty-string}', $matches); + assertType("array{0: string, 1: 'foo', 2: 'bar', 3?: non-empty-string}", $matches); } - assertType('array{}|array{0: string, 1: non-empty-string, 2: non-empty-string, 3?: non-empty-string}', $matches); + assertType("array{}|array{0: string, 1: 'foo', 2: 'bar', 3?: non-empty-string}", $matches); if (preg_match('/(foo)(bar)(baz){2,3}/', $s, $matches)) { - assertType('array{string, non-empty-string, non-empty-string, non-empty-string}', $matches); + assertType("array{string, 'foo', 'bar', non-empty-string}", $matches); } - assertType('array{}|array{string, non-empty-string, non-empty-string, non-empty-string}', $matches); + assertType("array{}|array{string, 'foo', 'bar', non-empty-string}", $matches); if (preg_match('/(foo)(bar)(baz){2}/', $s, $matches)) { - assertType('array{string, non-empty-string, non-empty-string, non-empty-string}', $matches); + assertType("array{string, 'foo', 'bar', non-empty-string}", $matches); } - assertType('array{}|array{string, non-empty-string, non-empty-string, non-empty-string}', $matches); + assertType("array{}|array{string, 'foo', 'bar', non-empty-string}", $matches); } function doNonCapturingGroup(string $s): void { @@ -115,9 +115,9 @@ function doNamedSubpattern(string $s): void { function doOffsetCapture(string $s): void { if (preg_match('/(foo)(bar)(baz)/', $s, $matches, PREG_OFFSET_CAPTURE)) { - assertType('array{array{string, int<0, max>}, array{non-empty-string, int<0, max>}, array{non-empty-string, int<0, max>}, array{non-empty-string, int<0, max>}}', $matches); + assertType("array{array{string, int<0, max>}, array{'foo', int<0, max>}, array{'bar', int<0, max>}, array{'baz', int<0, max>}}", $matches); } - assertType('array{}|array{array{string, int<0, max>}, array{non-empty-string, int<0, max>}, array{non-empty-string, int<0, max>}, array{non-empty-string, int<0, max>}}', $matches); + assertType("array{}|array{array{string, int<0, max>}, array{'foo', int<0, max>}, array{'bar', int<0, max>}, array{'baz', int<0, max>}}", $matches); } function doUnknownFlags(string $s, int $flags): void { @@ -233,10 +233,10 @@ function testUnionPattern(string $s): void function doFoo(string $row): void { if (preg_match('~^(a(b))$~', $row, $matches) === 1) { - assertType("array{string, non-empty-string, 'b'}", $matches); + assertType("array{string, 'ab', 'b'}", $matches); } if (preg_match('~^(a(b)?)$~', $row, $matches) === 1) { - assertType('array{0: string, 1: non-empty-string, 2?: non-empty-string}', $matches); + assertType("array{0: string, 1: non-empty-string, 2?: 'b'}", $matches); } if (preg_match('~^(a(b)?)?$~', $row, $matches) === 1) { assertType("array{0: string, 1?: non-empty-string, 2?: 'b'}", $matches); @@ -300,7 +300,7 @@ function (string $size): void { if (preg_match('~^a\.(b)?(c)?d~', $size, $matches) !== 1) { throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); } - assertType('array{0: string, 1?: non-empty-string, 2?: non-empty-string}', $matches); + assertType("array{0: string, 1?: 'b', 2?: 'c'}", $matches); }; function (string $size): void { @@ -321,7 +321,7 @@ function (string $size): void { if (preg_match('~\{(?:(include)\\s+(?:[$]?\\w+(?}, array{non-empty-string|null, int<-1, max>}, array{non-empty-string|null, int<-1, max>}, array{non-empty-string|null, int<-1, max>}}', $matches); + assertType("array{array{string|null, int<-1, max>}, array{'foo'|null, int<-1, max>}, array{'bar'|null, int<-1, max>}, array{'baz'|null, int<-1, max>}}", $matches); } - assertType('array{}|array{array{string|null, int<-1, max>}, array{non-empty-string|null, int<-1, max>}, array{non-empty-string|null, int<-1, max>}, array{non-empty-string|null, int<-1, max>}}', $matches); + assertType("array{}|array{array{string|null, int<-1, max>}, array{'foo'|null, int<-1, max>}, array{'bar'|null, int<-1, max>}, array{'baz'|null, int<-1, max>}}", $matches); } function doNonAutoCapturingModifier(string $s): void { diff --git a/tests/PHPStan/Analyser/nsrt/preg_replace_callback_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_replace_callback_shapes.php index 574be40769..621051467e 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_replace_callback_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_replace_callback_shapes.php @@ -8,7 +8,7 @@ function (string $s): void { preg_replace_callback( '/(foo)?(bar)?(baz)?/', function ($matches) { - assertType('array{string, non-empty-string|null, non-empty-string|null, non-empty-string|null}', $matches); + assertType("array{string, 'foo'|null, 'bar'|null, 'baz'|null}", $matches); return ''; }, $s, @@ -22,7 +22,7 @@ function (string $s): void { preg_replace_callback( '/(foo)?(bar)?(baz)?/', function ($matches) { - assertType('array{0: array{string, int<0, max>}, 1?: array{non-empty-string, int<0, max>}, 2?: array{non-empty-string, int<0, max>}, 3?: array{non-empty-string, int<0, max>}}', $matches); + assertType("array{0: array{string, int<0, max>}, 1?: array{'foo', int<0, max>}, 2?: array{'bar', int<0, max>}, 3?: array{'baz', int<0, max>}}", $matches); return ''; }, $s, @@ -36,7 +36,7 @@ function (string $s): void { preg_replace_callback( '/(foo)?(bar)?(baz)?/', function ($matches) { - assertType('array{array{string|null, int<-1, max>}, array{non-empty-string|null, int<-1, max>}, array{non-empty-string|null, int<-1, max>}, array{non-empty-string|null, int<-1, max>}}', $matches); + assertType("array{array{string|null, int<-1, max>}, array{'foo'|null, int<-1, max>}, array{'bar'|null, int<-1, max>}, array{'baz'|null, int<-1, max>}}", $matches); return ''; }, $s, From fd8c5001eee2ccb42ef3293d96653c3cf03e5a95 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sun, 4 Aug 2024 08:39:13 +0200 Subject: [PATCH 3/8] fix reset groups --- src/Type/Php/RegexGroupParser.php | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/Type/Php/RegexGroupParser.php b/src/Type/Php/RegexGroupParser.php index 3cb5c8605d..567339ce47 100644 --- a/src/Type/Php/RegexGroupParser.php +++ b/src/Type/Php/RegexGroupParser.php @@ -118,16 +118,26 @@ private function walkRegexAst( { $group = null; if ($ast->getId() === '#capturing') { + $maybeConstant = !$repeatedMoreThenOnce; + if ($parentGroup !== null && $parentGroup->resetsGroupCounter()) { + $maybeConstant = false; + } + $group = new RegexCapturingGroup( $captureGroupId++, null, $inAlternation ? $alternationId : null, $inOptionalQuantification, $parentGroup, - $this->createGroupType($ast, $repeatedMoreThenOnce), + $this->createGroupType($ast, $maybeConstant), ); $parentGroup = $group; } elseif ($ast->getId() === '#namedcapturing') { + $maybeConstant = !$repeatedMoreThenOnce; + if ($parentGroup !== null && $parentGroup->resetsGroupCounter()) { + $maybeConstant = false; + } + $name = $ast->getChild(0)->getValueValue(); $group = new RegexCapturingGroup( $captureGroupId++, @@ -135,7 +145,7 @@ private function walkRegexAst( $inAlternation ? $alternationId : null, $inOptionalQuantification, $parentGroup, - $this->createGroupType($ast, $repeatedMoreThenOnce), + $this->createGroupType($ast, $maybeConstant), ); $parentGroup = $group; } elseif ($ast->getId() === '#noncapturing') { @@ -267,7 +277,7 @@ private function getQuantificationRange(TreeNode $node): array return [$min, $max]; } - private function createGroupType(TreeNode $group, bool $repeatedMoreThenOnce): Type + private function createGroupType(TreeNode $group, bool $maybeConstant): Type { $isNonEmpty = TrinaryLogic::createMaybe(); $isNumeric = TrinaryLogic::createMaybe(); @@ -276,7 +286,7 @@ private function createGroupType(TreeNode $group, bool $repeatedMoreThenOnce): T $this->walkGroupAst($group, $isNonEmpty, $isNumeric, $inOptionalQuantification, $onlyLiterals); - if (!$repeatedMoreThenOnce && $onlyLiterals !== null && $onlyLiterals !== []) { + if ($maybeConstant && $onlyLiterals !== null && $onlyLiterals !== []) { return new ConstantStringType(implode('', $onlyLiterals)); } From b46b7ded6aea1b4be1bdc9589a01896af0821ec4 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sun, 4 Aug 2024 08:40:02 +0200 Subject: [PATCH 4/8] Update preg_match_shapes.php --- tests/PHPStan/Analyser/nsrt/preg_match_shapes.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index cda127aaed..e75e5f6fb6 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -492,7 +492,7 @@ function bug11323(string $s): void { assertType('array{string, non-empty-string, non-empty-string, non-empty-string}', $matches); } if (preg_match('{(a)??(b)*+(c++)(d)+?}', $s, $matches)) { - assertType("array{string, string, string, non-empty-string, 'd'}", $matches); + assertType("array{string, ''|'a', string, non-empty-string, non-empty-string}", $matches); } if (preg_match('{(.\d)}', $s, $matches)) { assertType('array{string, non-empty-string}', $matches); From cfa1882301ed872a80328912730d6a550f9b1910 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sun, 4 Aug 2024 08:44:08 +0200 Subject: [PATCH 5/8] Update bug-11311-php72.php --- tests/PHPStan/Analyser/nsrt/bug-11311-php72.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/PHPStan/Analyser/nsrt/bug-11311-php72.php b/tests/PHPStan/Analyser/nsrt/bug-11311-php72.php index 3b60727e84..7b75770c54 100644 --- a/tests/PHPStan/Analyser/nsrt/bug-11311-php72.php +++ b/tests/PHPStan/Analyser/nsrt/bug-11311-php72.php @@ -14,17 +14,17 @@ function doFoo(string $s) { function doUnmatchedAsNull(string $s): void { if (preg_match('/(foo)?(bar)?(baz)?/', $s, $matches, PREG_UNMATCHED_AS_NULL)) { - assertType('array{0: string, 1?: non-empty-string, 2?: non-empty-string, 3?: non-empty-string}', $matches); + assertType("array{0: string, 1?: 'foo', 2?: 'bar', 3?: 'baz'}", $matches); } - assertType('array{}|array{0: string, 1?: non-empty-string, 2?: non-empty-string, 3?: non-empty-string}', $matches); + assertType("array{}|array{0: string, 1?: 'foo', 2?: 'bar', 3?: 'baz'}", $matches); } // see https://3v4l.org/VeDob#veol function unmatchedAsNullWithOptionalGroup(string $s): void { if (preg_match('/Price: (£|€)?\d+/', $s, $matches, PREG_UNMATCHED_AS_NULL)) { - assertType('array{0: string, 1?: non-empty-string}', $matches); + assertType("array{0: string, 1?: 'foo', 2?: 'bar', 3?: 'baz'}", $matches); } else { assertType('array{}', $matches); } - assertType('array{}|array{0: string, 1?: non-empty-string}', $matches); + assertType("array{}|array{0: string, 1?: 'foo', 2?: 'bar', 3?: 'baz'}", $matches); } From 37fbdbb8a121cfa41a658946e2941aac214f1b64 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sun, 4 Aug 2024 08:49:00 +0200 Subject: [PATCH 6/8] fix php 7.2 --- src/Type/Php/RegexGroupParser.php | 2 -- tests/PHPStan/Analyser/nsrt/bug-11311-php72.php | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Type/Php/RegexGroupParser.php b/src/Type/Php/RegexGroupParser.php index 567339ce47..ff08a0dfba 100644 --- a/src/Type/Php/RegexGroupParser.php +++ b/src/Type/Php/RegexGroupParser.php @@ -344,8 +344,6 @@ private function walkGroupAst(TreeNode $ast, TrinaryLogic &$isNonEmpty, TrinaryL } } elseif (!in_array($ast->getId(), ['#capturing', '#namedcapturing'], true)) { $onlyLiterals = null; - } else { - $x = 1; } // [^0-9] should not parse as numeric-string, and [^list-everything-but-numbers] is technically diff --git a/tests/PHPStan/Analyser/nsrt/bug-11311-php72.php b/tests/PHPStan/Analyser/nsrt/bug-11311-php72.php index 7b75770c54..51fc1a05be 100644 --- a/tests/PHPStan/Analyser/nsrt/bug-11311-php72.php +++ b/tests/PHPStan/Analyser/nsrt/bug-11311-php72.php @@ -22,9 +22,9 @@ function doUnmatchedAsNull(string $s): void { // see https://3v4l.org/VeDob#veol function unmatchedAsNullWithOptionalGroup(string $s): void { if (preg_match('/Price: (£|€)?\d+/', $s, $matches, PREG_UNMATCHED_AS_NULL)) { - assertType("array{0: string, 1?: 'foo', 2?: 'bar', 3?: 'baz'}", $matches); + assertType("array{0: string, 1?: non-empty-string}", $matches); } else { assertType('array{}', $matches); } - assertType("array{}|array{0: string, 1?: 'foo', 2?: 'bar', 3?: 'baz'}", $matches); + assertType("array{}|array{0: string, 1?: non-empty-string}", $matches); } From cc7adc2a079a1b32a44bf4df8fac9cec608d1113 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sun, 4 Aug 2024 08:59:17 +0200 Subject: [PATCH 7/8] Update preg_match_shapes.php --- tests/PHPStan/Analyser/nsrt/preg_match_shapes.php | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index e75e5f6fb6..a940e62ff4 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -558,3 +558,15 @@ function (string $s): void { assertType("array{string, '£', 'abc'}|array{string, numeric-string, 'b'}", $matches); } }; + +function (string $s): void { + if (rand(0,1)) { + $p = '/Price: (£)/i'; + } else { + $p = '/Price: (£|(\d)|(x))/i'; + } + + if (preg_match($p, $s, $matches)) { + assertType("array{0: string, 1: non-empty-string, 2?: numeric-string, 3?: 'x'}", $matches); + } +}; From edcd9fe117a0d40a894ed3ac8af667f1c52d2cc3 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sun, 4 Aug 2024 09:28:40 +0200 Subject: [PATCH 8/8] typo --- src/Type/Php/RegexGroupParser.php | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Type/Php/RegexGroupParser.php b/src/Type/Php/RegexGroupParser.php index ff08a0dfba..f63959e78d 100644 --- a/src/Type/Php/RegexGroupParser.php +++ b/src/Type/Php/RegexGroupParser.php @@ -113,12 +113,12 @@ private function walkRegexAst( array &$groupCombinations, array &$markVerbs, bool $captureOnlyNamed, - bool $repeatedMoreThenOnce, + bool $repeatedMoreThanOnce, ): void { $group = null; if ($ast->getId() === '#capturing') { - $maybeConstant = !$repeatedMoreThenOnce; + $maybeConstant = !$repeatedMoreThanOnce; if ($parentGroup !== null && $parentGroup->resetsGroupCounter()) { $maybeConstant = false; } @@ -133,7 +133,7 @@ private function walkRegexAst( ); $parentGroup = $group; } elseif ($ast->getId() === '#namedcapturing') { - $maybeConstant = !$repeatedMoreThenOnce; + $maybeConstant = !$repeatedMoreThanOnce; if ($parentGroup !== null && $parentGroup->resetsGroupCounter()) { $maybeConstant = false; } @@ -175,7 +175,7 @@ private function walkRegexAst( } if ($max === null || $max > 1) { - $repeatedMoreThenOnce = true; + $repeatedMoreThanOnce = true; } } @@ -217,7 +217,7 @@ private function walkRegexAst( $groupCombinations, $markVerbs, $captureOnlyNamed, - $repeatedMoreThenOnce, + $repeatedMoreThanOnce, ); if ($ast->getId() !== '#alternation') {