diff --git a/resources/RegexGrammar.pp b/resources/RegexGrammar.pp index ec3b0e7adb..b8bea027d3 100644 --- a/resources/RegexGrammar.pp +++ b/resources/RegexGrammar.pp @@ -41,10 +41,6 @@ // @license New BSD License // - -// Skip. -%skip nl \n - // Character classes. %token negative_class_ \[\^ -> class %token class_ \[ -> class @@ -58,7 +54,7 @@ %token class:character \\([aefnrtb]|c[\x00-\x7f]) %token class:dynamic_character \\([0-7]{3}|x[0-9a-zA-Z]{2}|x{[0-9a-zA-Z]+}) %token class:character_type \\([CdDhHNRsSvVwWX]|[pP]{[^}]+}) -%token class:literal \\.|. +%token class:literal \\.|.|\n // Internal options. // See https://www.regular-expressions.info/refmodifiers.html @@ -82,6 +78,11 @@ %token co:_comment \) -> default %token co:comment .*?(?=(? mark +%token mark:name [^)]+ +%token mark:_marker \) -> default + // Capturing group. %token named_capturing_ \(\?P?< -> nc %token nc:_named_capturing > -> default @@ -122,7 +123,7 @@ %token character_type \\([CdDhHNRsSvVwWX]|[pP]{[^}]+}) %token anchor \\([bBAZzG])|\^|\$ %token match_point_reset \\K -%token literal \\.|. +%token literal \\.|.|\n // Rules. @@ -190,7 +191,8 @@ | literal() #capturing: - ::comment_:: ? ::_comment:: #comment + ::marker_:: ::_marker:: #mark + | ::comment_:: ? ::_comment:: #comment | ( ::named_capturing_:: ::_named_capturing:: #namedcapturing | ::non_capturing_:: #noncapturing diff --git a/src/Type/Php/RegexArrayShapeMatcher.php b/src/Type/Php/RegexArrayShapeMatcher.php index 6f7bfcb504..1b37617834 100644 --- a/src/Type/Php/RegexArrayShapeMatcher.php +++ b/src/Type/Php/RegexArrayShapeMatcher.php @@ -125,7 +125,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched // regex could not be parsed by Hoa/Regex return null; } - [$groupList, $groupCombinations] = $parseResult; + [$groupList, $groupCombinations, $markVerbs] = $parseResult; $trailingOptionals = 0; foreach (array_reverse($groupList) as $captureGroup) { @@ -152,6 +152,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched $wasMatched, $trailingOptionals, $flags ?? 0, + $markVerbs, ); if (!$this->containsUnmatchedAsNull($flags ?? 0)) { @@ -189,6 +190,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched $wasMatched, $trailingOptionals, $flags ?? 0, + $markVerbs, ); $combiTypes[] = $combiType; @@ -211,6 +213,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched $wasMatched, $trailingOptionals, $flags ?? 0, + $markVerbs, ); } @@ -266,12 +269,14 @@ private function getOnlyTopLevelAlternationId(array $captureGroups): ?int /** * @param array $captureGroups + * @param list $markVerbs */ private function buildArrayType( array $captureGroups, TrinaryLogic $wasMatched, int $trailingOptionals, int $flags, + array $markVerbs, ): Type { $builder = ConstantArrayTypeBuilder::createEmpty(); @@ -325,6 +330,18 @@ private function buildArrayType( $i++; } + if (count($markVerbs) > 0) { + $markTypes = []; + foreach ($markVerbs as $mark) { + $markTypes[] = new ConstantStringType($mark); + } + $builder->setOffsetValueType( + $this->getKeyType('MARK'), + TypeCombinator::union(...$markTypes), + true, + ); + } + return $builder->getArray(); } @@ -372,7 +389,7 @@ private function getValueType(Type $baseType, int $flags): Type } /** - * @return array{array, array>}|null + * @return array{array, array>, list}|null */ private function parseGroups(string $regex): ?array { @@ -398,6 +415,7 @@ private function parseGroups(string $regex): ?array $groupCombinations = []; $alternationId = -1; $captureGroupId = 100; + $markVerbs = []; $this->walkRegexAst( $ast, false, @@ -408,14 +426,16 @@ private function parseGroups(string $regex): ?array $captureGroupId, $capturingGroups, $groupCombinations, + $markVerbs, ); - return [$capturingGroups, $groupCombinations]; + return [$capturingGroups, $groupCombinations, $markVerbs]; } /** * @param array $capturingGroups * @param array> $groupCombinations + * @param list $markVerbs */ private function walkRegexAst( TreeNode $ast, @@ -427,6 +447,7 @@ private function walkRegexAst( int &$captureGroupId, array &$capturingGroups, array &$groupCombinations, + array &$markVerbs, ): void { $group = null; @@ -441,7 +462,7 @@ private function walkRegexAst( ); $parentGroup = $group; } elseif ($ast->getId() === '#namedcapturing') { - $name = $ast->getChild(0)->getValue()['value']; + $name = $ast->getChild(0)->getValueValue(); $group = new RegexCapturingGroup( $captureGroupId++, $name, @@ -483,6 +504,11 @@ private function walkRegexAst( $inAlternation = true; } + if ($ast->getId() === '#mark') { + $markVerbs[] = $ast->getChild(0)->getValueValue(); + return; + } + if ($group instanceof RegexCapturingGroup) { $capturingGroups[$group->getId()] = $group; @@ -506,6 +532,7 @@ private function walkRegexAst( $captureGroupId, $capturingGroups, $groupCombinations, + $markVerbs, ); if ($ast->getId() !== '#alternation') { diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 5fb5f3a3df..c273a83a47 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -518,6 +518,12 @@ function bug11323(string $s): void { if (preg_match('{([^1-4])}', $s, $matches)) { assertType('array{string, non-empty-string}', $matches); } + if (preg_match("{([\r\n]+)(\n)([\n])}", $s, $matches)) { + assertType('array{string, non-empty-string, non-empty-string, non-empty-string}', $matches); + } + if (preg_match('/foo(*:first)|bar(*:second)([x])/', $s, $matches)) { + assertType("array{0: string, 1?: non-empty-string, MARK?: 'first'|'second'}", $matches); + } } function (string $s): void {