diff --git a/resources/RegexGrammar.pp b/resources/RegexGrammar.pp index b8bea027d3..ba174feb29 100644 --- a/resources/RegexGrammar.pp +++ b/resources/RegexGrammar.pp @@ -42,14 +42,16 @@ // // Character classes. +// tokens suffixed with "fc_" are the same as without such suffix but followed by "class:_class" +%token negative_class_fc_ \[\^(?=\]) -> class_fc +%token class_fc_ \[(?=\]) -> class_fc +%token class_fc:_class \] -> class %token negative_class_ \[\^ -> class %token class_ \[ -> class %token class:posix_class \[:\^?[a-z]+:\] %token class:class_ \[ -%token class:_class_literal (?<=[^\\]\[|[^\\]\[\^)\] %token class:_class \] -> default %token class:range \- -%token class:escaped_end_class \\\] // taken over from literals but class:character has \b support on top (backspace in character classes) %token class:character \\([aefnrtb]|c[\x00-\x7f]) %token class:dynamic_character \\([0-7]{3}|x[0-9a-zA-Z]{2}|x{[0-9a-zA-Z]+}) @@ -58,7 +60,8 @@ // Internal options. // See https://www.regular-expressions.info/refmodifiers.html -%token internal_option \(\?([imsxnJUX^]|xx)?-?([imsxnJUX^]|xx)\) +// and https://www.php.net/manual/en/regexp.reference.internal-options.php +%token internal_option \(\?[imsxnJUX^]*-?[imsxnJUX^]+\) // Lookahead and lookbehind assertions. %token lookahead_ \(\?= @@ -88,7 +91,7 @@ %token nc:_named_capturing > -> default %token nc:capturing_name .+?(?=(?) %token non_capturing_ \(\?: -%token non_capturing_internal_option \(\?([imsxnJUX^]|xx)?-?([imsxnJUX^]|xx): +%token non_capturing_internal_option \(\?[imsxnJUX^]*-?[imsxnJUX^]+: %token non_capturing_reset_ \(\?\| %token atomic_group_ \(\?> %token capturing_ \( @@ -177,10 +180,14 @@ #class: ( - ::negative_class_:: #negativeclass + ::negative_class_fc_:: #negativeclass + <_class> + | ::class_fc_:: + <_class> + | ::negative_class_:: #negativeclass | ::class_:: ) - ( | <_class_literal> )? ( | | range() | literal() | )* ? + ? ( | | range() ? | literal() )* ? 
::_class:: #range: diff --git a/src/Type/Regex/RegexGroupParser.php b/src/Type/Regex/RegexGroupParser.php index 9780b2c69a..ec944bb630 100644 --- a/src/Type/Regex/RegexGroupParser.php +++ b/src/Type/Regex/RegexGroupParser.php @@ -525,11 +525,11 @@ private function getLiteralValue(TreeNode $node, ?array &$onlyLiterals, bool $ap if ( in_array($token, [ - 'literal', 'escaped_end_class', + 'literal', // literal "-" in front/back of a character class like '[-a-z]' or '[abc-]', not forming a range 'range', // literal "[" or "]" inside character classes '[[]' or '[]]' - 'class_', '_class_literal', + 'class_', '_class', ], true) ) { if (str_contains($patternModifiers, 'x') && trim($value) === '') { @@ -544,7 +544,6 @@ private function getLiteralValue(TreeNode $node, ?array &$onlyLiterals, bool $ap if ( $appendLiterals - && in_array($token, ['literal', 'range', 'class_', '_class_literal'], true) && $onlyLiterals !== null && (!in_array($value, ['.'], true) || $isEscaped || $inCharacterClass) ) { diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index f92af453fb..0a6b883e4c 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -467,6 +467,9 @@ function bug11323(string $s): void { if (preg_match('{([-\p{L}[\]*|\x03\a\b+?{}(?:)-]+[^[:digit:]?{}a-z0-9#-k]+)(a-z)}', $s, $matches)) { assertType("array{string, non-falsy-string, 'a-z'}", $matches); } + if (preg_match('{(\d+)(?i)insensitive((?xs-i)case SENSITIVE here.+and dot matches new lines)}', $s, $matches)) { + assertType('array{string, numeric-string, non-falsy-string}', $matches); + } if (preg_match('{(\d+)(?i)insensitive((?x-i)case SENSITIVE here(?i:insensitive non-capturing group))}', $s, $matches)) { assertType('array{string, numeric-string, non-falsy-string}', $matches); } @@ -778,3 +781,121 @@ function testLtrimDelimiter (string $string): void { assertType("array{string, 'x'}", $matches); } 
}
+
+function testUnescapeBackslash (string $string): void { // Fixture: how runs of backslashes in a nowdoc pattern affect the type asserted for $matches.
+	if (preg_match(<<<'EOD'
+		~(\[)~
+		EOD, $string, $matches)) {
+		assertType("array{string, '['}", $matches); // escaped "[" is expected to be the constant '['
+	}
+
+	if (preg_match(<<<'EOD'
+		~(\d)~
+		EOD, $string, $matches)) {
+		assertType("array{string, numeric-string}", $matches); // single backslash: \d is the digit class
+	}
+
+	if (preg_match(<<<'EOD'
+		~(\\d)~
+		EOD, $string, $matches)) {
+		assertType("array{string, '\\\d'}", $matches); // two backslashes: an escaped backslash followed by a plain "d"
+	}
+
+	if (preg_match(<<<'EOD'
+		~(\\\d)~
+		EOD, $string, $matches)) {
+		assertType("array{string, non-falsy-string}", $matches); // three backslashes: escaped backslash followed by \d, so no constant value is expected
+	}
+
+	if (preg_match(<<<'EOD'
+		~(\\\\d)~
+		EOD, $string, $matches)) {
+		assertType("array{string, '\\\\\\\d'}", $matches); // four backslashes: two escaped backslashes followed by a plain "d"
+	}
+}
+
+function testEscapedDelimiter (string $string): void { // Fixture: escaped delimiters, brackets and braces, outside and inside character classes.
+	if (preg_match(<<<'EOD'
+		/(\/)/
+		EOD, $string, $matches)) {
+		assertType("array{string, '/'}", $matches); // escaped "/" while "/" is the delimiter
+	}
+
+	if (preg_match(<<<'EOD'
+		~(\~)~
+		EOD, $string, $matches)) {
+		assertType("array{string, '~'}", $matches); // escaped "~" while "~" is the delimiter
+	}
+
+	if (preg_match(<<<'EOD'
+		~(\[2])~
+		EOD, $string, $matches)) {
+		assertType("array{string, '[2]'}", $matches); // escaped "[" and an unescaped "]" are both expected as literals
+	}
+
+	if (preg_match(<<<'EOD'
+		[(\[2\])]
+		EOD, $string, $matches)) {
+		assertType("array{string, '[2]'}", $matches); // same capture, with "[" and "]" used as the pattern delimiters
+	}
+
+	if (preg_match(<<<'EOD'
+		~(\{2})~
+		EOD, $string, $matches)) {
+		assertType("array{string, '{2}'}", $matches); // escaped "{" and an unescaped "}" are both expected as literals
+	}
+
+	if (preg_match(<<<'EOD'
+		{(\{2\})}
+		EOD, $string, $matches)) {
+		assertType("array{string, '{2}'}", $matches); // same capture, with "{" and "}" used as the pattern delimiters
+	}
+
+	if (preg_match(<<<'EOD'
+		~([a\]])~
+		EOD, $string, $matches)) {
+		assertType("array{string, ']'|'a'}", $matches); // character class holding "a" and an escaped "]"
+	}
+
+	if (preg_match(<<<'EOD'
+		~([a[])~
+		EOD, $string, $matches)) {
+		assertType("array{string, '['|'a'}", $matches); // character class holding "a" and an unescaped "["
+	}
+
+	if (preg_match(<<<'EOD'
+		~([a\]b])~
+		EOD, $string, $matches)) {
+		assertType("array{string, ']'|'a'|'b'}", $matches); // escaped "]" in the middle of a character class
+	}
+
+	if (preg_match(<<<'EOD'
+		~([a[b])~
+		EOD, $string, $matches)) {
+		assertType("array{string, '['|'a'|'b'}", $matches); // unescaped "[" in the middle of a character class
+	}
+
+	if (preg_match(<<<'EOD'
+		~([a\[b])~
+		EOD, $string, $matches)) {
+		assertType("array{string, '['|'a'|'b'}", $matches); // escaped "[" in the middle of a character class
+	}
+
+	if (preg_match(<<<'EOD'
+		[([a\[b])]
+		EOD, $string, $matches)) {
+		assertType("array{string, '['|'a'|'b'}", $matches); // same class, with "[" and "]" used as the pattern delimiters
+	}
+
+	if (preg_match(<<<'EOD'
+		{(x\\\{)|(y\\\\\})}
+		EOD, $string, $matches)) {
+		assertType("array{string, '', 'y\\\\\\\}'}|array{string, 'x\\\{'}", $matches); // brace delimiters with alternation: group 1 is x, one escaped backslash, escaped "{"; group 2 is y, two escaped backslashes, escaped "}"
+	}
+}
+
+function bugUnescapedDashAfterRange (string $string): void { // Fixture: an unescaped "-" placed directly after a range inside a character class.
+	if (preg_match('/([0-1-y])/', $string, $matches)) {
+		assertType("array{string, non-empty-string}", $matches); // expected type is the general non-empty-string, not a union of constants
+	}
+}