Skip to content

Commit 0ce2624

Browse files
committed
Implement array shapes for preg_match()
fix n modifier support PREG_OFFSET_CAPTURE cs separated 8.2 tests support PREG_OFFSET_CAPTURE|PREG_UNMATCHED_AS_NULL fix
1 parent 16d5b01 commit 0ce2624

File tree

6 files changed

+291
-0
lines changed

6 files changed

+291
-0
lines changed

conf/config.neon

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1450,6 +1450,14 @@ services:
14501450
tags:
14511451
- phpstan.dynamicFunctionThrowTypeExtension
14521452

1453+
-
1454+
class: PHPStan\Type\Php\PregMatchTypeSpecifyingExtension
1455+
tags:
1456+
- phpstan.typeSpecifier.functionTypeSpecifyingExtension
1457+
1458+
-
1459+
class: PHPStan\Type\Php\RegexShapeMatcher
1460+
14531461
-
14541462
class: PHPStan\Type\Php\ReflectionClassConstructorThrowTypeExtension
14551463
tags:
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
<?php declare(strict_types = 1);
2+
3+
namespace PHPStan\Type\Php;
4+
5+
use PhpParser\Node\Expr\FuncCall;
6+
use PHPStan\Analyser\Scope;
7+
use PHPStan\Analyser\SpecifiedTypes;
8+
use PHPStan\Analyser\TypeSpecifier;
9+
use PHPStan\Analyser\TypeSpecifierAwareExtension;
10+
use PHPStan\Analyser\TypeSpecifierContext;
11+
use PHPStan\Reflection\FunctionReflection;
12+
use PHPStan\Type\Constant\ConstantIntegerType;
13+
use PHPStan\Type\FunctionTypeSpecifyingExtension;
14+
use PHPStan\Type\TypeCombinator;
15+
use function count;
16+
use function in_array;
17+
use function strtolower;
18+
use const PREG_OFFSET_CAPTURE;
19+
use const PREG_UNMATCHED_AS_NULL;
20+
21+
/**
 * Type-specifying extension for preg_match().
 *
 * When the pattern argument resolves to one or more constant strings, the
 * expression passed as third argument ($matches) is specified with the union
 * of the array shapes RegexShapeMatcher computes for those patterns.
 */
final class PregMatchTypeSpecifyingExtension implements FunctionTypeSpecifyingExtension, TypeSpecifierAwareExtension
{

	private TypeSpecifier $typeSpecifier;

	public function __construct(private RegexShapeMatcher $regexShapeMatcher)
	{
	}

	public function setTypeSpecifier(TypeSpecifier $typeSpecifier): void
	{
		$this->typeSpecifier = $typeSpecifier;
	}

	/**
	 * Only preg_match() is handled; the name comparison is case-insensitive.
	 */
	public function isFunctionSupported(FunctionReflection $functionReflection, FuncCall $node, TypeSpecifierContext $context): bool
	{
		return in_array(strtolower($functionReflection->getName()), ['preg_match'], true);
	}

	/**
	 * Builds the specified types for the $matches argument.
	 *
	 * Returns an empty SpecifiedTypes (no narrowing) when the call has no
	 * $matches argument, when the pattern is not a constant string, or when
	 * the flags argument is not one of the supported constant values.
	 */
	public function specifyTypes(FunctionReflection $functionReflection, FuncCall $node, Scope $scope, TypeSpecifierContext $context): SpecifiedTypes
	{
		$args = $node->getArgs();

		// preg_match($pattern, $subject, &$matches, $flags): positions 0, 2 and 3
		// are the ones inspected here. A missing $matches argument means there
		// is nothing to narrow, so no separate argument-count guard is needed.
		$patternArg = $args[0] ?? null;
		$matchesArg = $args[2] ?? null;
		$flagsArg = $args[3] ?? null;

		if ($patternArg === null || $matchesArg === null) {
			return new SpecifiedTypes();
		}

		$patternType = $scope->getType($patternArg->value);
		$constantStrings = $patternType->getConstantStrings();
		if (count($constantStrings) === 0) {
			return new SpecifiedTypes();
		}

		$flags = null;
		if ($flagsArg !== null) {
			$flagsType = $scope->getType($flagsArg->value);

			// An explicit 0 is equivalent to omitting the flags argument, so it
			// is accepted alongside the two supported flags and their combination.
			if (
				!$flagsType instanceof ConstantIntegerType
				|| !in_array($flagsType->getValue(), [0, PREG_OFFSET_CAPTURE, PREG_UNMATCHED_AS_NULL, PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL], true)
			) {
				return new SpecifiedTypes();
			}

			$flags = $flagsType->getValue();
		}

		// One shape per possible constant pattern; the result is their union.
		$matchedTypes = [];
		foreach ($constantStrings as $constantString) {
			$matchedTypes[] = $this->regexShapeMatcher->matchType($constantString->getValue(), $flags, $context);
		}

		return $this->typeSpecifier->create(
			$matchesArg->value,
			TypeCombinator::union(...$matchedTypes),
			$context,
			false,
			$scope,
		);
	}

}

src/Type/Php/RegexShapeMatcher.php

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
<?php declare(strict_types = 1);
2+
3+
namespace PHPStan\Type\Php;
4+
5+
use PHPStan\Analyser\TypeSpecifierContext;
6+
use PHPStan\Type\ArrayType;
7+
use PHPStan\Type\Constant\ConstantArrayTypeBuilder;
8+
use PHPStan\Type\Constant\ConstantIntegerType;
9+
use PHPStan\Type\Constant\ConstantStringType;
10+
use PHPStan\Type\IntegerRangeType;
11+
use PHPStan\Type\MixedType;
12+
use PHPStan\Type\StringType;
13+
use PHPStan\Type\Type;
14+
use PHPStan\Type\TypeCombinator;
15+
use function array_key_last;
16+
use function array_keys;
17+
use function is_string;
18+
use function preg_match;
19+
use function preg_replace;
20+
use const PREG_OFFSET_CAPTURE;
21+
use const PREG_UNMATCHED_AS_NULL;
22+
23+
/**
 * Derives the array shape of preg_match()'s $matches from a regex by running
 * the (slightly modified) pattern against an empty subject and inspecting the
 * keys PCRE reports.
 */
final class RegexShapeMatcher
{

	/**
	 * Computes the type of $matches for the given pattern and flags.
	 *
	 * Falls back to a generic array type whenever the group keys cannot be
	 * determined reliably (invalid pattern, sentinel group not injected, or the
	 * probe match did not succeed).
	 *
	 * @param int-mask<PREG_OFFSET_CAPTURE|PREG_UNMATCHED_AS_NULL>|null $flags
	 */
	public function matchType(string $regex, ?int $flags, TypeSpecifierContext $context): Type
	{
		// PREG_UNMATCHED_AS_NULL is always used for the probe, on top of
		// whatever flags the analysed call passes.
		$trickFlags = PREG_UNMATCHED_AS_NULL | ($flags ?? 0);

		// add one capturing group to the end so all capture group keys
		// are present in the $matches
		// see https://3v4l.org/sOXbn, https://3v4l.org/3SdDM
		$trickRegex = preg_replace('~^(.)(.*)\K(\1\w*$)~', '|(?<phpstan_named_capture_group_last>)$3', $regex);

		// The probe is only trustworthy when it actually matched and the
		// sentinel group shows up in the result. Otherwise the pattern was not
		// rewritten (e.g. differing open/close delimiters) or did not match the
		// empty subject, and stripping "the last group" below would either
		// remove a real group or leave an empty shape.
		// `@` silences the warning PCRE emits for invalid patterns.
		if (
			$trickRegex === null
			|| @preg_match($trickRegex, '', $matches, $trickFlags) !== 1
			|| !array_key_exists('phpstan_named_capture_group_last', $matches)
		) {
			return new ArrayType(new MixedType(), new StringType());
		}

		// Drop the sentinel again: its numeric index is the last key, its name
		// is removed explicitly.
		unset($matches[array_key_last($matches)]);
		unset($matches['phpstan_named_capture_group_last']);

		// Every key gets the same value type, so compute it once.
		$valueType = $this->getValueType($flags ?? 0);

		$builder = ConstantArrayTypeBuilder::createEmpty();
		foreach (array_keys($matches) as $key) {
			// Optional and mandatory groups cannot be told apart from the
			// pattern yet, so every group is treated as optional. Only the full
			// match (key 0) is known to exist when preg_match() was truthy.
			$optional = !($context->true() && $key === 0);

			$builder->setOffsetValueType(
				$this->getKeyType($key),
				$valueType,
				$optional,
			);
		}

		return $builder->getArray();
	}

	/**
	 * Maps a $matches key to the matching constant key type.
	 */
	private function getKeyType(int|string $key): Type
	{
		if (is_string($key)) {
			return new ConstantStringType($key);
		}

		return new ConstantIntegerType($key);
	}

	/**
	 * Builds the type of a single $matches entry for the given flags.
	 *
	 * Without flags the entry is a string; PREG_UNMATCHED_AS_NULL makes it
	 * nullable; PREG_OFFSET_CAPTURE wraps it into a [value, offset] pair.
	 */
	private function getValueType(int $flags): Type
	{
		$valueType = new StringType();
		$offsetType = IntegerRangeType::fromInterval(0, null);
		if (($flags & PREG_UNMATCHED_AS_NULL) !== 0) {
			$valueType = TypeCombinator::addNull($valueType);
			// unmatched groups return -1 as offset
			$offsetType = IntegerRangeType::fromInterval(-1, null);
		}

		if (($flags & PREG_OFFSET_CAPTURE) === 0) {
			return $valueType;
		}

		$builder = ConstantArrayTypeBuilder::createEmpty();
		$builder->setOffsetValueType(
			new ConstantIntegerType(0),
			$valueType,
		);
		$builder->setOffsetValueType(
			new ConstantIntegerType(1),
			$offsetType,
		);

		return $builder->getArray();
	}

}

tests/PHPStan/Analyser/NodeScopeResolverTest.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1480,6 +1480,11 @@ public function dataFileAsserts(): iterable
14801480
yield from $this->gatherAssertTypes(__DIR__ . '/data/bug-10952b.php');
14811481
yield from $this->gatherAssertTypes(__DIR__ . '/data/case-insensitive-parent.php');
14821482
yield from $this->gatherAssertTypes(__DIR__ . '/data/bug-10893.php');
1483+
1484+
if (PHP_VERSION_ID >= 80200) {
1485+
yield from $this->gatherAssertTypes(__DIR__ . '/data/preg_match_shapes_php82.php');
1486+
}
1487+
yield from $this->gatherAssertTypes(__DIR__ . '/data/preg_match_shapes.php');
14831488
}
14841489

14851490
/**
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
<?php
2+
3+
namespace PregMatchShapes;
4+
5+
use function PHPStan\Testing\assertType;
6+
7+
// Fixture: patterns with plain (unnamed) capturing groups and no flags.
// Inside each truthy branch key 0 is asserted as required and every group key
// as optional; after each branch $matches is asserted as array<string>.
function doMatch(string $s): void {
8+
if (preg_match('/Price: (£|€)\d+/', $s, $matches)) {
9+
assertType('array{0: string, 1?: string}', $matches);
10+
}
11+
assertType('array<string>', $matches);
12+
13+
if (preg_match('/Price: (£|€)(\d+)/i', $s, $matches)) {
14+
assertType('array{0: string, 1?: string, 2?: string}', $matches);
15+
}
16+
assertType('array<string>', $matches);
17+
18+
if (preg_match('/(a)(b)*(c)(d)*/', $s, $matches)) {
19+
assertType('array{0: string, 1?: string, 2?: string, 3?: string, 4?: string}', $matches);
20+
}
21+
assertType('array<string>', $matches);
22+
}
23+
24+
// Fixture: a non-capturing group (?:...) must not contribute a key; only the
// single capturing group appears (as optional key 1) in the asserted shape.
function doNonCapturingGroup(string $s): void {
25+
if (preg_match('/Price: (?:£|€)(\d+)/', $s, $matches)) {
26+
assertType('array{0: string, 1?: string}', $matches);
27+
}
28+
assertType('array<string>', $matches);
29+
}
30+
31+
// Fixture: a named group (?P<num>...) is asserted under both its name and its
// numeric index, followed by the key of the unnamed group.
function doNamedSubpattern(string $s): void {
32+
if (preg_match('/\w-(?P<num>\d+)-(\w)/', $s, $matches)) {
33+
assertType('array{0: string, num?: string, 1?: string, 2?: string}', $matches);
34+
}
35+
assertType('array<string>', $matches);
36+
}
37+
38+
// Fixture: with PREG_OFFSET_CAPTURE every entry is asserted as a
// [string, non-negative offset] pair inside the truthy branch.
function doOffsetCapture(string $s): void {
39+
if (preg_match('/(foo)(bar)(baz)/', $s, $matches, PREG_OFFSET_CAPTURE)) {
40+
assertType('array{0: array{string, int<0, max>}, 1?: array{string, int<0, max>}, 2?: array{string, int<0, max>}, 3?: array{string, int<0, max>}}', $matches);
41+
}
42+
assertType('array<array{string, int<-1, max>}>', $matches);
43+
}
44+
45+
// Fixture: with PREG_UNMATCHED_AS_NULL every entry is asserted as string|null.
function doUnmatchedAsNull(string $s): void {
46+
if (preg_match('/(foo)?(bar)?(baz)?/', $s, $matches, PREG_UNMATCHED_AS_NULL)) {
47+
assertType('array{0: string|null, 1?: string|null, 2?: string|null, 3?: string|null}', $matches);
48+
}
49+
assertType('array<string|null>', $matches);
50+
}
51+
52+
// Fixture: both flags combined. Each entry is asserted as either the
// unmatched pair [null, -1] or a matched [string, non-negative offset] pair.
function doOffsetCaptureWithUnmatchedNull(string $s): void {
53+
// see https://3v4l.org/07rBO#v8.2.9
54+
if (preg_match('/(foo)(bar)(baz)/', $s, $matches, PREG_OFFSET_CAPTURE|PREG_UNMATCHED_AS_NULL)) {
55+
assertType('array{0: array{null, -1}|array{string, int<0, max>}, 1?: array{null, -1}|array{string, int<0, max>}, 2?: array{null, -1}|array{string, int<0, max>}, 3?: array{null, -1}|array{string, int<0, max>}}', $matches);
56+
}
57+
assertType('array<array{null, -1}|array{string, int<0, max>}>', $matches);
58+
}
59+
60+
// Fixture: flags that are not a compile-time constant. No shape is asserted;
// the same general array type is expected inside and after the branch.
// Note: $s is not used here; the subject is the literal 'foobarbaz'.
function doUnknownFlags(string $s, int $flags): void {
61+
if (preg_match('/(foo)(bar)(baz)/', 'foobarbaz', $matches, $flags)) {
62+
assertType('array<array{string|null, int<-1, max>}|string|null>', $matches);
63+
}
64+
assertType('array<array{string|null, int<-1, max>}|string|null>', $matches);
65+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<?php
2+
3+
namespace PregMatchShapesPhp82;
4+
5+
use function PHPStan\Testing\assertType;
6+
7+
// Fixture (loaded only on PHP >= 8.2 by the test suite): with the `n` modifier
// the unnamed groups are not captured, so the asserted shape contains only the
// full match and the named group (by name and by index 1).
function doOnlyNamedSubpattern(string $s): void {
8+
// n modifier captures only named groups
9+
if (preg_match('/(\w)-(?P<num>\d+)-(\w)/n', $s, $matches)) {
10+
assertType('array{0: string, num?: string, 1?: string}', $matches);
11+
}
12+
assertType('array<string>', $matches);
13+
}

0 commit comments

Comments
 (0)