Skip to content

Commit 271766e

Browse files
authored
RegexArrayShapeMatcher - when all groups are optional return a more precise union
1 parent 877ff0a commit 271766e

File tree

4 files changed

+228
-35
lines changed

4 files changed

+228
-35
lines changed

src/Type/Php/RegexArrayShapeMatcher.php

Lines changed: 134 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,88 @@ public function matchType(Type $patternType, ?Type $flagsType, TrinaryLogic $was
7373
*/
7474
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched): ?Type
7575
{
76-
$captureGroups = $this->parseGroups($regex);
77-
if ($captureGroups === null) {
76+
$groupList = $this->parseGroups($regex);
77+
if ($groupList === null) {
7878
// regex could not be parsed by Hoa/Regex
7979
return null;
8080
}
8181

82-
$builder = ConstantArrayTypeBuilder::createEmpty();
82+
$trailingOptionals = 0;
83+
foreach (array_reverse($groupList) as $captureGroup) {
84+
if (!$captureGroup->isOptional()) {
85+
break;
86+
}
87+
$trailingOptionals++;
88+
}
89+
8390
$valueType = $this->getValueType($flags ?? 0);
91+
$onlyOptionalTopLevelGroup = $this->getOnlyOptionalTopLevelGroup($groupList);
92+
if (
93+
$wasMatched->yes()
94+
&& $onlyOptionalTopLevelGroup !== null
95+
) {
96+
// if exactly one top-level capturing group exists and it is optional,
97+
// we build a more precise constant union of an empty match (array{0: string}) and a match that includes the group
98+
99+
$onlyOptionalTopLevelGroup->removeOptionalQualification();
100+
101+
$combiType = $this->buildArrayType(
102+
$groupList,
103+
$valueType,
104+
$wasMatched,
105+
$trailingOptionals,
106+
);
107+
108+
return TypeCombinator::union(
109+
new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()]),
110+
$combiType,
111+
);
112+
}
113+
114+
return $this->buildArrayType(
115+
$groupList,
116+
$valueType,
117+
$wasMatched,
118+
$trailingOptionals,
119+
);
120+
}
121+
122+
/**
123+
* @param list<RegexCapturingGroup> $captureGroups
124+
*/
125+
private function getOnlyOptionalTopLevelGroup(array $captureGroups): ?RegexCapturingGroup
126+
{
127+
$group = null;
128+
foreach ($captureGroups as $captureGroup) {
129+
if (!$captureGroup->isTopLevel()) {
130+
continue;
131+
}
132+
133+
if (!$captureGroup->isOptional()) {
134+
return null;
135+
}
136+
137+
if ($group !== null) {
138+
return null;
139+
}
140+
141+
$group = $captureGroup;
142+
}
143+
144+
return $group;
145+
}
146+
147+
/**
148+
* @param list<RegexCapturingGroup> $captureGroups
149+
*/
150+
private function buildArrayType(
151+
array $captureGroups,
152+
Type $valueType,
153+
TrinaryLogic $wasMatched,
154+
int $trailingOptionals,
155+
): Type
156+
{
157+
$builder = ConstantArrayTypeBuilder::createEmpty();
84158

85159
// first item in matches contains the overall match.
86160
$builder->setOffsetValueType(
@@ -89,21 +163,14 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
89163
!$wasMatched->yes(),
90164
);
91165

92-
$trailingOptionals = 0;
93-
foreach (array_reverse($captureGroups) as $captureGroup) {
94-
if (!$captureGroup->isOptional()) {
95-
break;
96-
}
97-
$trailingOptionals++;
98-
}
99-
100-
for ($i = 0; $i < count($captureGroups); $i++) {
166+
$countGroups = count($captureGroups);
167+
for ($i = 0; $i < $countGroups; $i++) {
101168
$captureGroup = $captureGroups[$i];
102169

103170
if (!$wasMatched->yes()) {
104171
$optional = true;
105172
} else {
106-
if ($i < count($captureGroups) - $trailingOptionals) {
173+
if ($i < $countGroups - $trailingOptionals) {
107174
$optional = false;
108175
} else {
109176
$optional = $captureGroup->isOptional();
@@ -181,46 +248,84 @@ private function parseGroups(string $regex): ?array
181248
return null;
182249
}
183250

184-
$capturings = [];
185-
$this->walkRegexAst($ast, 0, 0, $capturings);
251+
$capturingGroups = [];
252+
$this->walkRegexAst(
253+
$ast,
254+
false,
255+
false,
256+
null,
257+
$capturingGroups,
258+
);
186259

187-
return $capturings;
260+
return $capturingGroups;
188261
}
189262

190263
/**
191-
* @param list<RegexCapturingGroup> $capturings
264+
* @param list<RegexCapturingGroup> $capturingGroups
192265
*/
193-
private function walkRegexAst(TreeNode $ast, int $inAlternation, int $inOptionalQuantification, array &$capturings): void
266+
private function walkRegexAst(
267+
TreeNode $ast,
268+
bool $inAlternation,
269+
bool $inOptionalQuantification,
270+
RegexCapturingGroup|RegexNonCapturingGroup|null $parentGroup,
271+
array &$capturingGroups,
272+
): void
194273
{
274+
$group = null;
195275
if ($ast->getId() === '#capturing') {
196-
$capturings[] = RegexCapturingGroup::unnamed($inAlternation > 0 || $inOptionalQuantification > 0);
276+
$group = RegexCapturingGroup::unnamed(
277+
$inAlternation,
278+
$inOptionalQuantification,
279+
$parentGroup,
280+
);
281+
$parentGroup = $group;
197282
} elseif ($ast->getId() === '#namedcapturing') {
198283
$name = $ast->getChild(0)->getValue()['value'];
199-
$capturings[] = RegexCapturingGroup::named(
284+
$group = RegexCapturingGroup::named(
200285
$name,
201-
$inAlternation > 0 || $inOptionalQuantification > 0,
286+
$inAlternation,
287+
$inOptionalQuantification,
288+
$parentGroup,
202289
);
290+
$parentGroup = $group;
291+
} elseif ($ast->getId() === '#noncapturing') {
292+
$group = RegexNonCapturingGroup::create(
293+
$inOptionalQuantification,
294+
$parentGroup,
295+
);
296+
$parentGroup = $group;
203297
}
204298

205-
if ($ast->getId() === '#alternation') {
206-
$inAlternation++;
207-
}
208-
299+
$inOptionalQuantification = false;
209300
if ($ast->getId() === '#quantification') {
210301
$lastChild = $ast->getChild($ast->getChildrenNumber() - 1);
211302
$value = $lastChild->getValue();
212303

213304
if ($value['token'] === 'n_to_m' && str_contains($value['value'], '{0,')) {
214-
$inOptionalQuantification++;
305+
$inOptionalQuantification = true;
215306
} elseif ($value['token'] === 'zero_or_one') {
216-
$inOptionalQuantification++;
307+
$inOptionalQuantification = true;
217308
} elseif ($value['token'] === 'zero_or_more') {
218-
$inOptionalQuantification++;
309+
$inOptionalQuantification = true;
219310
}
220311
}
221312

313+
if ($ast->getId() === '#alternation') {
314+
$inAlternation = true;
315+
}
316+
317+
if ($group instanceof RegexCapturingGroup) {
318+
$capturingGroups[] = $group;
319+
}
320+
222321
foreach ($ast->getChildren() as $child) {
223-
$this->walkRegexAst($child, $inAlternation, $inOptionalQuantification, $capturings);
322+
$this->walkRegexAst(
323+
$child,
324+
$inAlternation,
325+
$inOptionalQuantification,
326+
$parentGroup,
327+
$capturingGroups,
328+
);
224329
}
225330
}
226331

src/Type/Php/RegexCapturingGroup.php

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,49 @@
55
class RegexCapturingGroup
66
{
77

8-
private function __construct(private ?string $name, private bool $optional)
8+
private function __construct(
9+
private ?string $name,
10+
private bool $inAlternation,
11+
private bool $inOptionalQuantification,
12+
private RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
13+
)
914
{
1015
}
1116

12-
public static function unnamed(bool $optional): self
17+
public static function unnamed(
18+
bool $inAlternation,
19+
bool $inOptionalQuantification,
20+
RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
21+
): self
1322
{
14-
return new self(null, $optional);
23+
return new self(null, $inAlternation, $inOptionalQuantification, $parent);
1524
}
1625

17-
public static function named(string $name, bool $optional): self
26+
public static function named(
27+
string $name,
28+
bool $inAlternation,
29+
bool $inOptionalQuantification,
30+
RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
31+
): self
1832
{
19-
return new self($name, $optional);
33+
return new self($name, $inAlternation, $inOptionalQuantification, $parent);
34+
}
35+
36+
public function removeOptionalQualification(): void
37+
{
38+
$this->inOptionalQuantification = false;
2039
}
2140

2241
public function isOptional(): bool
2342
{
24-
return $this->optional;
43+
return $this->inAlternation
44+
|| $this->inOptionalQuantification
45+
|| ($this->parent !== null && $this->parent->isOptional());
46+
}
47+
48+
public function isTopLevel(): bool
49+
{
50+
return $this->parent === null;
2551
}
2652

2753
/** @phpstan-assert-if-true !null $this->getName() */
src/Type/Php/RegexNonCapturingGroup.php

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
<?php declare(strict_types = 1);
2+
3+
namespace PHPStan\Type\Php;
4+
5+
class RegexNonCapturingGroup
6+
{
7+
8+
private function __construct(
9+
private bool $inOptionalQuantification,
10+
private RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
11+
)
12+
{
13+
}
14+
15+
public static function create(
16+
bool $inOptionalQuantification,
17+
RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
18+
): self
19+
{
20+
return new self($inOptionalQuantification, $parent);
21+
}
22+
23+
public function isOptional(): bool
24+
{
25+
return $this->inOptionalQuantification
26+
|| ($this->parent !== null && $this->parent->isOptional());
27+
}
28+
29+
}

tests/PHPStan/Analyser/nsrt/preg_match_shapes_php.php renamed to tests/PHPStan/Analyser/nsrt/preg_match_shapes.php

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,3 +273,36 @@ function doFoo3(string $row): void
273273

274274
assertType('array{string, string, string, string, string, string, string}', $matches);
275275
}
276+
277+
function groupsOptional(string $size): void
278+
{
279+
if (preg_match('~^a\.b(c(\d+)(\d+)(\s+))?d~', $size, $matches) !== 1) {
280+
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
281+
}
282+
assertType('array{string, string, string, string, string}|array{string}', $matches);
283+
284+
if (preg_match('~^a\.b(c(\d+))?d~', $size, $matches) !== 1) {
285+
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
286+
}
287+
assertType('array{string, string, string}|array{string}', $matches);
288+
289+
if (preg_match('~^a\.b(c(\d+)?)d~', $size, $matches) !== 1) {
290+
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
291+
}
292+
assertType('array{0: string, 1: string, 2?: string}', $matches);
293+
294+
if (preg_match('~^a\.b(c(\d+)?)?d~', $size, $matches) !== 1) {
295+
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
296+
}
297+
assertType('array{0: string, 1?: string, 2?: string}', $matches);
298+
299+
if (preg_match('~^a\.b(c(\d+))d~', $size, $matches) !== 1) {
300+
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
301+
}
302+
assertType('array{string, string, string}', $matches);
303+
304+
if (preg_match('~^a\.(b)?(c)?d~', $size, $matches) !== 1) {
305+
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
306+
}
307+
assertType('array{0: string, 1?: string, 2?: string}', $matches);
308+
}

0 commit comments

Comments
 (0)