Skip to content

Commit 579402b

Browse files
authored
RegexArrayShapeMatcher - Narrow types in alternation of top level capturing groups
1 parent cb9978d commit 579402b

File tree

4 files changed

+232
-47
lines changed

4 files changed

+232
-47
lines changed

src/Type/Php/RegexArrayShapeMatcher.php

Lines changed: 129 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
use PHPStan\Type\StringType;
1717
use PHPStan\Type\Type;
1818
use PHPStan\Type\TypeCombinator;
19+
use function array_key_exists;
1920
use function array_reverse;
2021
use function count;
2122
use function in_array;
@@ -65,6 +66,10 @@ public function matchType(Type $patternType, ?Type $flagsType, TrinaryLogic $was
6566
$matchedTypes[] = $matched;
6667
}
6768

69+
if (count($matchedTypes) === 1) {
70+
return $matchedTypes[0];
71+
}
72+
6873
return TypeCombinator::union(...$matchedTypes);
6974
}
7075

@@ -73,11 +78,12 @@ public function matchType(Type $patternType, ?Type $flagsType, TrinaryLogic $was
7378
*/
7479
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched): ?Type
7580
{
76-
$groupList = $this->parseGroups($regex);
77-
if ($groupList === null) {
81+
$parseResult = $this->parseGroups($regex);
82+
if ($parseResult === null) {
7883
// regex could not be parsed by Hoa/Regex
7984
return null;
8085
}
86+
[$groupList, $groupCombinations] = $parseResult;
8187

8288
$trailingOptionals = 0;
8389
foreach (array_reverse($groupList) as $captureGroup) {
@@ -89,14 +95,16 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
8995

9096
$valueType = $this->getValueType($flags ?? 0);
9197
$onlyOptionalTopLevelGroup = $this->getOnlyOptionalTopLevelGroup($groupList);
98+
$onlyTopLevelAlternationId = $this->getOnlyTopLevelAlternationId($groupList);
99+
92100
if (
93101
$wasMatched->yes()
94102
&& $onlyOptionalTopLevelGroup !== null
95103
) {
96104
// if only one top level capturing optional group exists
97105
// we build a more precise constant union of a empty-match and a match with the group
98106

99-
$onlyOptionalTopLevelGroup->removeOptionalQualification();
107+
$onlyOptionalTopLevelGroup->forceNonOptional();
100108

101109
$combiType = $this->buildArrayType(
102110
$groupList,
@@ -109,6 +117,49 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
109117
new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()]),
110118
$combiType,
111119
);
120+
} elseif (
121+
$wasMatched->yes()
122+
&& $onlyTopLevelAlternationId !== null
123+
&& array_key_exists($onlyTopLevelAlternationId, $groupCombinations)
124+
) {
125+
$combiTypes = [];
126+
$isOptionalAlternation = false;
127+
foreach ($groupCombinations[$onlyTopLevelAlternationId] as $groupCombo) {
128+
$comboList = $groupList;
129+
130+
$beforeCurrentCombo = true;
131+
foreach ($comboList as $groupId => $group) {
132+
if (in_array($groupId, $groupCombo, true)) {
133+
$isOptionalAlternation = $group->inOptionalAlternation();
134+
$group->forceNonOptional();
135+
$beforeCurrentCombo = false;
136+
} elseif ($beforeCurrentCombo) {
137+
$group->forceNonOptional();
138+
} elseif ($group->getAlternationId() === $onlyTopLevelAlternationId) {
139+
unset($comboList[$groupId]);
140+
}
141+
}
142+
143+
$combiType = $this->buildArrayType(
144+
$comboList,
145+
$valueType,
146+
$wasMatched,
147+
$trailingOptionals,
148+
);
149+
150+
$combiTypes[] = $combiType;
151+
152+
foreach ($groupCombo as $groupId) {
153+
$group = $comboList[$groupId];
154+
$group->restoreNonOptional();
155+
}
156+
}
157+
158+
if ($isOptionalAlternation) {
159+
$combiTypes[] = new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()]);
160+
}
161+
162+
return TypeCombinator::union(...$combiTypes);
112163
}
113164

114165
return $this->buildArrayType(
@@ -120,7 +171,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
120171
}
121172

122173
/**
123-
* @param list<RegexCapturingGroup> $captureGroups
174+
* @param array<int, RegexCapturingGroup> $captureGroups
124175
*/
125176
private function getOnlyOptionalTopLevelGroup(array $captureGroups): ?RegexCapturingGroup
126177
{
@@ -145,7 +196,32 @@ private function getOnlyOptionalTopLevelGroup(array $captureGroups): ?RegexCaptu
145196
}
146197

147198
/**
148-
* @param list<RegexCapturingGroup> $captureGroups
199+
* @param array<int, RegexCapturingGroup> $captureGroups
200+
*/
201+
// Returns the alternation id shared by every top-level capturing group, or null
// when no single such id exists. Null is returned when a top-level group is not
// inside an alternation, when two top-level groups carry different alternation
// ids, or when no top-level group is found at all.
private function getOnlyTopLevelAlternationId(array $captureGroups): ?int
202+
{
203+
$alternationId = null;
204+
foreach ($captureGroups as $captureGroup) {
205+
// nested groups do not take part in the decision
if (!$captureGroup->isTopLevel()) {
206+
continue;
207+
}
208+
209+
// a top-level group outside of any alternation rules out the narrowing
if (!$captureGroup->inAlternation()) {
210+
return null;
211+
}
212+
213+
// remember the first id seen; any later top-level group must match it
if ($alternationId === null) {
214+
$alternationId = $captureGroup->getAlternationId();
215+
} elseif ($alternationId !== $captureGroup->getAlternationId()) {
216+
return null;
217+
}
218+
}
219+
220+
return $alternationId;
221+
}
222+
223+
/**
224+
* @param array<RegexCapturingGroup> $captureGroups
149225
*/
150226
private function buildArrayType(
151227
array $captureGroups,
@@ -164,9 +240,8 @@ private function buildArrayType(
164240
);
165241

166242
$countGroups = count($captureGroups);
167-
for ($i = 0; $i < $countGroups; $i++) {
168-
$captureGroup = $captureGroups[$i];
169-
243+
$i = 0;
244+
foreach ($captureGroups as $captureGroup) {
170245
if (!$wasMatched->yes()) {
171246
$optional = true;
172247
} else {
@@ -190,6 +265,8 @@ private function buildArrayType(
190265
$valueType,
191266
$optional,
192267
);
268+
269+
$i++;
193270
}
194271

195272
return $builder->getArray();
@@ -233,7 +310,7 @@ private function getValueType(int $flags): Type
233310
}
234311

235312
/**
236-
* @return list<RegexCapturingGroup>|null
313+
* @return array{array<int, RegexCapturingGroup>, array<int, array<int, int[]>>}|null
237314
*/
238315
private function parseGroups(string $regex): ?array
239316
{
@@ -249,47 +326,63 @@ private function parseGroups(string $regex): ?array
249326
}
250327

251328
$capturingGroups = [];
329+
$groupCombinations = [];
330+
$alternationId = -1;
331+
$captureGroupId = 100;
252332
$this->walkRegexAst(
253333
$ast,
254334
false,
335+
$alternationId,
336+
0,
255337
false,
256338
null,
339+
$captureGroupId,
257340
$capturingGroups,
341+
$groupCombinations,
258342
);
259343

260-
return $capturingGroups;
344+
return [$capturingGroups, $groupCombinations];
261345
}
262346

263347
/**
264-
* @param list<RegexCapturingGroup> $capturingGroups
348+
* @param array<int, RegexCapturingGroup> $capturingGroups
349+
* @param array<int, array<int, int[]>> $groupCombinations
265350
*/
266351
private function walkRegexAst(
267352
TreeNode $ast,
268353
bool $inAlternation,
354+
int &$alternationId,
355+
int $combinationIndex,
269356
bool $inOptionalQuantification,
270357
RegexCapturingGroup|RegexNonCapturingGroup|null $parentGroup,
358+
int &$captureGroupId,
271359
array &$capturingGroups,
360+
array &$groupCombinations,
272361
): void
273362
{
274363
$group = null;
275364
if ($ast->getId() === '#capturing') {
276-
$group = RegexCapturingGroup::unnamed(
277-
$inAlternation,
365+
$group = new RegexCapturingGroup(
366+
$captureGroupId++,
367+
null,
368+
$inAlternation ? $alternationId : null,
278369
$inOptionalQuantification,
279370
$parentGroup,
280371
);
281372
$parentGroup = $group;
282373
} elseif ($ast->getId() === '#namedcapturing') {
283374
$name = $ast->getChild(0)->getValue()['value'];
284-
$group = RegexCapturingGroup::named(
375+
$group = new RegexCapturingGroup(
376+
$captureGroupId++,
285377
$name,
286-
$inAlternation,
378+
$inAlternation ? $alternationId : null,
287379
$inOptionalQuantification,
288380
$parentGroup,
289381
);
290382
$parentGroup = $group;
291383
} elseif ($ast->getId() === '#noncapturing') {
292-
$group = RegexNonCapturingGroup::create(
384+
$group = new RegexNonCapturingGroup(
385+
$inAlternation ? $alternationId : null,
293386
$inOptionalQuantification,
294387
$parentGroup,
295388
);
@@ -311,21 +404,40 @@ private function walkRegexAst(
311404
}
312405

313406
if ($ast->getId() === '#alternation') {
407+
$alternationId++;
314408
$inAlternation = true;
315409
}
316410

317411
if ($group instanceof RegexCapturingGroup) {
318-
$capturingGroups[] = $group;
412+
$capturingGroups[$group->getId()] = $group;
413+
414+
if (!array_key_exists($alternationId, $groupCombinations)) {
415+
$groupCombinations[$alternationId] = [];
416+
}
417+
if (!array_key_exists($combinationIndex, $groupCombinations[$alternationId])) {
418+
$groupCombinations[$alternationId][$combinationIndex] = [];
419+
}
420+
$groupCombinations[$alternationId][$combinationIndex][] = $group->getId();
319421
}
320422

321423
foreach ($ast->getChildren() as $child) {
322424
$this->walkRegexAst(
323425
$child,
324426
$inAlternation,
427+
$alternationId,
428+
$combinationIndex,
325429
$inOptionalQuantification,
326430
$parentGroup,
431+
$captureGroupId,
327432
$capturingGroups,
433+
$groupCombinations,
328434
);
435+
436+
if ($ast->getId() !== '#alternation') {
437+
continue;
438+
}
439+
440+
$combinationIndex++;
329441
}
330442
}
331443

src/Type/Php/RegexCapturingGroup.php

Lines changed: 46 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,49 +5,75 @@
55
class RegexCapturingGroup
66
{
77

8-
private function __construct(
8+
private bool $forceNonOptional = false;
9+
10+
public function __construct(
11+
private int $id,
912
private ?string $name,
10-
private bool $inAlternation,
13+
private ?int $alternationId,
1114
private bool $inOptionalQuantification,
1215
private RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
1316
)
1417
{
1518
}
1619

17-
public static function unnamed(
18-
bool $inAlternation,
19-
bool $inOptionalQuantification,
20-
RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
21-
): self
20+
// Returns the integer id that was passed to the constructor for this group.
public function getId(): int
21+
{
22+
return $this->id;
23+
}
24+
25+
// Sets the forceNonOptional flag on this (mutable) group object. While the flag
// is set, isOptional() returns false without looking at the alternation,
// quantification or parent state.
public function forceNonOptional(): void
26+
{
27+
$this->forceNonOptional = true;
28+
}
29+
30+
public function restoreNonOptional(): void
2231
{
23-
return new self(null, $inAlternation, $inOptionalQuantification, $parent);
32+
$this->forceNonOptional = false;
2433
}
2534

26-
public static function named(
27-
string $name,
28-
bool $inAlternation,
29-
bool $inOptionalQuantification,
30-
RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
31-
): self
35+
/** @phpstan-assert-if-true !null $this->getAlternationId() */
36+
public function inAlternation(): bool
3237
{
33-
return new self($name, $inAlternation, $inOptionalQuantification, $parent);
38+
return $this->alternationId !== null;
3439
}
3540

36-
public function removeOptionalQualification(): void
41+
public function getAlternationId(): ?int
3742
{
38-
$this->inOptionalQuantification = false;
43+
return $this->alternationId;
3944
}
4045

4146
public function isOptional(): bool
4247
{
43-
return $this->inAlternation
48+
if ($this->forceNonOptional) {
49+
return false;
50+
}
51+
52+
return $this->inAlternation()
4453
|| $this->inOptionalQuantification
45-
|| ($this->parent !== null && $this->parent->isOptional());
54+
|| $this->parent !== null && $this->parent->isOptional();
55+
}
56+
57+
// Tells whether this group belongs to an alternation that is itself wrapped in
// an optional ancestor. Returns false when the group is not in an alternation,
// or when an ancestor with the same alternation id is not a non-capturing group.
// NOTE(review): relies on RegexNonCapturingGroup exposing getAlternationId(),
// getParent() and isOptional(); that class is not visible here - confirm.
public function inOptionalAlternation(): bool
58+
{
59+
if (!$this->inAlternation()) {
60+
return false;
61+
}
62+
63+
$parent = $this->parent;
64+
// climb through the ancestors that carry the same alternation id as this group
while ($parent !== null && $parent->getAlternationId() === $this->getAlternationId()) {
65+
// only non-capturing wrappers are skipped; any other ancestor ends the search
if (!$parent instanceof RegexNonCapturingGroup) {
66+
return false;
67+
}
68+
$parent = $parent->getParent();
69+
}
70+
// $parent is now the first ancestor with a different alternation id, or null
return $parent !== null && $parent->isOptional();
4671
}
4772

4873
public function isTopLevel(): bool
4974
{
50-
return $this->parent === null;
75+
return $this->parent === null
76+
|| $this->parent instanceof RegexNonCapturingGroup && $this->parent->isTopLevel();
5177
}
5278

5379
/** @phpstan-assert-if-true !null $this->getName() */

0 commit comments

Comments
 (0)