Skip to content

Commit 0132833

Browse files
authored
RegexArrayShapeMatcher - Fix shape of single top level alternations
1 parent 427a319 commit 0132833

File tree

6 files changed

+104
-24
lines changed

6 files changed

+104
-24
lines changed

src/Type/Php/RegexArrayShapeMatcher.php

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
use PHPStan\Type\Constant\ConstantStringType;
1515
use PHPStan\Type\IntegerRangeType;
1616
use PHPStan\Type\IntegerType;
17+
use PHPStan\Type\NullType;
1718
use PHPStan\Type\Regex\RegexAlternation;
1819
use PHPStan\Type\Regex\RegexCapturingGroup;
1920
use PHPStan\Type\Regex\RegexExpressionHelper;
@@ -140,7 +141,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
140141
&& $onlyOptionalTopLevelGroup !== null
141142
) {
142143
// if only one top level capturing optional group exists
143-
// we build a more precise constant union of a empty-match and a match with the group
144+
// we build a more precise tagged union of an empty-match and a match with the group
144145

145146
$onlyOptionalTopLevelGroup->forceNonOptional();
146147

@@ -154,18 +155,24 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
154155
);
155156

156157
if (!$this->containsUnmatchedAsNull($flags, $matchesAll)) {
158+
// positive match has a subject but not any capturing group
157159
$combiType = TypeCombinator::union(
158160
new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($flags, $matchesAll)], [0], [], true),
159161
$combiType,
160162
);
161163
}
162164

165+
$onlyOptionalTopLevelGroup->clearOverrides();
166+
163167
return $combiType;
164168
} elseif (
165169
!$matchesAll
166-
&& $wasMatched->yes()
170+
&& $onlyOptionalTopLevelGroup === null
167171
&& $onlyTopLevelAlternation !== null
172+
&& !$wasMatched->no()
168173
) {
174+
// if only a single top level alternation exists, build a more precise tagged union
175+
169176
$combiTypes = [];
170177
$isOptionalAlternation = false;
171178
foreach ($onlyTopLevelAlternation->getGroupCombinations() as $groupCombo) {
@@ -179,6 +186,9 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
179186
$beforeCurrentCombo = false;
180187
} elseif ($beforeCurrentCombo && !$group->resetsGroupCounter()) {
181188
$group->forceNonOptional();
189+
$group->forceType(
190+
$this->containsUnmatchedAsNull($flags, $matchesAll) ? new NullType() : new ConstantStringType(''),
191+
);
182192
} elseif (
183193
$group->getAlternationId() === $onlyTopLevelAlternation->getId()
184194
&& !$this->containsUnmatchedAsNull($flags, $matchesAll)
@@ -200,17 +210,26 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
200210

201211
foreach ($groupCombo as $groupId) {
202212
$group = $comboList[$groupId];
203-
$group->restoreNonOptional();
213+
$group->clearOverrides();
204214
}
205215
}
206216

207-
if ($isOptionalAlternation && !$this->containsUnmatchedAsNull($flags, $matchesAll)) {
217+
if (
218+
!$this->containsUnmatchedAsNull($flags, $matchesAll)
219+
&& (
220+
$onlyTopLevelAlternation->getAlternationsCount() !== count($onlyTopLevelAlternation->getGroupCombinations())
221+
|| $isOptionalAlternation
222+
)
223+
) {
224+
// positive match has a subject but not any capturing group
208225
$combiTypes[] = new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($flags, $matchesAll)], [0], [], true);
209226
}
210227

211228
return TypeCombinator::union(...$combiTypes);
212229
}
213230

231+
// the general case, which should work in all cases but does not yield the most
232+
// precise result possible in some cases
214233
return $this->buildArrayType(
215234
$groupList,
216235
$wasMatched,

src/Type/Regex/RegexAlternation.php

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@ final class RegexAlternation
1010
/** @var array<int, list<int>> */
1111
private array $groupCombinations = [];
1212

13-
public function __construct(private readonly int $alternationId)
13+
public function __construct(
14+
private readonly int $alternationId,
15+
private readonly int $alternationsCount,
16+
)
1417
{
1518
}
1619

@@ -28,6 +31,11 @@ public function pushGroup(int $combinationIndex, RegexCapturingGroup $group): vo
2831
$this->groupCombinations[$combinationIndex][] = $group->getId();
2932
}
3033

34+
public function getAlternationsCount(): int
35+
{
36+
return $this->alternationsCount;
37+
}
38+
3139
/**
3240
* @return array<int, list<int>>
3341
*/

src/Type/Regex/RegexCapturingGroup.php

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ final class RegexCapturingGroup
99

1010
private bool $forceNonOptional = false;
1111

12+
private ?Type $forceType = null;
13+
1214
public function __construct(
1315
private readonly int $id,
1416
private readonly ?string $name,
@@ -30,9 +32,15 @@ public function forceNonOptional(): void
3032
$this->forceNonOptional = true;
3133
}
3234

33-
public function restoreNonOptional(): void
35+
public function forceType(Type $type): void
36+
{
37+
$this->forceType = $type;
38+
}
39+
40+
public function clearOverrides(): void
3441
{
3542
$this->forceNonOptional = false;
43+
$this->forceType = null;
3644
}
3745

3846
public function resetsGroupCounter(): bool
@@ -109,6 +117,9 @@ public function getName(): ?string
109117

110118
public function getType(): Type
111119
{
120+
if ($this->forceType !== null) {
121+
return $this->forceType;
122+
}
112123
return $this->type;
113124
}
114125

src/Type/Regex/RegexGroupParser.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ private function walkRegexAst(
177177

178178
if ($ast->getId() === '#alternation') {
179179
$alternationId++;
180-
$alternation = new RegexAlternation($alternationId);
180+
$alternation = new RegexAlternation($alternationId, count($ast->getChildren()));
181181
}
182182

183183
if ($ast->getId() === '#mark') {

tests/PHPStan/Analyser/nsrt/bug-11311.php

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,3 +198,29 @@ function (string $s): void {
198198
preg_match('/%a(\d*)?/', $s, $matches, PREG_UNMATCHED_AS_NULL);
199199
assertType("array{0?: string, 1?: ''|numeric-string|null}", $matches); // could be array{0?: string, 1?: ''|numeric-string}
200200
};
201+
202+
function (string $s): void {
203+
if (preg_match('~a|(\d)|(\s)~', $s, $matches, PREG_UNMATCHED_AS_NULL)) {
204+
assertType("array{string, numeric-string|null, non-empty-string|null}", $matches);
205+
} else {
206+
assertType("array{}", $matches);
207+
}
208+
assertType("array{}|array{string, numeric-string|null, non-empty-string|null}", $matches);
209+
};
210+
211+
function (string $s): void {
212+
if (preg_match('~a|(\d)|(\s)~', $s, $matches, PREG_UNMATCHED_AS_NULL|PREG_OFFSET_CAPTURE) === 1) {
213+
assertType("array{array{string|null, int<-1, max>}, array{numeric-string|null, int<-1, max>}, array{non-empty-string|null, int<-1, max>}}", $matches);
214+
}
215+
};
216+
217+
function (string $s): void {
218+
if (preg_match('~a|((u)x)|((v)y)~', $s, $matches, PREG_UNMATCHED_AS_NULL) === 1) {
219+
assertType("array{string, 'ux'|null, 'u'|null, 'vy'|null, 'v'|null}", $matches);
220+
}
221+
};
222+
223+
function (string $s): void {
224+
preg_match('~a|(\d)|(\s)~', $s, $matches, PREG_UNMATCHED_AS_NULL);
225+
assertType("array{0?: string, 1?: numeric-string|null, 2?: non-empty-string|null}", $matches);
226+
};

tests/PHPStan/Analyser/nsrt/preg_match_shapes.php

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -129,16 +129,16 @@ function doUnknownFlags(string $s, int $flags): void {
129129

130130
function doMultipleAlternativeCaptureGroupsWithSameNameWithModifier(string $s): void {
131131
if (preg_match('/(?J)(?<Foo>[a-z]+)|(?<Foo>[0-9]+)/', $s, $matches)) {
132-
assertType('array{0: string, Foo: numeric-string|non-empty-string, 1: non-empty-string, 2?: numeric-string}', $matches);
132+
assertType("array{0: string, Foo: non-empty-string, 1: non-empty-string}|array{0: string, Foo: numeric-string, 1: '', 2: numeric-string}", $matches);
133133
}
134-
assertType('array{}|array{0: string, Foo: numeric-string|non-empty-string, 1: non-empty-string, 2?: numeric-string}', $matches);
134+
assertType("array{}|array{0: string, Foo: non-empty-string, 1: non-empty-string}|array{0: string, Foo: numeric-string, 1: '', 2: numeric-string}", $matches);
135135
}
136136

137137
function doMultipleConsecutiveCaptureGroupsWithSameNameWithModifier(string $s): void {
138138
if (preg_match('/(?J)(?<Foo>[a-z]+)|(?<Foo>[0-9]+)/', $s, $matches)) {
139-
assertType('array{0: string, Foo: numeric-string|non-empty-string, 1: non-empty-string, 2?: numeric-string}', $matches);
139+
assertType("array{0: string, Foo: non-empty-string, 1: non-empty-string}|array{0: string, Foo: numeric-string, 1: '', 2: numeric-string}", $matches);
140140
}
141-
assertType('array{}|array{0: string, Foo: numeric-string|non-empty-string, 1: non-empty-string, 2?: numeric-string}', $matches);
141+
assertType("array{}|array{0: string, Foo: non-empty-string, 1: non-empty-string}|array{0: string, Foo: numeric-string, 1: '', 2: numeric-string}", $matches);
142142
}
143143

144144
// https://github.com/hoaproject/Regex/issues/31
@@ -307,21 +307,21 @@ function (string $size): void {
307307
if (preg_match('~^(?:(\\d+)x(\\d+)|(\\d+)|x(\\d+))$~', $size, $matches) !== 1) {
308308
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
309309
}
310-
assertType('array{0: string, 1: numeric-string, 2: numeric-string, 3?: numeric-string, 4?: numeric-string}', $matches);
310+
assertType("array{string, '', '', '', numeric-string}|array{string, '', '', numeric-string}|array{string, numeric-string, numeric-string}", $matches);
311311
};
312312

313313
function (string $size): void {
314314
if (preg_match('~^(?:(\\d+)x(\\d+)|(\\d+)|x(\\d+))?$~', $size, $matches) !== 1) {
315315
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
316316
}
317-
assertType('array{0: string, 1: numeric-string, 2: numeric-string, 3?: numeric-string, 4?: numeric-string}|array{string}', $matches);
317+
assertType("array{string, '', '', '', numeric-string}|array{string, '', '', numeric-string}|array{string, numeric-string, numeric-string}|array{string}", $matches);
318318
};
319319

320320
function (string $size): void {
321321
if (preg_match('~\{(?:(include)\\s+(?:[$]?\\w+(?<!file))\\s)|(?:(include\\s+file)\\s+(?:[$]?\\w+)\\s)|(?:(include(?:Template|(?:\\s+file)))\\s+(?:\'?.*?\.latte\'?)\\s)~', $size, $matches) !== 1) {
322322
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
323323
}
324-
assertType("array{0: string, 1: 'include', 2?: non-falsy-string, 3?: non-falsy-string}", $matches);
324+
assertType("array{string, '', '', non-falsy-string}|array{string, '', non-falsy-string}|array{string, 'include'}", $matches);
325325
};
326326

327327

@@ -338,13 +338,7 @@ function bug11277a(string $value): void
338338
function bug11277b(string $value): void
339339
{
340340
if (preg_match('/^(?:(.+,?)|(x))*$/', $value, $matches)) {
341-
assertType('array{0: string, 1?: non-empty-string, 2?: non-empty-string}', $matches);
342-
if (count($matches) === 2) {
343-
assertType('array{string, string}', $matches); // could be array{string, non-empty-string}
344-
}
345-
if (count($matches) === 3) {
346-
assertType('array{string, string, string}', $matches); // could be array{string, non-empty-string, non-empty-string}
347-
}
341+
assertType("array{0: string, 1?: non-empty-string}|array{string, '', non-empty-string}", $matches);
348342
}
349343
}
350344

@@ -656,10 +650,9 @@ function (string $value): void
656650
}
657651
};
658652

659-
function (string $value): void
660-
{
653+
function (string $value): void {
661654
if (preg_match('/^(?:(x)|(y))*$/', $value, $matches, PREG_OFFSET_CAPTURE)) {
662-
assertType("array{0: array{string, int<-1, max>}, 1?: array{non-empty-string, int<-1, max>}, 2?: array{non-empty-string, int<-1, max>}}", $matches);
655+
assertType("array{0: array{string, int<-1, max>}, 1?: array{non-empty-string, int<-1, max>}}|array{array{string, int<-1, max>}, array{'', int<-1, max>}, array{non-empty-string, int<-1, max>}}", $matches);
663656
}
664657
};
665658

@@ -683,3 +676,26 @@ static public function sayHello(string $source): void
683676
assertType("array{0?: string, dateFrom?: ''|numeric-string, 1?: ''|numeric-string, dateTo?: numeric-string, 2?: numeric-string}", $matches);
684677
}
685678
}
679+
680+
function (string $s): void {
681+
if (preg_match('~a|(\d)|(\s)~', $s, $matches) === 1) {
682+
assertType("array{0: string, 1?: numeric-string}|array{string, '', non-empty-string}", $matches);
683+
}
684+
};
685+
686+
function (string $s): void {
687+
if (preg_match('~a|((u)x)|((v)y)~', $s, $matches) === 1) {
688+
assertType("array{string, '', '', 'vy', 'v'}|array{string, 'ux', 'u'}|array{string}", $matches);
689+
}
690+
};
691+
692+
function (string $s): void {
693+
if (preg_match('~a|(\d)|(\s)~', $s, $matches, PREG_OFFSET_CAPTURE) === 1) {
694+
assertType("array{0: array{string, int<-1, max>}, 1?: array{numeric-string, int<-1, max>}}|array{array{string, int<-1, max>}, array{'', int<-1, max>}, array{non-empty-string, int<-1, max>}}", $matches);
695+
}
696+
};
697+
698+
function (string $s): void {
699+
preg_match('~a|(\d)|(\s)~', $s, $matches);
700+
assertType("array{0?: string, 1?: '', 2?: non-empty-string}|array{0?: string, 1?: numeric-string}", $matches);
701+
};

0 commit comments

Comments
 (0)