Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions src/Type/Php/RegexArrayShapeMatcher.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
use PHPStan\Type\Constant\ConstantStringType;
use PHPStan\Type\IntegerRangeType;
use PHPStan\Type\IntegerType;
use PHPStan\Type\NullType;
use PHPStan\Type\Regex\RegexAlternation;
use PHPStan\Type\Regex\RegexCapturingGroup;
use PHPStan\Type\Regex\RegexExpressionHelper;
Expand Down Expand Up @@ -140,7 +141,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
&& $onlyOptionalTopLevelGroup !== null
) {
// if only one top level capturing optional group exists
// we build a more precise constant union of a empty-match and a match with the group
// we build a more precise tagged union of a empty-match and a match with the group

$onlyOptionalTopLevelGroup->forceNonOptional();

Expand All @@ -154,18 +155,24 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
);

if (!$this->containsUnmatchedAsNull($flags, $matchesAll)) {
// positive match has a subject but not any capturing group
$combiType = TypeCombinator::union(
new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($flags, $matchesAll)], [0], [], true),
$combiType,
);
}

$onlyOptionalTopLevelGroup->clearOverrides();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This smells of too much statefulness and mutability. Is this pattern in more places in Regex? I'd much prefer immutable stateless value objects.

Copy link
Contributor Author

@staabm staabm Aug 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

totally agree.

I tried doing it in a immutable way, but my attempts don't work because the objects beeing changed here, are referenced by other objects via parent property and we need the objects referenced by $onlyOptionalTopLevelGroup to change, no matter which object in the $groupList directly or nested references it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this pattern in more places in Regex?

we use this pattern for the 2 tagged union special cases in this method here. nowhere else.


return $combiType;
} elseif (
!$matchesAll
&& $wasMatched->yes()
&& $onlyOptionalTopLevelGroup === null
&& $onlyTopLevelAlternation !== null
&& !$wasMatched->no()
) {
// if only a single top level alternation exist built a more precise tagged union

$combiTypes = [];
$isOptionalAlternation = false;
foreach ($onlyTopLevelAlternation->getGroupCombinations() as $groupCombo) {
Expand All @@ -179,6 +186,9 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
$beforeCurrentCombo = false;
} elseif ($beforeCurrentCombo && !$group->resetsGroupCounter()) {
$group->forceNonOptional();
$group->forceType(
$this->containsUnmatchedAsNull($flags, $matchesAll) ? new NullType() : new ConstantStringType(''),
);
} elseif (
$group->getAlternationId() === $onlyTopLevelAlternation->getId()
&& !$this->containsUnmatchedAsNull($flags, $matchesAll)
Expand All @@ -200,17 +210,26 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched

foreach ($groupCombo as $groupId) {
$group = $comboList[$groupId];
$group->restoreNonOptional();
$group->clearOverrides();
}
}

if ($isOptionalAlternation && !$this->containsUnmatchedAsNull($flags, $matchesAll)) {
if (
!$this->containsUnmatchedAsNull($flags, $matchesAll)
&& (
$onlyTopLevelAlternation->getAlternationsCount() !== count($onlyTopLevelAlternation->getGroupCombinations())
|| $isOptionalAlternation
)
) {
// positive match has a subject but not any capturing group
$combiTypes[] = new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($flags, $matchesAll)], [0], [], true);
}

return TypeCombinator::union(...$combiTypes);
}

// the general case, which should work in all cases but does not yield the most
// precise result possible in some cases
return $this->buildArrayType(
$groupList,
$wasMatched,
Expand Down
10 changes: 9 additions & 1 deletion src/Type/Regex/RegexAlternation.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ final class RegexAlternation
/** @var array<int, list<int>> */
private array $groupCombinations = [];

public function __construct(private readonly int $alternationId)
public function __construct(
private readonly int $alternationId,
private readonly int $alternationsCount,
)
{
}

Expand All @@ -28,6 +31,11 @@ public function pushGroup(int $combinationIndex, RegexCapturingGroup $group): vo
$this->groupCombinations[$combinationIndex][] = $group->getId();
}

public function getAlternationsCount(): int
{
return $this->alternationsCount;
}

/**
* @return array<int, list<int>>
*/
Expand Down
13 changes: 12 additions & 1 deletion src/Type/Regex/RegexCapturingGroup.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ final class RegexCapturingGroup

private bool $forceNonOptional = false;

private ?Type $forceType = null;

public function __construct(
private readonly int $id,
private readonly ?string $name,
Expand All @@ -30,9 +32,15 @@ public function forceNonOptional(): void
$this->forceNonOptional = true;
}

public function restoreNonOptional(): void
public function forceType(Type $type): void
{
$this->forceType = $type;
}

public function clearOverrides(): void
{
$this->forceNonOptional = false;
$this->forceType = null;
}

public function resetsGroupCounter(): bool
Expand Down Expand Up @@ -109,6 +117,9 @@ public function getName(): ?string

public function getType(): Type
{
if ($this->forceType !== null) {
return $this->forceType;
}
return $this->type;
}

Expand Down
2 changes: 1 addition & 1 deletion src/Type/Regex/RegexGroupParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ private function walkRegexAst(

if ($ast->getId() === '#alternation') {
$alternationId++;
$alternation = new RegexAlternation($alternationId);
$alternation = new RegexAlternation($alternationId, count($ast->getChildren()));
}

if ($ast->getId() === '#mark') {
Expand Down
26 changes: 26 additions & 0 deletions tests/PHPStan/Analyser/nsrt/bug-11311.php
Original file line number Diff line number Diff line change
Expand Up @@ -198,3 +198,29 @@ function (string $s): void {
preg_match('/%a(\d*)?/', $s, $matches, PREG_UNMATCHED_AS_NULL);
assertType("array{0?: string, 1?: ''|numeric-string|null}", $matches); // could be array{0?: string, 1?: ''|numeric-string}
};

function (string $s): void {
if (preg_match('~a|(\d)|(\s)~', $s, $matches, PREG_UNMATCHED_AS_NULL)) {
assertType("array{string, numeric-string|null, non-empty-string|null}", $matches);
} else {
assertType("array{}", $matches);
}
assertType("array{}|array{string, numeric-string|null, non-empty-string|null}", $matches);
};

function (string $s): void {
if (preg_match('~a|(\d)|(\s)~', $s, $matches, PREG_UNMATCHED_AS_NULL|PREG_OFFSET_CAPTURE) === 1) {
assertType("array{array{string|null, int<-1, max>}, array{numeric-string|null, int<-1, max>}, array{non-empty-string|null, int<-1, max>}}", $matches);
}
};

function (string $s): void {
if (preg_match('~a|((u)x)|((v)y)~', $s, $matches, PREG_UNMATCHED_AS_NULL) === 1) {
assertType("array{string, 'ux'|null, 'u'|null, 'vy'|null, 'v'|null}", $matches);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really like this tagged unions

}
};

function (string $s): void {
preg_match('~a|(\d)|(\s)~', $s, $matches, PREG_UNMATCHED_AS_NULL);
assertType("array{0?: string, 1?: numeric-string|null, 2?: non-empty-string|null}", $matches);
};
50 changes: 33 additions & 17 deletions tests/PHPStan/Analyser/nsrt/preg_match_shapes.php
Original file line number Diff line number Diff line change
Expand Up @@ -129,16 +129,16 @@ function doUnknownFlags(string $s, int $flags): void {

function doMultipleAlternativeCaptureGroupsWithSameNameWithModifier(string $s): void {
if (preg_match('/(?J)(?<Foo>[a-z]+)|(?<Foo>[0-9]+)/', $s, $matches)) {
assertType('array{0: string, Foo: numeric-string|non-empty-string, 1: non-empty-string, 2?: numeric-string}', $matches);
assertType("array{0: string, Foo: non-empty-string, 1: non-empty-string}|array{0: string, Foo: numeric-string, 1: '', 2: numeric-string}", $matches);
}
assertType('array{}|array{0: string, Foo: numeric-string|non-empty-string, 1: non-empty-string, 2?: numeric-string}', $matches);
assertType("array{}|array{0: string, Foo: non-empty-string, 1: non-empty-string}|array{0: string, Foo: numeric-string, 1: '', 2: numeric-string}", $matches);
}

function doMultipleConsecutiveCaptureGroupsWithSameNameWithModifier(string $s): void {
if (preg_match('/(?J)(?<Foo>[a-z]+)|(?<Foo>[0-9]+)/', $s, $matches)) {
assertType('array{0: string, Foo: numeric-string|non-empty-string, 1: non-empty-string, 2?: numeric-string}', $matches);
assertType("array{0: string, Foo: non-empty-string, 1: non-empty-string}|array{0: string, Foo: numeric-string, 1: '', 2: numeric-string}", $matches);
}
assertType('array{}|array{0: string, Foo: numeric-string|non-empty-string, 1: non-empty-string, 2?: numeric-string}', $matches);
assertType("array{}|array{0: string, Foo: non-empty-string, 1: non-empty-string}|array{0: string, Foo: numeric-string, 1: '', 2: numeric-string}", $matches);
}

// https://github.com/hoaproject/Regex/issues/31
Expand Down Expand Up @@ -307,21 +307,21 @@ function (string $size): void {
if (preg_match('~^(?:(\\d+)x(\\d+)|(\\d+)|x(\\d+))$~', $size, $matches) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType('array{0: string, 1: numeric-string, 2: numeric-string, 3?: numeric-string, 4?: numeric-string}', $matches);
assertType("array{string, '', '', '', numeric-string}|array{string, '', '', numeric-string}|array{string, numeric-string, numeric-string}", $matches);
};

function (string $size): void {
if (preg_match('~^(?:(\\d+)x(\\d+)|(\\d+)|x(\\d+))?$~', $size, $matches) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType('array{0: string, 1: numeric-string, 2: numeric-string, 3?: numeric-string, 4?: numeric-string}|array{string}', $matches);
assertType("array{string, '', '', '', numeric-string}|array{string, '', '', numeric-string}|array{string, numeric-string, numeric-string}|array{string}", $matches);
};

function (string $size): void {
if (preg_match('~\{(?:(include)\\s+(?:[$]?\\w+(?<!file))\\s)|(?:(include\\s+file)\\s+(?:[$]?\\w+)\\s)|(?:(include(?:Template|(?:\\s+file)))\\s+(?:\'?.*?\.latte\'?)\\s)~', $size, $matches) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType("array{0: string, 1: 'include', 2?: non-falsy-string, 3?: non-falsy-string}", $matches);
assertType("array{string, '', '', non-falsy-string}|array{string, '', non-falsy-string}|array{string, 'include'}", $matches);
};


Expand All @@ -338,13 +338,7 @@ function bug11277a(string $value): void
function bug11277b(string $value): void
{
if (preg_match('/^(?:(.+,?)|(x))*$/', $value, $matches)) {
assertType('array{0: string, 1?: non-empty-string, 2?: non-empty-string}', $matches);
if (count($matches) === 2) {
assertType('array{string, string}', $matches); // could be array{string, non-empty-string}
}
if (count($matches) === 3) {
assertType('array{string, string, string}', $matches); // could be array{string, non-empty-string, non-empty-string}
}
assertType("array{0: string, 1?: non-empty-string}|array{string, '', non-empty-string}", $matches);
}
}

Expand Down Expand Up @@ -656,10 +650,9 @@ function (string $value): void
}
};

function (string $value): void
{
function (string $value): void {
if (preg_match('/^(?:(x)|(y))*$/', $value, $matches, PREG_OFFSET_CAPTURE)) {
assertType("array{0: array{string, int<-1, max>}, 1?: array{non-empty-string, int<-1, max>}, 2?: array{non-empty-string, int<-1, max>}}", $matches);
assertType("array{0: array{string, int<-1, max>}, 1?: array{non-empty-string, int<-1, max>}}|array{array{string, int<-1, max>}, array{'', int<-1, max>}, array{non-empty-string, int<-1, max>}}", $matches);
}
};

Expand All @@ -683,3 +676,26 @@ static public function sayHello(string $source): void
assertType("array{0?: string, dateFrom?: ''|numeric-string, 1?: ''|numeric-string, dateTo?: numeric-string, 2?: numeric-string}", $matches);
}
}

function (string $s): void {
if (preg_match('~a|(\d)|(\s)~', $s, $matches) === 1) {
assertType("array{0: string, 1?: numeric-string}|array{string, '', non-empty-string}", $matches);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, thank you for fixing this!

}
};

function (string $s): void {
if (preg_match('~a|((u)x)|((v)y)~', $s, $matches) === 1) {
assertType("array{string, '', '', 'vy', 'v'}|array{string, 'ux', 'u'}|array{string}", $matches);
}
};

function (string $s): void {
if (preg_match('~a|(\d)|(\s)~', $s, $matches, PREG_OFFSET_CAPTURE) === 1) {
assertType("array{0: array{string, int<-1, max>}, 1?: array{numeric-string, int<-1, max>}}|array{array{string, int<-1, max>}, array{'', int<-1, max>}, array{non-empty-string, int<-1, max>}}", $matches);
}
};

function (string $s): void {
preg_match('~a|(\d)|(\s)~', $s, $matches);
assertType("array{0?: string, 1?: '', 2?: non-empty-string}|array{0?: string, 1?: numeric-string}", $matches);
};
Loading