diff --git a/src/Type/Regex/RegexAstWalkResult.php b/src/Type/Regex/RegexAstWalkResult.php new file mode 100644 index 0000000000..32e017a254 --- /dev/null +++ b/src/Type/Regex/RegexAstWalkResult.php @@ -0,0 +1,105 @@ + /** + * @param array<int, RegexCapturingGroup> $capturingGroups + * @param list<string> $markVerbs + */ + public function __construct( + private int $alternationId, + private int $captureGroupId, + private array $capturingGroups, + private array $markVerbs, + ) + { + } + + public static function createEmpty(): self + { + return new self( + -1, + // use different start-index for groups to make it easier to distinguish groupids from other ids + 100, + [], + [], + ); + } + + public function nextAlternationId(): self + { + return new self( + $this->alternationId + 1, + $this->captureGroupId, + $this->capturingGroups, + $this->markVerbs, + ); + } + + public function nextCaptureGroupId(): self + { + return new self( + $this->alternationId, + $this->captureGroupId + 1, + $this->capturingGroups, + $this->markVerbs, + ); + } + + public function addCapturingGroup(RegexCapturingGroup $group): self + { + $capturingGroups = $this->capturingGroups; + $capturingGroups[$group->getId()] = $group; + + return new self( + $this->alternationId, + $this->captureGroupId, + $capturingGroups, + $this->markVerbs, + ); + } + + public function markVerb(string $markVerb): self + { + $verbs = $this->markVerbs; + $verbs[] = $markVerb; + + return new self( + $this->alternationId, + $this->captureGroupId, + $this->capturingGroups, + $verbs, + ); + } + + public function getAlternationId(): int + { + return $this->alternationId; + } + + public function getCaptureGroupId(): int + { + return $this->captureGroupId; + } + + /** + * @return array<int, RegexCapturingGroup> + */ + public function getCapturingGroups(): array + { + return $this->capturingGroups; + } + + /** + * @return list<string> + */ + public function getMarkVerbs(): array + { + return $this->markVerbs; + } + +} diff --git a/src/Type/Regex/RegexGroupParser.php b/src/Type/Regex/RegexGroupParser.php index 
9780b2c69a..0a0ea12f30 100644 --- a/src/Type/Regex/RegexGroupParser.php +++ b/src/Type/Regex/RegexGroupParser.php @@ -73,51 +73,39 @@ public function parseGroups(string $regex): ?array $captureOnlyNamed = str_contains($modifiers, 'n'); } - $capturingGroups = []; - $alternationId = -1; - $captureGroupId = 100; - $markVerbs = []; - $this->walkRegexAst( + $astWalkResult = $this->walkRegexAst( $ast, null, - $alternationId, 0, false, null, - $captureGroupId, - $capturingGroups, - $markVerbs, $captureOnlyNamed, false, $modifiers, + RegexAstWalkResult::createEmpty(), ); - return [$capturingGroups, $markVerbs]; + return [$astWalkResult->getCapturingGroups(), $astWalkResult->getMarkVerbs()]; } - /** - * @param array<int, RegexCapturingGroup> $capturingGroups - * @param list<string> $markVerbs - */ private function walkRegexAst( TreeNode $ast, ?RegexAlternation $alternation, - int &$alternationId, int $combinationIndex, bool $inOptionalQuantification, RegexCapturingGroup|RegexNonCapturingGroup|null $parentGroup, - int &$captureGroupId, - array &$capturingGroups, - array &$markVerbs, bool $captureOnlyNamed, bool $repeatedMoreThanOnce, string $patternModifiers, - ): void + RegexAstWalkResult $astWalkResult, + ): RegexAstWalkResult { $group = null; if ($ast->getId() === '#capturing') { + $astWalkResult = $astWalkResult->nextCaptureGroupId(); + $group = new RegexCapturingGroup( - $captureGroupId++, + $astWalkResult->getCaptureGroupId(), null, $alternation, $inOptionalQuantification, @@ -130,9 +118,11 @@ private function walkRegexAst( ); $parentGroup = $group; } elseif ($ast->getId() === '#namedcapturing') { + $astWalkResult = $astWalkResult->nextCaptureGroupId(); + $name = $ast->getChild(0)->getValueValue(); $group = new RegexCapturingGroup( - $captureGroupId++, + $astWalkResult->getCaptureGroupId(), $name, $alternation, $inOptionalQuantification, @@ -176,20 +166,19 @@ private function walkRegexAst( } if ($ast->getId() === '#alternation') { - $alternationId++; - $alternation = new 
RegexAlternation($alternationId, count($ast->getChildren())); + $astWalkResult = $astWalkResult->nextAlternationId(); + $alternation = new RegexAlternation($astWalkResult->getAlternationId(), count($ast->getChildren())); } if ($ast->getId() === '#mark') { - $markVerbs[] = $ast->getChild(0)->getValueValue(); - return; + return $astWalkResult->markVerb($ast->getChild(0)->getValueValue()); } if ( $group instanceof RegexCapturingGroup && (!$captureOnlyNamed || $group->isNamed()) ) { - $capturingGroups[$group->getId()] = $group; + $astWalkResult = $astWalkResult->addCapturingGroup($group); if ($alternation !== null) { $alternation->pushGroup($combinationIndex, $group); @@ -197,19 +186,16 @@ private function walkRegexAst( } foreach ($ast->getChildren() as $child) { - $this->walkRegexAst( + $astWalkResult = $this->walkRegexAst( $child, $alternation, - $alternationId, $combinationIndex, $inOptionalQuantification, $parentGroup, - $captureGroupId, - $capturingGroups, - $markVerbs, $captureOnlyNamed, $repeatedMoreThanOnce, $patternModifiers, + $astWalkResult, ); if ($ast->getId() !== '#alternation') { @@ -218,6 +204,8 @@ private function walkRegexAst( $combinationIndex++; } + + return $astWalkResult; } private function allowConstantTypes(