|
| 1 | +<?php |
| 2 | + |
| 3 | +declare(strict_types=1); |
| 4 | + |
| 5 | +namespace Tempest\Http\Routing\Construction; |
| 6 | + |
| 7 | +use Tempest\Http\Routing\Matching\MatchingRegex; |
| 8 | + |
/**
 * Builds the matching regex for a route tree.
 *
 * The tree is walked depth-first and flattened into a single alternation of nested groups.
 * Because the pattern can become too large for preg_match, the output is split into several
 * independent "chunks"; each chunk is a complete, self-contained pattern delimited by `#`.
 */
final readonly class RouteMatchingRegexBuilder
{
    // This limit is guesstimated using a small script that fed ever-growing patterns into preg_match
    private const int PREG_REGEX_SIZE_LIMIT = 32768;

    // Safety margin kept below the limit, because the size check runs *before* a node is appended
    private const int REGEX_SIZE_MARGIN = 256;

    // Length (in bytes) above which the current chunk is closed and a new one is started
    private const int REGEX_SIZE_LIMIT = self::PREG_REGEX_SIZE_LIMIT - self::REGEX_SIZE_MARGIN;

    public function __construct(private RouteTreeNode $rootNode)
    {
    }

    /**
     * Flattens the route tree starting at the root node into one or more regex chunks.
     *
     * @return MatchingRegex wraps the list of chunk patterns, in the order they were produced
     */
    public function toRegex(): MatchingRegex
    {
        // Holds all finished regex "chunks"
        /** @var string[] $regexes */
        $regexes = [];

        // Current regex chunk; every appended alternative starts with '|', the very first one is stripped on output
        $regex = '';

        // Closing characters still owed to the groups that are currently open (one ')' per entry in $stack)
        $regexBack = '';

        /** @var (RouteTreeNode|null)[] $workingSet */
        $workingSet = [$this->rootNode];

        // Track how 'deep' we are in the tree to be able to rebuild the regex prefix when chunking
        /** @var RouteTreeNode[] $stack */
        $stack = [];

        // Processes the working set until it is empty
        while ($workingSet !== []) {
            // array_pop is used for performance reasons; this makes the working set a LIFO stack,
            // which is why children are pushed in the reverse of the order they should be visited in.
            /** @var RouteTreeNode|null $node */
            $node = array_pop($workingSet);

            // null values are used as an end-marker: all children of the innermost open node
            // have been handled, so leave that node and close its group.
            if ($node === null) {
                array_pop($stack);
                $regex .= $regexBack[0];

                $regexBack = substr($regexBack, 1);

                continue;
            }

            // Checks if the regex is getting too big, and thus if we need to chunk it.
            // strlen() is intentional: the size of the pattern is measured in bytes.
            if (strlen($regex) > self::REGEX_SIZE_LIMIT) {
                // Close every group that is still open so the chunk is a valid pattern on its own
                $regexes[] = '#' . substr($regex, 1) . $regexBack . '#';
                $regex = '';

                // Rebuild the regex match prefix based on the current visited parent nodes, known as 'the stack'.
                // $regexBack is left untouched because the same groups are open again in the new chunk.
                foreach ($stack as $previousNode) {
                    $regex .= '|' . self::routeNodeSegmentRegex($previousNode);
                    $regex .= '(?';
                }
            }

            // Add the node route segment to the current regex
            $regex .= '|' . self::routeNodeSegmentRegex($node);
            $targetRouteRegex = self::routeNodeTargetRegex($node);

            // Check if node has children to ensure we only use branches if the node has children
            if ($node->dynamicPaths !== [] || $node->staticPaths !== []) {
                // The regex uses a "branch reset group" to match the different available paths.
                // Two available routes /a and /b will create the regex (?|a|b); the '|' that
                // completes the '(?|' opener comes from the first alternative appended below.
                $regex .= '(?';
                $regexBack .= ')';
                $stack[] = $node;

                // Add target route regex as the first alternative of the group
                if ($targetRouteRegex !== '') {
                    $regex .= '|' . $targetRouteRegex;
                }

                // Add an end marker to the working set, this will be processed after the children have been processed
                $workingSet[] = null;

                // Add dynamic routes to the working set, these will be processed before the end marker
                foreach ($node->dynamicPaths as $child) {
                    $workingSet[] = $child;
                }

                // Add static routes to the working set, these will be processed first due to the array_pop
                foreach ($node->staticPaths as $child) {
                    $workingSet[] = $child;
                }
            } else {
                // Leaf node: append the target route directly, no group is needed
                $regex .= $targetRouteRegex;
            }
        }

        // Return all regex chunks including the current one.
        // Every end marker has been processed by now, so no group is left open in $regex.
        return new MatchingRegex([
            ...$regexes,
            '#' . substr($regex, 1) . '#',
        ]);
    }

    /**
     * Create regex for the targetRoute in node with optional slash and end of line match `$`.
     * The `(*MARK:x)` is a marker which when this regex is matched will cause the matches array to contain
     * a key `"MARK"` with value `"x"`, it is used to track which route has been matched.
     * Returns an empty string for nodes without a target.
     */
    private static function routeNodeTargetRegex(RouteTreeNode $node): string
    {
        if ($node->targetRoute === null) {
            return '';
        }

        return '\/?$(*' . MarkedRoute::REGEX_MARK_TOKEN . ':' . $node->targetRoute->mark . ')';
    }

    /**
     * Creates the regex for a route node's segment.
     *
     * The root node only anchors the pattern at the start, a static node matches its segment
     * after a slash, and a dynamic node wraps its segment in a capture group.
     *
     * NOTE(review): the segment is inserted into the pattern verbatim. This presumably relies on
     * segments already being regex-safe and free of the `#` delimiter - confirm against RouteTreeNode.
     */
    private static function routeNodeSegmentRegex(RouteTreeNode $node): string
    {
        return match ($node->type) {
            RouteTreeNodeType::Root => '^',
            RouteTreeNodeType::Static => "/{$node->segment}",
            RouteTreeNodeType::Dynamic => '/(' . $node->segment . ')',
        };
    }
}
0 commit comments