Skip to content

Commit 3eb0c59

Browse files
authored
feat(routing): add regex chunking to route regex (#714)
1 parent f89eb5d commit 3eb0c59

File tree

14 files changed

+324
-93
lines changed

14 files changed

+324
-93
lines changed

phpunit.xml.dist

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,6 @@
3333
<env name="BASE_URI" value="" />
3434
<env name="CACHE" value="null" />
3535
<env name="DISCOVERY_CACHE" value="true" />
36+
<ini name="memory_limit" value="256M" />
3637
</php>
3738
</phpunit>

src/Tempest/Http/src/RouteConfig.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,16 @@
44

55
namespace Tempest\Http;
66

7+
use Tempest\Http\Routing\Matching\MatchingRegex;
8+
79
final class RouteConfig
810
{
911
public function __construct(
1012
/** @var array<string, array<string, Route>> */
1113
public array $staticRoutes = [],
1214
/** @var array<string, array<string, Route>> */
1315
public array $dynamicRoutes = [],
14-
/** @var array<string, string> */
16+
/** @var array<string, MatchingRegex> */
1517
public array $matchingRegexes = [],
1618
) {
1719
}

src/Tempest/Http/src/Routing/Construction/RouteConfigurator.php

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ final class RouteConfigurator
2929

3030
public function __construct()
3131
{
32-
3332
$this->routingTree = new RoutingTree();
3433
}
3534

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Tempest\Http\Routing\Construction;
6+
7+
use Tempest\Http\Routing\Matching\MatchingRegex;
8+
9+
/**
 * Builds the matching regex for a routing tree, starting at a single root node.
 *
 * The tree is walked iteratively and every visited node contributes its segment to one
 * large alternation. When the regex under construction exceeds REGEX_SIZE_LIMIT it is
 * closed off and stored as a "chunk", after which a new chunk is started with the same
 * prefix of currently open parent nodes. All chunks are wrapped in a MatchingRegex.
 */
final readonly class RouteMatchingRegexBuilder
{
    // This limit is guesstimated using a small script feeding an ever-growing pattern into preg_match
    private const int PREG_REGEX_SIZE_LIMIT = 32768;

    // Head-room subtracted from the preg limit; the size check runs before a node is appended
    private const int REGEX_SIZE_MARGIN = 256;

    // Length of the regex under construction above which a new chunk is started
    public const int REGEX_SIZE_LIMIT = self::PREG_REGEX_SIZE_LIMIT - self::REGEX_SIZE_MARGIN;

    public function __construct(private RouteTreeNode $rootNode)
    {
    }

    /**
     * Converts the tree below the root node into one or more regex chunks.
     *
     * @return MatchingRegex holds every finished chunk followed by the chunk that was being built last
     */
    public function toRegex(): MatchingRegex
    {
        // Holds all finished regex "chunks"
        $regexes = [];

        // Current regex chunk, every alternative is added with a leading '|' which is stripped on output
        $regex = '';

        // Holds the closing parentheses needed to 'end' the current chunk at any point while building
        $regexBack = '';

        /** @var (RouteTreeNode|null)[] $workingSet */
        $workingSet = [$this->rootNode];

        // Tracks the currently open parent nodes so the regex prefix can be rebuilt when chunking
        /** @var RouteTreeNode[] $stack */
        $stack = [];

        // Process the working set until it is empty
        while ($workingSet !== []) {
            // array_pop is used for performance reasons, this means the working set is processed in LIFO order
            /** @var RouteTreeNode|null $node */
            $node = array_pop($workingSet);

            // null values are end-markers: pop the stack and 'close' the group opened for that parent
            if ($node === null) {
                array_pop($stack);
                $regex .= $regexBack[0];
                $regexBack = substr($regexBack, 1);

                continue;
            }

            // Check whether the regex is getting too big, and thus needs to be chunked
            if (strlen($regex) > self::REGEX_SIZE_LIMIT) {
                // Close all open groups and store the chunk, dropping the leading '|'
                $regexes[] = '#' . substr($regex, 1) . $regexBack . '#';
                $regex = '';

                // Rebuild the regex prefix from the visited parent nodes, known as 'the stack'
                foreach ($stack as $previousNode) {
                    $regex .= '|' . self::routeNodeSegmentRegex($previousNode);
                    $regex .= '(?';
                }
            }

            // Add the node's route segment to the current regex
            $regex .= '|' . self::routeNodeSegmentRegex($node);
            $targetRouteRegex = self::routeNodeTargetRegex($node);

            // Only open a branch when the node actually has children
            if ($node->dynamicPaths !== [] || $node->staticPaths !== []) {
                // The regex uses a "branch reset group" to match the different available paths.
                // Two available routes /a and /b will create the regex (?|a|b); the '|' after '(?'
                // comes from the first alternative that is appended.
                $regex .= '(?';
                $regexBack .= ')';
                $stack[] = $node;

                // Add the target route regex as an alternative inside the group
                if ($targetRouteRegex !== '') {
                    $regex .= '|' . $targetRouteRegex;
                }

                // Add an end-marker to the working set, it is processed after all children have been processed
                $workingSet[] = null;

                // Add dynamic routes to the working set, these are processed before the end-marker
                foreach ($node->dynamicPaths as $child) {
                    $workingSet[] = $child;
                }

                // Add static routes to the working set, these are processed first because of array_pop
                foreach ($node->staticPaths as $child) {
                    $workingSet[] = $child;
                }
            } else {
                // Node without children: append the target route regex directly
                $regex .= $targetRouteRegex;
            }
        }

        // Return all regex chunks including the current one
        return new MatchingRegex([
            ...$regexes,
            '#' . substr($regex, 1) . '#',
        ]);
    }

    /**
     * Create regex for the targetRoute in node with optional slash and end of line match `$`.
     * The `(*MARK:x)` is a marker which, when this regex is matched, will cause the matches array to contain
     * a key `"MARK"` with value `"x"`; it is used to track which route has been matched.
     * Returns an empty string for nodes without a target.
     */
    private static function routeNodeTargetRegex(RouteTreeNode $node): string
    {
        if ($node->targetRoute === null) {
            return '';
        }

        return '\/?$(*' . MarkedRoute::REGEX_MARK_TOKEN . ':' . $node->targetRoute->mark . ')';
    }

    /**
     * Creates the regex for a route node's own segment, without looking at its children.
     */
    private static function routeNodeSegmentRegex(RouteTreeNode $node): string
    {
        return match ($node->type) {
            RouteTreeNodeType::Root => '^',
            RouteTreeNodeType::Static => "/{$node->segment}",
            RouteTreeNodeType::Dynamic => '/(' . $node->segment . ')',
        };
    }
}

src/Tempest/Http/src/Routing/Construction/RouteTreeNode.php

Lines changed: 3 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@
1212
final class RouteTreeNode
1313
{
1414
/** @var array<string, RouteTreeNode> */
15-
private array $staticPaths = [];
15+
public array $staticPaths = [];
1616

1717
/** @var array<string, RouteTreeNode> */
18-
private array $dynamicPaths = [];
18+
public array $dynamicPaths = [];
1919

20-
private ?MarkedRoute $targetRoute = null;
20+
public ?MarkedRoute $targetRoute = null;
2121

2222
private function __construct(
2323
public readonly RouteTreeNodeType $type,
@@ -72,55 +72,4 @@ private static function convertDynamicSegmentToRegex(string $uriPart): string
7272
$uriPart,
7373
);
7474
}
75-
76-
/**
77-
* Return the matching regex of this path and its children by means of recursion
78-
*/
79-
public function toRegex(): string
80-
{
81-
$regexp = $this->regexSegment();
82-
83-
if ($this->staticPaths !== [] || $this->dynamicPaths !== []) {
84-
// The regex uses "Branch reset group" to match different available paths.
85-
// two available routes /a and /b will create the regex (?|a|b)
86-
$regexp .= "(?";
87-
88-
// Add static route alteration
89-
foreach ($this->staticPaths as $path) {
90-
$regexp .= '|' . $path->toRegex();
91-
}
92-
93-
// Add dynamic route alteration, for example routes {id:\d} and {id:\w} will create the regex (?|(\d)|(\w)).
94-
// Both these parameter matches will end up on the same index in the matches array.
95-
foreach ($this->dynamicPaths as $path) {
96-
$regexp .= '|' . $path->toRegex();
97-
}
98-
99-
// Add a leaf alteration with an optional slash and end of line match `$`.
100-
// The `(*MARK:x)` is a marker which when this regex is matched will cause the matches array to contain
101-
// a key `"MARK"` with value `"x"`, it is used to track which route has been matched
102-
if ($this->targetRoute !== null) {
103-
$regexp .= '|\/?$(*' . MarkedRoute::REGEX_MARK_TOKEN . ':' . $this->targetRoute->mark . ')';
104-
}
105-
106-
$regexp .= ")";
107-
} elseif ($this->targetRoute !== null) {
108-
// Add a singular leaf regex without alteration
109-
$regexp .= '\/?$(*' . MarkedRoute::REGEX_MARK_TOKEN . ':' . $this->targetRoute->mark . ')';
110-
}
111-
112-
return $regexp;
113-
}
114-
115-
/**
116-
* Translates only the current node's segment into regex. This does not recurse into its child nodes.
117-
*/
118-
private function regexSegment(): string
119-
{
120-
return match($this->type) {
121-
RouteTreeNodeType::Root => '^',
122-
RouteTreeNodeType::Static => "/{$this->segment}",
123-
RouteTreeNodeType::Dynamic => '/(' . $this->segment . ')',
124-
};
125-
}
12675
}

src/Tempest/Http/src/Routing/Construction/RoutingTree.php

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
namespace Tempest\Http\Routing\Construction;
66

7+
use Tempest\Http\Routing\Matching\MatchingRegex;
8+
79
/**
810
* @internal
911
*/
@@ -32,9 +34,9 @@ public function add(MarkedRoute $markedRoute): void
3234
$node->setTargetRoute($markedRoute);
3335
}
3436

35-
/** @return array<string, string> */
37+
/** @return array<string, MatchingRegex> */
3638
public function toMatchingRegexes(): array
3739
{
38-
return array_map(static fn (RouteTreeNode $node) => "#{$node->toRegex()}#", $this->roots);
40+
return array_map(static fn (RouteTreeNode $node) => (new RouteMatchingRegexBuilder($node))->toRegex(), $this->roots);
3941
}
4042
}

src/Tempest/Http/src/Routing/Matching/GenericRouteMatcher.php

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
use Tempest\Http\MatchedRoute;
99
use Tempest\Http\Route;
1010
use Tempest\Http\RouteConfig;
11-
use Tempest\Http\Routing\Construction\MarkedRoute;
1211

1312
final readonly class GenericRouteMatcher implements RouteMatcher
1413
{
@@ -50,17 +49,17 @@ private function matchDynamicRoute(PsrRequest $request): ?MatchedRoute
5049
$matchingRegexForMethod = $this->routeConfig->matchingRegexes[$request->getMethod()];
5150

5251
// Then we'll use this regex to see whether we have a match or not
53-
$matchResult = preg_match($matchingRegexForMethod, $request->getUri()->getPath(), $routingMatches);
52+
$matchResult = $matchingRegexForMethod->match($request->getUri()->getPath());
5453

55-
if (! $matchResult || ! array_key_exists(MarkedRoute::REGEX_MARK_TOKEN, $routingMatches)) {
54+
if ($matchResult === null) {
5655
return null;
5756
}
5857

5958
// Get the route based on the matched mark
60-
$route = $routesForMethod[$routingMatches[MarkedRoute::REGEX_MARK_TOKEN]];
59+
$route = $routesForMethod[$matchResult->mark];
6160

6261
// Extract the parameters based on the route and matches
63-
$routeParams = $this->extractParams($route, $routingMatches);
62+
$routeParams = $this->extractParams($route, $matchResult->matches);
6463

6564
return new MatchedRoute($route, $routeParams);
6665
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Tempest\Http\Routing\Matching;
6+
7+
use RuntimeException;
8+
use Tempest\Http\Routing\Construction\MarkedRoute;
9+
10+
/**
 * A route matching regex that may be split over several patterns ("chunks").
 */
final readonly class MatchingRegex
{
    /**
     * @param string[] $patterns complete, delimited regex patterns that are tried in the given order
     */
    public function __construct(
        public array $patterns,
    ) {
    }

    /**
     * Runs the patterns against the URI one by one and returns the first marked match.
     *
     * A pattern that matches without producing a mark entry is skipped and the next pattern is tried.
     *
     * @return RouteMatch|null the first marked match, or null when no pattern produced one
     *
     * @throws RuntimeException when preg_match reports a failure for one of the patterns
     */
    public function match(string $uri): ?RouteMatch
    {
        foreach ($this->patterns as $pattern) {
            $matchResult = preg_match($pattern, $uri, $matches);

            if ($matchResult === false) {
                // Report both the numeric code and the readable PCRE message, the bare code is hard to diagnose
                $errorCode = preg_last_error();

                throw new RuntimeException(
                    'Failed to use matching regex. Got error ' . $errorCode . ': ' . preg_last_error_msg(),
                    $errorCode,
                );
            }

            // No match in this chunk, try the next one
            if ($matchResult === 0) {
                continue;
            }

            // Matched, but no route mark was set, so no route can be derived from this chunk
            if (! array_key_exists(MarkedRoute::REGEX_MARK_TOKEN, $matches)) {
                continue;
            }

            return RouteMatch::match($matches);
        }

        return null;
    }
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Tempest\Http\Routing\Matching;
6+
7+
use Tempest\Http\Routing\Construction\MarkedRoute;
8+
9+
/**
 * Result of a successful route regex match: the matched route's mark plus the raw match array.
 */
final readonly class RouteMatch
{
    /**
     * @param string $mark value found under the mark token key of the match array
     * @param array $matches the complete match array the mark was read from
     */
    private function __construct(
        public string $mark,
        public array $matches,
    ) {
    }

    /**
     * Creates a RouteMatch from a match array that contains the mark token key.
     */
    public static function match(array $params): self
    {
        $mark = $params[MarkedRoute::REGEX_MARK_TOKEN];

        return new self(mark: $mark, matches: $params);
    }
}

src/Tempest/Http/tests/RouteConfigTest.php

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
use Tempest\Http\Method;
99
use Tempest\Http\Route;
1010
use Tempest\Http\RouteConfig;
11+
use Tempest\Http\Routing\Matching\MatchingRegex;
1112

1213
/**
1314
* @internal
@@ -24,7 +25,7 @@ public function test_serialization(): void
2425
'POST' => ['b' => new Route('/', Method::POST)],
2526
],
2627
[
27-
'POST' => '#^(?|/([^/]++)(?|/1\/?$(*MARK:b)|/3\/?$(*MARK:d)))#',
28+
'POST' => new MatchingRegex(['#^(?|/([^/]++)(?|/1\/?$(*MARK:b)|/3\/?$(*MARK:d)))#']),
2829
]
2930
);
3031

0 commit comments

Comments
 (0)