Skip to content

Commit 453e1cb

Browse files
authored
feat(router): detect dead links when generating static pages (#1192)
1 parent 12e2b03 commit 453e1cb

File tree

6 files changed

+269
-19
lines changed

6 files changed

+269
-19
lines changed

packages/console/src/Testing/ConsoleTester.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ public function withPrompting(): self
319319

320320
public function dd(): self
321321
{
322-
ld($this->output->asFormattedString());
322+
ld($this->output->asUnformattedString());
323323

324324
return $this;
325325
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<?php
2+
3+
namespace Tempest\Router\Static\Exceptions;
4+
5+
/**
 * Raised while generating a static page when dead links were detected in its content.
 */
final class DeadLinksDetectedException extends StaticPageException
{
    /**
     * @param string $uri URI of the page that contains the dead links.
     * @param string[] $links The dead links found on that page, exposed for reporting.
     */
    public function __construct(
        string $uri,
        public readonly array $links,
    ) {
        $count = count($links);

        // Pluralize the noun so a single dead link does not read "1 dead links".
        parent::__construct(
            sprintf('%s has %d dead %s', $uri, $count, $count === 1 ? 'link' : 'links'),
            $uri,
        );
    }
}

packages/router/src/Static/StaticGenerateCommand.php

Lines changed: 151 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,48 +10,62 @@
1010
use Tempest\Console\ExitCode;
1111
use Tempest\Console\HasConsole;
1212
use Tempest\Container\Container;
13+
use Tempest\Core\AppConfig;
1314
use Tempest\Core\Kernel;
1415
use Tempest\EventBus\EventBus;
1516
use Tempest\Http\GenericRequest;
1617
use Tempest\Http\Method;
1718
use Tempest\Http\Status;
19+
use Tempest\HttpClient\HttpClient;
1820
use Tempest\Router\DataProvider;
1921
use Tempest\Router\Router;
22+
use Tempest\Router\Static\Exceptions\DeadLinksDetectedException;
2023
use Tempest\Router\Static\Exceptions\InvalidStatusCodeException;
2124
use Tempest\Router\Static\Exceptions\NoTextualBodyException;
25+
use Tempest\Support\Arr;
26+
use Tempest\Support\Regex;
27+
use Tempest\Support\Str;
2228
use Tempest\View\Exceptions\ViewCompilationError;
2329
use Tempest\View\View;
2430
use Tempest\View\ViewRenderer;
2531
use Tempest\Vite\Exceptions\ManifestNotFoundException;
2632
use Throwable;
2733

34+
use function Tempest\Support\Language\pluralize;
2835
use function Tempest\Support\path;
2936
use function Tempest\uri;
3037

31-
final readonly class StaticGenerateCommand
38+
final class StaticGenerateCommand
3239
{
3340
use HasConsole;
3441

42+
private array $verifiedLinks = [];
43+
3544
public function __construct(
36-
private Console $console,
37-
private Kernel $kernel,
38-
private Container $container,
39-
private StaticPageConfig $staticPageConfig,
40-
private Router $router,
41-
private ViewRenderer $viewRenderer,
42-
private EventBus $eventBus,
45+
private readonly AppConfig $appConfig,
46+
private readonly Console $console,
47+
private readonly Kernel $kernel,
48+
private readonly Container $container,
49+
private readonly StaticPageConfig $staticPageConfig,
50+
private readonly Router $router,
51+
private readonly ViewRenderer $viewRenderer,
52+
private readonly EventBus $eventBus,
53+
private readonly HttpClient $httpClient,
4354
) {}
4455

4556
#[ConsoleCommand(name: 'static:generate', description: 'Compiles static pages')]
4657
public function __invoke(
4758
?string $filter = null,
59+
bool $allowDeadLinks = false,
60+
bool $allowExternalDeadLinks = true,
4861
#[ConsoleArgument(aliases: ['v'])]
4962
bool $verbose = false,
5063
): ExitCode {
5164
$publicPath = path($this->kernel->root, 'public');
5265

5366
$generated = 0;
5467
$failures = 0;
68+
$deadlinks = [];
5569

5670
$this->console->header('Generating static pages');
5771

@@ -64,6 +78,10 @@ public function __invoke(
6478
$failures++;
6579

6680
match (true) {
81+
$event->exception instanceof DeadLinksDetectedException => $this->keyValue(
82+
"<style='fg-gray'>{$event->path}</style>",
83+
sprintf("<style='fg-red'>%s DEAD %s</style>", count($event->exception->links), pluralize('LINK', count($event->exception->links))),
84+
),
6785
$event->exception instanceof InvalidStatusCodeException => $this->keyValue(
6886
"<style='fg-gray'>{$event->path}</style>",
6987
"<style='fg-red'>HTTP {$event->exception->status->value}</style>",
@@ -126,6 +144,11 @@ public function __invoke(
126144
mkdir($directory->toString(), recursive: true);
127145
}
128146

147+
if (! $allowDeadLinks && count($links = $this->detectDeadLinks($uri, $content, checkExternal: ! $allowExternalDeadLinks)) > 0) {
148+
$deadlinks[$uri] = $links;
149+
throw new DeadLinksDetectedException($uri, $links);
150+
}
151+
129152
file_put_contents($file->toString(), $content);
130153

131154
$this->eventBus->dispatch(new StaticPageGenerated($uri, $file->toString(), $content));
@@ -152,8 +175,127 @@ public function __invoke(
152175

153176
$this->keyValue('Static pages generated', "<style='fg-green'>{$generated}</style>");
154177

155-
return $failures > 0
178+
if ($deadlinks) {
179+
$this->console->header('Dead links');
180+
181+
foreach ($deadlinks as $uri => $links) {
182+
foreach ($links as $link) {
183+
$this->keyValue("<style='fg-gray'>{$uri}</style>", "<style='fg-red'>{$link}</style>");
184+
}
185+
}
186+
}
187+
188+
return $failures > 0 || count($deadlinks) > 0
156189
? ExitCode::ERROR
157190
: ExitCode::SUCCESS;
158191
}
192+
193+
/**
 * Scans the HTML of a page for `<a ... href="...">` targets and returns the ones considered dead.
 *
 * - Links whose tag has `ssg-ignore` as its first attribute are skipped (the pattern only
 *   recognizes the attribute directly after `<a`).
 * - Fragment links (`#foo`) are dead when the page has no `id="foo"` attribute.
 * - `./` and `../` links are resolved against `$uri` before being checked.
 * - Internal links (leading `/`, or sharing the application's base URI) are dispatched through
 *   the router and redirects are followed; a final 4xx/5xx response marks the link as dead.
 * - External `http` links are only requested when `$checkExternal` is true.
 * - Any other kind of link is left unchecked.
 *
 * NOTE(review): non-fragment links are remembered in `$this->verifiedLinks` and never checked
 * twice, so a dead link is only reported for the first page on which it is encountered.
 *
 * @param string $uri URI of the page being checked, used as the base for relative links.
 * @param string $html Content of that page.
 * @param bool $checkExternal Whether external links should be requested through the HTTP client.
 *
 * @return string[] The dead links found in `$html`.
 */
private function detectDeadLinks(string $uri, string $html, bool $checkExternal = false): array
{
    // Upper bound on the redirects followed for one internal link, so a redirect cycle cannot hang the command.
    $maxRedirects = 10;

    $deadlinks = [];
    $links = Regex\get_all_matches($html, '/<a\s+(?<ignore>ssg-ignore)?[^>]*href=["\'](?<url>[^"\']+)["\'][^>]*>/i', matches: ['url', 'ignore']);

    foreach ($links as ['url' => $link, 'ignore' => $ignore]) {
        // Links can be ignored with the ssg-ignore attribute
        if ($ignore ?: false) {
            continue;
        }

        // Check anchors (#)
        if (Str\starts_with($link, '#')) {
            if (! Regex\matches($html, "/id=\"" . preg_quote(Str\strip_start($link, '#'), '/') . "\"/")) {
                $deadlinks[] = $link;
            }

            continue;
        }

        // Resolve relative links (../ or ./)
        if (Str\starts_with($link, ['../', './'])) {
            $link = $this->resolveRelativeLink($uri, $link);
        }

        // Don't ping the same link multiple times
        if (in_array($link, $this->verifiedLinks, strict: true)) {
            continue;
        }

        $this->verifiedLinks[] = $link;

        // Check internal links with router (/ or same base uri)
        if (Str\starts_with($link, '/') || Str\starts_with($this->getLinkWithoutProtocol($link), $this->getLinkWithoutProtocol($this->appConfig->baseUri))) {
            // The target is computed afresh for every link. Lazily initializing it inside the
            // loop would carry the previous link's target over to this one.
            $target = match (true) {
                Str\starts_with($link, '/') => $this->appConfig->baseUri . '/' . Str\strip_start($link, '/'),
                default => $link,
            };

            $redirects = 0;

            do {
                $response = $this->router->dispatch(new GenericRequest(
                    method: Method::GET,
                    uri: $target,
                ));

                if (! $response->status->isRedirect()) {
                    break;
                }

                $location = Arr\first($response->getHeader('Location')?->values ?? []);

                // A redirect without a usable target cannot be followed any further.
                if (! is_string($location) || $location === '') {
                    break;
                }

                $target = $location;
            } while (++$redirects <= $maxRedirects);

            // A response that is still a redirect at this point is either a redirect loop,
            // an overly long chain, or a redirect without a target: all are reported as dead.
            if ($response->status->isRedirect() || $response->status->isClientError() || $response->status->isServerError()) {
                $deadlinks[] = $link;
            }

            continue;
        }

        if (! $checkExternal) {
            continue;
        }

        if (Str\starts_with($link, 'http')) {
            $response = $this->httpClient->get($link);

            if ($response->status->isClientError() || $response->status->isServerError()) {
                $deadlinks[] = $link;
            }

            continue;
        }

        // If we reach this, there is an unknown kind of link.
    }

    return $deadlinks;
}
269+
270+
/**
 * Resolves paths starting with ./ or ../ to a canonical URI.
 *
 * A trailing slash is stripped from `$basePath`, then its last segment is dropped and the
 * segments of `$relativePath` are applied on top of the remainder: `..` removes a segment,
 * `.` is skipped and anything else is appended. When `$basePath` is rooted (starts with `/`),
 * the result is rooted as well, even if `..` segments climb above the root.
 *
 * @param string $basePath URI of the page the link was found on.
 * @param string $relativePath Link as written in the page.
 *
 * @return string The resolved link.
 */
private function resolveRelativeLink(string $basePath, string $relativePath): string
{
    // Remember rootedness before the trailing slash is stripped ('/' becomes an empty string).
    $isRooted = Str\starts_with($basePath, '/');
    $basePath = Str\strip_end($basePath, '/');

    if (! Str\starts_with($relativePath, ['../', './'])) {
        return $basePath . '/' . Str\strip_start($relativePath, './');
    }

    $baseParts = explode('/', $basePath);

    // Drop the segment of the current document; links are relative to its parent.
    array_pop($baseParts);

    foreach (explode('/', $relativePath) as $part) {
        if ($part === '..') {
            array_pop($baseParts);
        } elseif ($part !== '.') {
            $baseParts[] = $part;
        }
    }

    $resolved = implode('/', $baseParts);

    // Popping past the root removes the empty leading segment that produced the initial slash.
    // Restore it so the result is still recognized as an internal link by the caller.
    if ($isRooted && ! Str\starts_with($resolved, '/')) {
        return '/' . $resolved;
    }

    return $resolved;
}
296+
297+
/**
 * Returns the given link with a leading `https://` or `http://` prefix stripped.
 */
private function getLinkWithoutProtocol(string $link): string
{
    $protocols = ['https://', 'http://'];

    return Str\strip_start($link, $protocols);
}
159301
}

packages/storage/composer.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
"league/flysystem-ziparchive": "^3.0",
1717
"league/flysystem-sftp-v3": "^3.0",
1818
"league/flysystem-azure-blob-storage": "^3.0",
19-
"league/flysystem-google-cloud-storage": "^3.0"
19+
"league/flysystem-google-cloud-storage": "^3.0",
20+
"tempest/support": "dev-main"
2021
},
2122
"autoload": {
2223
"psr-4": {

tests/Integration/Http/Static/Fixtures/StaticPageController.php

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@
55
namespace Tests\Tempest\Integration\Http\Static\Fixtures;
66

77
use Tempest\Http\Response;
8+
use Tempest\Http\Responses\Redirect;
89
use Tempest\Http\Responses\ServerError;
910
use Tempest\Router\Get;
1011
use Tempest\Router\StaticPage;
1112
use Tempest\View\Exceptions\ViewCompilationError;
1213
use Tempest\View\View;
1314
use Tempest\Vite\Exceptions\ManifestNotFoundException;
1415

16+
use function Tempest\uri;
1517
use function Tempest\view;
1618

1719
final readonly class StaticPageController
@@ -47,4 +49,34 @@ public function vite(
4749
): void {
4850
throw new ViewCompilationError('view.php', '', new ManifestNotFoundException('fake-manifest.json'));
4951
}
52+
53+
#[Get('/static/dead-link')]
#[StaticPage]
public function deadLink(): string
{
    // Fixture page with one internal and one external link, both pointing at a `/404` path.
    $internal = sprintf('<a href="%s">foo</a>', uri('/404'));
    $external = '<a href="https://google.com/404">foo</a>';

    return $internal . PHP_EOL . $external;
}
62+
63+
#[Get('/static/redirecting-route')]
#[StaticPage]
public function hasRedirect(): string
{
    // Fixture page whose only link targets the `/redirecting-route` path.
    $target = uri('/redirecting-route');

    return sprintf('<a href="%s">foo</a>', $target);
}
69+
70+
#[Get('/redirecting-route')]
public function redirectingRoute(): Redirect
{
    // Always answers with a redirect to an external site.
    $redirect = new Redirect('https://google.com');

    return $redirect;
}
}
75+
76+
#[Get('/static/allowed-dead-link')]
#[StaticPage]
public function allowedDeadLink(): string
{
    // Fixture page whose link to a `/404` path carries the `ssg-ignore` attribute.
    $target = uri('/404');

    return sprintf('<a ssg-ignore href="%s">foo</a>', $target);
}
5082
}

0 commit comments

Comments
 (0)