Skip to content

Commit 97197aa

Browse files
committed
feat(router): support dead link detection when generating static pages
1 parent d6a7d92 commit 97197aa

File tree

2 files changed

+155
-9
lines changed

2 files changed

+155
-9
lines changed
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<?php
2+
3+
namespace Tempest\Router\Static\Exceptions;
4+
5+
/**
 * Raised when the markup of a generated static page contains links that
 * were detected as dead.
 */
final class DeadLinksDetectedException extends StaticPageException
{
    /**
     * @param string $uri URI of the page on which the dead links were found.
     * @param array $links The dead links detected on that page.
     */
    public function __construct(
        string $uri,
        public readonly array $links,
    ) {
        $total = count($links);
        $message = sprintf('%s has %s dead links', $uri, $total);

        parent::__construct($message, $uri);
    }
}

packages/router/src/Static/StaticGenerateCommand.php

Lines changed: 142 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,48 +10,61 @@
1010
use Tempest\Console\ExitCode;
1111
use Tempest\Console\HasConsole;
1212
use Tempest\Container\Container;
13+
use Tempest\Core\AppConfig;
1314
use Tempest\Core\Kernel;
1415
use Tempest\EventBus\EventBus;
1516
use Tempest\Http\GenericRequest;
1617
use Tempest\Http\Method;
1718
use Tempest\Http\Status;
19+
use Tempest\HttpClient\HttpClient;
1820
use Tempest\Router\DataProvider;
1921
use Tempest\Router\Router;
22+
use Tempest\Router\Static\Exceptions\DeadLinksDetectedException;
2023
use Tempest\Router\Static\Exceptions\InvalidStatusCodeException;
2124
use Tempest\Router\Static\Exceptions\NoTextualBodyException;
25+
use Tempest\Support\Regex;
26+
use Tempest\Support\Str;
2227
use Tempest\View\Exceptions\ViewCompilationError;
2328
use Tempest\View\View;
2429
use Tempest\View\ViewRenderer;
2530
use Tempest\Vite\Exceptions\ManifestNotFoundException;
2631
use Throwable;
2732

33+
use function Tempest\Support\Language\pluralize;
2834
use function Tempest\Support\path;
2935
use function Tempest\uri;
3036

31-
final readonly class StaticGenerateCommand
37+
final class StaticGenerateCommand
3238
{
3339
use HasConsole;
3440

41+
private array $verifiedLinks = [];
42+
3543
public function __construct(
36-
private Console $console,
37-
private Kernel $kernel,
38-
private Container $container,
39-
private StaticPageConfig $staticPageConfig,
40-
private Router $router,
41-
private ViewRenderer $viewRenderer,
42-
private EventBus $eventBus,
44+
private readonly AppConfig $appConfig,
45+
private readonly Console $console,
46+
private readonly Kernel $kernel,
47+
private readonly Container $container,
48+
private readonly StaticPageConfig $staticPageConfig,
49+
private readonly Router $router,
50+
private readonly ViewRenderer $viewRenderer,
51+
private readonly EventBus $eventBus,
52+
private readonly HttpClient $httpClient,
4353
) {}
4454

4555
#[ConsoleCommand(name: 'static:generate', description: 'Compiles static pages')]
4656
public function __invoke(
4757
?string $filter = null,
58+
bool $allowDeadLinks = false,
59+
bool $allowExternalDeadLinks = true,
4860
#[ConsoleArgument(aliases: ['v'])]
4961
bool $verbose = false,
5062
): ExitCode {
5163
$publicPath = path($this->kernel->root, 'public');
5264

5365
$generated = 0;
5466
$failures = 0;
67+
$deadlinks = [];
5568

5669
$this->console->header('Generating static pages');
5770

@@ -64,6 +77,10 @@ public function __invoke(
6477
$failures++;
6578

6679
match (true) {
80+
$event->exception instanceof DeadLinksDetectedException => $this->keyValue(
81+
"<style='fg-gray'>{$event->path}</style>",
82+
sprintf("<style='fg-red'>%s DEAD %s</style>", count($event->exception->links), pluralize('LINK', count($event->exception->links))),
83+
),
6784
$event->exception instanceof InvalidStatusCodeException => $this->keyValue(
6885
"<style='fg-gray'>{$event->path}</style>",
6986
"<style='fg-red'>HTTP {$event->exception->status->value}</style>",
@@ -126,6 +143,11 @@ public function __invoke(
126143
mkdir($directory->toString(), recursive: true);
127144
}
128145

146+
if (! $allowDeadLinks && count($links = $this->detectDeadLinks($uri, $content, checkExternal: ! $allowExternalDeadLinks)) > 0) {
147+
$deadlinks[$uri] = $links;
148+
throw new DeadLinksDetectedException($uri, $links);
149+
}
150+
129151
file_put_contents($file->toString(), $content);
130152

131153
$this->eventBus->dispatch(new StaticPageGenerated($uri, $file->toString(), $content));
@@ -152,8 +174,119 @@ public function __invoke(
152174

153175
$this->keyValue('Static pages generated', "<style='fg-green'>{$generated}</style>");
154176

155-
return $failures > 0
177+
if ($deadlinks) {
178+
$this->console->header('Dead links');
179+
180+
foreach ($deadlinks as $uri => $links) {
181+
foreach ($links as $link) {
182+
$this->keyValue("<style='fg-gray'>{$uri}</style>", "<style='fg-red'>{$link}</style>");
183+
}
184+
}
185+
}
186+
187+
return $failures > 0 || count($deadlinks) > 0
156188
? ExitCode::ERROR
157189
: ExitCode::SUCCESS;
158190
}
191+
192+
/**
 * Scans the rendered markup of a page for `<a href="...">` targets that do not resolve.
 *
 * Three kinds of links are handled:
 * - fragment links (`#id`) are looked up as an `id` attribute in the same markup;
 * - internal links (starting with `/` or with the application's base URI) are dispatched through the router;
 * - other links starting with `http` are requested through the HTTP client, only when `$checkExternal` is true.
 *
 * Any other kind of link is left unchecked. The verdict for every non-fragment link is
 * cached in `$this->verifiedLinks` so a link is probed at most once per run, while a link
 * found dead earlier is still reported for every page that references it.
 *
 * @param string $uri URI of the page being generated, used to resolve `./` and `../` links.
 * @param string $html Rendered markup of the page.
 * @param bool $checkExternal Whether external links are requested through the HTTP client.
 *
 * @return string[] The dead links found in `$html`.
 */
private function detectDeadLinks(string $uri, string $html, bool $checkExternal = false): array
{
    $deadlinks = [];
    $links = Regex\get_all_matches($html, '/<a\s+(?<ignore>ssg-ignore)?[^>]*href=["\'](?<url>[^"\']+)["\'][^>]*>/i', matches: ['url', 'ignore']);

    foreach ($links as ['url' => $link, 'ignore' => $ignore]) {
        // Links can be ignored with the ssg-ignore attribute. The pattern above only
        // captures it when it is the first attribute after `<a`.
        if ($ignore ?: false) {
            continue;
        }

        // Check anchors (#)
        if (Str\starts_with($link, '#')) {
            $fragment = (string) Str\strip_start($link, '#');

            // A bare "#" points at the top of the document, so there is no id to look for.
            // The id attribute may be written with either quote style.
            if ($fragment !== '' && ! Regex\matches($html, '/id=(["\'])' . preg_quote($fragment, '/') . '\1/')) {
                $deadlinks[] = $link;
            }

            continue;
        }

        // Resolve relative links (../ or ./)
        if (Str\starts_with($link, ['../', './'])) {
            $link = $this->resolveRelativeLink($uri, $link);
        }

        // Don't ping the same link multiple times, but keep reporting a link that was
        // already found dead: it may have been discovered while generating another page.
        if (array_key_exists($link, $this->verifiedLinks)) {
            if ($this->verifiedLinks[$link] === true && ! in_array($link, $deadlinks, strict: true)) {
                $deadlinks[] = $link;
            }

            continue;
        }

        $isDead = $this->isDeadLink($link, $checkExternal);
        $this->verifiedLinks[$link] = $isDead;

        if ($isDead) {
            $deadlinks[] = $link;
        }
    }

    return $deadlinks;
}

/**
 * Probes a single, already resolved link and tells whether it is dead.
 *
 * A link is dead when its response status is a client or server error. Links that
 * are neither internal nor checked externally are reported as alive.
 */
private function isDeadLink(string $link, bool $checkExternal): bool
{
    $isRootRelative = Str\starts_with($link, '/');

    // Check internal links with router (/ or same base uri)
    if ($isRootRelative || Str\starts_with($this->getLinkWithoutProtocol($link), $this->getLinkWithoutProtocol($this->appConfig->baseUri))) {
        $response = $this->router->dispatch(new GenericRequest(
            method: Method::GET,
            uri: match (true) {
                $isRootRelative => $this->appConfig->baseUri . '/' . Str\strip_start($link, '/'),
                default => $link,
            },
        ));

        return $response->status->isClientError() || $response->status->isServerError();
    }

    if (! $checkExternal) {
        return false;
    }

    if (Str\starts_with($link, 'http')) {
        $response = $this->httpClient->get($link);

        return $response->status->isClientError() || $response->status->isServerError();
    }

    // If we reach this, there is an unknown kind of link.
    return false;
}
260+
261+
/**
 * Resolves paths starting with ./ or ../ to a canonical URI.
 *
 * The last segment of `$basePath` is treated as the current document and dropped
 * before the relative segments are applied. When `$basePath` is absolute (starts
 * with `/`), `..` segments never climb above the root, so the result always keeps
 * its leading slash.
 *
 * @param string $basePath URI of the page that contains the link.
 * @param string $relativePath The link as written in the page.
 *
 * @return string The resolved path.
 */
private function resolveRelativeLink(string $basePath, string $relativePath): string
{
    $isAbsolute = Str\starts_with($basePath, '/');
    $basePath = Str\strip_end($basePath, '/');

    if (! Str\starts_with($relativePath, ['../', './'])) {
        return $basePath . '/' . Str\strip_start($relativePath, './');
    }

    $segments = explode('/', $basePath);

    // For an absolute base the first segment is the empty string that produces the
    // leading slash on implode(); it must survive every pop below.
    $floor = $isAbsolute ? 1 : 0;

    // Drop the current document so resolution starts from its parent directory.
    if (count($segments) > $floor) {
        array_pop($segments);
    }

    foreach (explode('/', $relativePath) as $part) {
        if ($part === '..') {
            if (count($segments) > $floor) {
                array_pop($segments);
            }

            continue;
        }

        if ($part !== '.') {
            $segments[] = $part;
        }
    }

    return implode('/', $segments);
}
287+
288+
/**
 * Returns the given link with a leading `https://` or `http://` stripped.
 */
private function getLinkWithoutProtocol(string $link): string
{
    $schemes = ['https://', 'http://'];

    return Str\strip_start($link, $schemes);
}
159292
}

0 commit comments

Comments
 (0)