1010use Tempest \Console \ExitCode ;
1111use Tempest \Console \HasConsole ;
1212use Tempest \Container \Container ;
13+ use Tempest \Core \AppConfig ;
1314use Tempest \Core \Kernel ;
1415use Tempest \EventBus \EventBus ;
1516use Tempest \Http \GenericRequest ;
1617use Tempest \Http \Method ;
1718use Tempest \Http \Status ;
19+ use Tempest \HttpClient \HttpClient ;
1820use Tempest \Router \DataProvider ;
1921use Tempest \Router \Router ;
22+ use Tempest \Router \Static \Exceptions \DeadLinksDetectedException ;
2023use Tempest \Router \Static \Exceptions \InvalidStatusCodeException ;
2124use Tempest \Router \Static \Exceptions \NoTextualBodyException ;
25+ use Tempest \Support \Arr ;
26+ use Tempest \Support \Regex ;
27+ use Tempest \Support \Str ;
2228use Tempest \View \Exceptions \ViewCompilationError ;
2329use Tempest \View \View ;
2430use Tempest \View \ViewRenderer ;
2531use Tempest \Vite \Exceptions \ManifestNotFoundException ;
2632use Throwable ;
2733
34+ use function Tempest \Support \Language \pluralize ;
2835use function Tempest \Support \path ;
2936use function Tempest \uri ;
3037
31- final readonly class StaticGenerateCommand
38+ final class StaticGenerateCommand
3239{
3340 use HasConsole;
3441
42+ private array $ verifiedLinks = [];
43+
3544 public function __construct (
36- private Console $ console ,
37- private Kernel $ kernel ,
38- private Container $ container ,
39- private StaticPageConfig $ staticPageConfig ,
40- private Router $ router ,
41- private ViewRenderer $ viewRenderer ,
42- private EventBus $ eventBus ,
45+ private readonly AppConfig $ appConfig ,
46+ private readonly Console $ console ,
47+ private readonly Kernel $ kernel ,
48+ private readonly Container $ container ,
49+ private readonly StaticPageConfig $ staticPageConfig ,
50+ private readonly Router $ router ,
51+ private readonly ViewRenderer $ viewRenderer ,
52+ private readonly EventBus $ eventBus ,
53+ private readonly HttpClient $ httpClient ,
4354 ) {}
4455
4556 #[ConsoleCommand(name: 'static:generate ' , description: 'Compiles static pages ' )]
4657 public function __invoke (
4758 ?string $ filter = null ,
59+ bool $ allowDeadLinks = false ,
60+ bool $ allowExternalDeadLinks = true ,
4861 #[ConsoleArgument(aliases: ['v ' ])]
4962 bool $ verbose = false ,
5063 ): ExitCode {
5164 $ publicPath = path ($ this ->kernel ->root , 'public ' );
5265
5366 $ generated = 0 ;
5467 $ failures = 0 ;
68+ $ deadlinks = [];
5569
5670 $ this ->console ->header ('Generating static pages ' );
5771
@@ -64,6 +78,10 @@ public function __invoke(
6478 $ failures ++;
6579
6680 match (true ) {
81+ $ event ->exception instanceof DeadLinksDetectedException => $ this ->keyValue (
82+ "<style='fg-gray'> {$ event ->path }</style> " ,
83+ sprintf ("<style='fg-red'>%s DEAD %s</style> " , count ($ event ->exception ->links ), pluralize ('LINK ' , count ($ event ->exception ->links ))),
84+ ),
6785 $ event ->exception instanceof InvalidStatusCodeException => $ this ->keyValue (
6886 "<style='fg-gray'> {$ event ->path }</style> " ,
6987 "<style='fg-red'>HTTP {$ event ->exception ->status ->value }</style> " ,
@@ -126,6 +144,11 @@ public function __invoke(
126144 mkdir ($ directory ->toString (), recursive: true );
127145 }
128146
147+ if (! $ allowDeadLinks && count ($ links = $ this ->detectDeadLinks ($ uri , $ content , checkExternal: ! $ allowExternalDeadLinks )) > 0 ) {
148+ $ deadlinks [$ uri ] = $ links ;
149+ throw new DeadLinksDetectedException ($ uri , $ links );
150+ }
151+
129152 file_put_contents ($ file ->toString (), $ content );
130153
131154 $ this ->eventBus ->dispatch (new StaticPageGenerated ($ uri , $ file ->toString (), $ content ));
@@ -152,8 +175,127 @@ public function __invoke(
152175
153176 $ this ->keyValue ('Static pages generated ' , "<style='fg-green'> {$ generated }</style> " );
154177
155- return $ failures > 0
178+ if ($ deadlinks ) {
179+ $ this ->console ->header ('Dead links ' );
180+
181+ foreach ($ deadlinks as $ uri => $ links ) {
182+ foreach ($ links as $ link ) {
183+ $ this ->keyValue ("<style='fg-gray'> {$ uri }</style> " , "<style='fg-red'> {$ link }</style> " );
184+ }
185+ }
186+ }
187+
188+ return $ failures > 0 || count ($ deadlinks ) > 0
156189 ? ExitCode::ERROR
157190 : ExitCode::SUCCESS ;
158191 }
192+
/**
 * Scans the rendered HTML of a page and returns the links that look dead.
 *
 * Each `<a href>` found in the markup is classified and checked as follows:
 *  - links flagged by the `ssg-ignore` capture of the pattern are skipped;
 *  - `#fragment` links must have a matching `id="fragment"` in the same markup;
 *  - `./` and `../` links are first resolved against the page URI;
 *  - links starting with `/` or with the application's base URI are dispatched
 *    through the router, following redirects, and are dead on a 4xx/5xx status;
 *  - remaining `http…` links are requested with the HTTP client, but only
 *    when `$checkExternal` is true.
 *
 * Non-anchor links are remembered in `$this->verifiedLinks`, so a given link is
 * only checked the first time it is encountered by this command instance; later
 * occurrences are skipped and therefore not reported again.
 *
 * @param string $uri URI of the page being generated, used to resolve relative links.
 * @param string $html Rendered content of the page.
 * @param bool $checkExternal Whether links outside of the application are requested.
 *
 * @return string[] The dead links (relative links are returned in resolved form).
 */
private function detectDeadLinks(string $uri, string $html, bool $checkExternal = false): array
{
    // Upper bound on redirects followed for a single internal link, so that a
    // redirect cycle cannot hang the command.
    $maxRedirects = 10;

    $deadlinks = [];
    $links = Regex\get_all_matches($html, '/<a\s+(?<ignore>ssg-ignore)?[^>]*href=["\'](?<url>[^"\']+)["\'][^>]*>/i', matches: ['url', 'ignore']);

    foreach ($links as ['url' => $link, 'ignore' => $ignore]) {
        // Links can be ignored with the ssg-ignore attribute
        if ($ignore ?: false) {
            continue;
        }

        // Check anchors (#)
        if (Str\starts_with($link, '#')) {
            if (! Regex\matches($html, "/id=\"" . preg_quote(Str\strip_start($link, '#'), '/') . "\"/")) {
                $deadlinks[] = $link;
            }

            continue;
        }

        // Resolve relative links (../ or ./)
        if (Str\starts_with($link, ['../', './'])) {
            $link = $this->resolveRelativeLink($uri, $link);
        }

        // Don't ping the same link multiple times
        if (in_array($link, $this->verifiedLinks, strict: true)) {
            continue;
        }

        $this->verifiedLinks[] = $link;

        // Check internal links with router (/ or same base uri)
        if (Str\starts_with($link, '/') || Str\starts_with($this->getLinkWithoutProtocol($link), $this->getLinkWithoutProtocol($this->appConfig->baseUri))) {
            // The target is computed afresh for every link. It must not survive
            // from a previous iteration, otherwise every internal link after the
            // first one would be checked against the previous link's target.
            $target = Str\starts_with($link, '/')
                ? $this->appConfig->baseUri . '/' . Str\strip_start($link, '/')
                : $link;

            $redirects = 0;
            $resolved = false;

            while (true) {
                $response = $this->router->dispatch(new GenericRequest(
                    method: Method::GET,
                    uri: $target,
                ));

                if (! $response->status->isRedirect()) {
                    $resolved = true;
                    break;
                }

                // A redirect without a usable Location header, or a chain longer
                // than the cap, cannot be followed: the link is treated as dead.
                $locations = $response->getHeader('Location')?->values ?? [];
                $next = $locations === [] ? null : Arr\first($locations);

                if (! is_string($next) || $next === '' || ++$redirects > $maxRedirects) {
                    break;
                }

                $target = $next;
            }

            if (! $resolved || $response->status->isClientError() || $response->status->isServerError()) {
                $deadlinks[] = $link;
            }

            continue;
        }

        if (! $checkExternal) {
            continue;
        }

        if (Str\starts_with($link, 'http')) {
            $response = $this->httpClient->get($link);

            if ($response->status->isClientError() || $response->status->isServerError()) {
                $deadlinks[] = $link;
            }

            continue;
        }

        // If we reach this, there is an unknown kind of link.
    }

    return $deadlinks;
}
269+
/**
 * Turns a link relative to a page (`./foo`, `../foo`) into a path based on that page's URI.
 *
 * The trailing slash of `$basePath` is stripped and its last segment is dropped
 * before the relative segments are applied: `..` removes one more segment, `.`
 * is ignored, and any other segment is appended as-is.
 *
 * @param string $basePath URI of the page that contains the link.
 * @param string $relativePath Link as written in the page.
 */
private function resolveRelativeLink(string $basePath, string $relativePath): string
{
    $basePath = Str\strip_end($basePath, '/');

    // Anything that is not explicitly relative is simply appended to the base path.
    if (! Str\starts_with($relativePath, ['../', './'])) {
        return $basePath . '/' . Str\strip_start($relativePath, './');
    }

    $segments = explode('/', $basePath);

    // Relative links are resolved from the parent of the current page.
    array_pop($segments);

    foreach (explode('/', $relativePath) as $segment) {
        if ($segment === '..') {
            array_pop($segments);

            continue;
        }

        if ($segment === '.') {
            continue;
        }

        $segments[] = $segment;
    }

    return implode('/', $segments);
}
296+
/**
 * Returns the given link with a leading `https://` or `http://` passed to
 * `Str\strip_start` for removal, so that links can be compared regardless of scheme.
 */
private function getLinkWithoutProtocol(string $link): string
{
    $schemes = ['https://', 'http://'];

    return Str\strip_start($link, $schemes);
}
159301}
0 commit comments