1010use Tempest \Console \ExitCode ;
1111use Tempest \Console \HasConsole ;
1212use Tempest \Container \Container ;
13+ use Tempest \Core \AppConfig ;
1314use Tempest \Core \Kernel ;
1415use Tempest \EventBus \EventBus ;
1516use Tempest \Http \GenericRequest ;
1617use Tempest \Http \Method ;
1718use Tempest \Http \Status ;
19+ use Tempest \HttpClient \HttpClient ;
1820use Tempest \Router \DataProvider ;
1921use Tempest \Router \Router ;
22+ use Tempest \Router \Static \Exceptions \DeadLinksDetectedException ;
2023use Tempest \Router \Static \Exceptions \InvalidStatusCodeException ;
2124use Tempest \Router \Static \Exceptions \NoTextualBodyException ;
25+ use Tempest \Support \Regex ;
26+ use Tempest \Support \Str ;
2227use Tempest \View \Exceptions \ViewCompilationError ;
2328use Tempest \View \View ;
2429use Tempest \View \ViewRenderer ;
2530use Tempest \Vite \Exceptions \ManifestNotFoundException ;
2631use Throwable ;
2732
33+ use function Tempest \Support \Language \pluralize ;
2834use function Tempest \Support \path ;
2935use function Tempest \uri ;
3036
31- final readonly class StaticGenerateCommand
37+ final class StaticGenerateCommand
3238{
3339 use HasConsole;
3440
41+ private array $ verifiedLinks = [];
42+
/**
 * Receives the command's collaborators as promoted private properties.
 *
 * As used in the visible parts of this file: $kernel supplies the project root,
 * $console prints headers, $eventBus dispatches generation events, and
 * $appConfig (base URI), $router and $httpClient are used by the dead-link detection.
 */
3543 public function __construct (
36- private Console $ console ,
37- private Kernel $ kernel ,
38- private Container $ container ,
39- private StaticPageConfig $ staticPageConfig ,
40- private Router $ router ,
41- private ViewRenderer $ viewRenderer ,
42- private EventBus $ eventBus ,
44+ private readonly AppConfig $ appConfig ,
45+ private readonly Console $ console ,
46+ private readonly Kernel $ kernel ,
47+ private readonly Container $ container ,
48+ private readonly StaticPageConfig $ staticPageConfig ,
49+ private readonly Router $ router ,
50+ private readonly ViewRenderer $ viewRenderer ,
51+ private readonly EventBus $ eventBus ,
52+ private readonly HttpClient $ httpClient ,
4353 ) {}
4454
4555 #[ConsoleCommand(name: 'static:generate ' , description: 'Compiles static pages ' )]
4656 public function __invoke (
4757 ?string $ filter = null ,
58+ bool $ allowDeadLinks = false ,
59+ bool $ allowExternalDeadLinks = true ,
4860 #[ConsoleArgument(aliases: ['v ' ])]
4961 bool $ verbose = false ,
5062 ): ExitCode {
5163 $ publicPath = path ($ this ->kernel ->root , 'public ' );
5264
5365 $ generated = 0 ;
5466 $ failures = 0 ;
67+ $ deadlinks = [];
5568
5669 $ this ->console ->header ('Generating static pages ' );
5770
@@ -64,6 +77,10 @@ public function __invoke(
6477 $ failures ++;
6578
6679 match (true ) {
80+ $ event ->exception instanceof DeadLinksDetectedException => $ this ->keyValue (
81+ "<style='fg-gray'> {$ event ->path }</style> " ,
82+ sprintf ("<style='fg-red'>%s DEAD %s</style> " , count ($ event ->exception ->links ), pluralize ('LINK ' , count ($ event ->exception ->links ))),
83+ ),
6784 $ event ->exception instanceof InvalidStatusCodeException => $ this ->keyValue (
6885 "<style='fg-gray'> {$ event ->path }</style> " ,
6986 "<style='fg-red'>HTTP {$ event ->exception ->status ->value }</style> " ,
@@ -126,6 +143,11 @@ public function __invoke(
126143 mkdir ($ directory ->toString (), recursive: true );
127144 }
128145
146+ if (! $ allowDeadLinks && count ($ links = $ this ->detectDeadLinks ($ uri , $ content , checkExternal: ! $ allowExternalDeadLinks )) > 0 ) {
147+ $ deadlinks [$ uri ] = $ links ;
148+ throw new DeadLinksDetectedException ($ uri , $ links );
149+ }
150+
129151 file_put_contents ($ file ->toString (), $ content );
130152
131153 $ this ->eventBus ->dispatch (new StaticPageGenerated ($ uri , $ file ->toString (), $ content ));
@@ -152,8 +174,119 @@ public function __invoke(
152174
153175 $ this ->keyValue ('Static pages generated ' , "<style='fg-green'> {$ generated }</style> " );
154176
155- return $ failures > 0
177+ if ($ deadlinks ) {
178+ $ this ->console ->header ('Dead links ' );
179+
180+ foreach ($ deadlinks as $ uri => $ links ) {
181+ foreach ($ links as $ link ) {
182+ $ this ->keyValue ("<style='fg-gray'> {$ uri }</style> " , "<style='fg-red'> {$ link }</style> " );
183+ }
184+ }
185+ }
186+
187+ return $ failures > 0 || count ($ deadlinks ) > 0
156188 ? ExitCode::ERROR
157189 : ExitCode::SUCCESS ;
158190 }
191+
/**
 * Scans the rendered HTML of a page for `<a href="…">` targets that cannot be reached.
 *
 * - Tags carrying the `ssg-ignore` attribute (at any position) are skipped.
 * - `#fragment` links are checked against the `id` attributes of the same document.
 * - Links starting with `/` or with the application's base URI are dispatched through the router.
 * - Other `http…` links are requested through the HTTP client, only when `$checkExternal` is true.
 * - Any other kind of link (e.g. `mailto:`) is left unchecked.
 *
 * The verdict for every router/HTTP-checked link is remembered in `$this->verifiedLinks`
 * (link => is dead), so a link is requested once per run but is still reported on every
 * page that contains it.
 *
 * @param string $uri URI of the page being generated; used to resolve `./` and `../` links.
 * @param string $html Rendered content of the page.
 * @param bool $checkExternal Whether links pointing outside the application are requested.
 *
 * @return string[] The distinct dead links found on this page.
 */
private function detectDeadLinks(string $uri, string $html, bool $checkExternal = false): array
{
    $deadlinks = [];

    // The whole tag is captured so that `ssg-ignore` is honoured wherever it appears among
    // the attributes. `href` must be a full attribute name, so `data-href` is not picked up.
    $links = Regex\get_all_matches(
        $html,
        '/(?<tag><a\s(?:[^>]*\s)?href=["\'](?<url>[^"\']+)["\'][^>]*>)/i',
        matches: ['url', 'tag'],
    );

    foreach ($links as ['url' => $link, 'tag' => $tag]) {
        // Links can be ignored with the ssg-ignore attribute
        if (Regex\matches($tag, '/\sssg-ignore(?=[\s=\/>])/i')) {
            continue;
        }

        // Check anchors (#) against ids of the current document; both quote styles are accepted
        // and attributes that merely end in "id" (e.g. data-id) do not count.
        if (Str\starts_with($link, '#')) {
            $fragment = preg_quote(Str\strip_start($link, '#'), '/');

            if (! Regex\matches($html, '/(?<![\w-])id=(["\'])' . $fragment . '\1/')) {
                $deadlinks[] = $link;
            }

            continue;
        }

        // Resolve relative links (../ or ./)
        if (Str\starts_with($link, ['../', './'])) {
            $link = $this->resolveRelativeLink($uri, $link);
        }

        // Don't ping the same link multiple times, but keep reporting a link that is known to be dead.
        if (array_key_exists($link, $this->verifiedLinks)) {
            if ($this->verifiedLinks[$link]) {
                $deadlinks[] = $link;
            }

            continue;
        }

        // Protocol-relative links (//host/path) point to a host, not to a path of this application.
        $isRootRelative = Str\starts_with($link, '/') && ! Str\starts_with($link, '//');

        // Check internal links with router (/ or same base uri)
        if ($isRootRelative || Str\starts_with($this->getLinkWithoutProtocol($link), $this->getLinkWithoutProtocol($this->appConfig->baseUri))) {
            $response = $this->router->dispatch(new GenericRequest(
                method: Method::GET,
                uri: $isRootRelative
                    ? $this->appConfig->baseUri . '/' . Str\strip_start($link, '/')
                    : $link,
            ));

            $isDead = $response->status->isClientError() || $response->status->isServerError();
            $this->verifiedLinks[$link] = $isDead;

            if ($isDead) {
                $deadlinks[] = $link;
            }

            continue;
        }

        if (! $checkExternal) {
            continue;
        }

        if (Str\starts_with($link, 'http')) {
            try {
                $response = $this->httpClient->get($link);
                $isDead = $response->status->isClientError() || $response->status->isServerError();
            } catch (Throwable) {
                // A request that cannot be completed at all (DNS failure, refused connection, timeout…)
                // means the link is dead; it must not abort the generation of the page.
                $isDead = true;
            }

            $this->verifiedLinks[$link] = $isDead;

            if ($isDead) {
                $deadlinks[] = $link;
            }

            continue;
        }

        // If we reach this, there is an unknown kind of link.
    }

    // A link repeated on the same page is reported once.
    return array_values(array_unique($deadlinks));
}
260+
/**
 * Resolves a path starting with `./` or `../` against the URI of the page that contains it.
 *
 * The last segment of `$basePath` is treated as the page itself and dropped, then each
 * segment of `$relativePath` is applied: `.` is skipped, `..` removes one segment and
 * anything else is appended. When `$basePath` starts with `/`, the result always keeps
 * its leading `/`: `..` segments that would climb above the root are ignored, and a
 * page located at `/` resolves `./about` to `/about`.
 *
 * @param string $basePath URI of the page containing the link, e.g. `/blog/post`.
 * @param string $relativePath The link as written in the page, e.g. `../about`.
 *
 * @return string The resolved path.
 */
private function resolveRelativeLink(string $basePath, string $relativePath): string
{
    // Must be determined before the trailing slash is stripped: "/" becomes "" below.
    $isRooted = Str\starts_with($basePath, '/');
    $basePath = Str\strip_end($basePath, '/');

    if (! Str\starts_with($relativePath, ['../', './'])) {
        return $basePath . '/' . Str\strip_start($relativePath, './');
    }

    $segments = explode('/', $basePath);

    // Drop the segment of the page itself; links are relative to its parent.
    array_pop($segments);

    // For a rooted path the first (empty) segment is what produces the leading slash.
    if ($isRooted && $segments === []) {
        $segments = [''];
    }

    $minimumSegments = $isRooted ? 1 : 0;

    foreach (explode('/', $relativePath) as $part) {
        if ($part === '.') {
            continue;
        }

        if ($part === '..') {
            // Never climb above the root.
            if (count($segments) > $minimumSegments) {
                array_pop($segments);
            }

            continue;
        }

        $segments[] = $part;
    }

    return implode('/', $segments);
}
287+
/**
 * Returns the given link after passing it through `Str\strip_start()` with the
 * `https://` and `http://` prefixes, so links can be compared regardless of scheme.
 */
private function getLinkWithoutProtocol(string $link): string
{
    $protocols = ['https://', 'http://'];

    return Str\strip_start($link, $protocols);
}
159292}
0 commit comments