@@ -580,3 +580,103 @@ export const isWprRelatedError = async(contents: string): Promise<boolean> => {
580580
581581 return false ;
582582}
583+
/**
 * Gathers the non-empty `href` attribute values of every element matched by a selector.
 *
 * Values are returned exactly as written in the markup (they are not resolved
 * against the page URL). Duplicates collapse because the result is a Set.
 *
 * @async
 * @param {Page} page - The Playwright page object to query
 * @param {string} selector - CSS selector for the elements whose href should be read
 * @return {Promise<Set<string>>} - Unique, non-empty href attribute values
 */
export const collectHrefsFromSelector = async (page: Page, selector: string): Promise<Set<string>> => {
    // The callback is evaluated by Playwright against the matched elements,
    // so it only relies on what it receives as its argument.
    const rawHrefs = await page.$$eval(selector, (nodes) => {
        const found: string[] = [];

        for (const node of nodes) {
            const value = node.getAttribute('href');

            // Drops both a missing attribute (null) and an empty one ('').
            if (value) {
                found.push(value);
            }
        }

        return found;
    });

    return new Set<string>(rawHrefs);
};
602+
/**
 * Normalizes and filters hrefs into a set of absolute HTTP/HTTPS URLs.
 *
 * An href is dropped when it:
 * - is a same-page anchor (starts with `#`),
 * - starts with one of the `skipProtocols` prefixes (case-insensitive),
 * - does not resolve to an `http:` or `https:` URL,
 * - points to a path ending in `/wp-admin/admin-post.php`, or
 * - cannot be parsed against `baseUrl` (this includes an invalid `baseUrl`).
 *
 * Surviving URLs have their fragment removed, so links that differ only by
 * hash collapse into a single entry.
 *
 * @param {Set<string>} hrefs - Set of href strings to normalize
 * @param {string} baseUrl - Base URL for resolving relative URLs
 * @param {string[]} [skipProtocols=['mailto:', 'tel:', 'javascript:']] - Href prefixes to skip
 * @return {Set<string>} - Set of normalized, valid URLs
 */
export const normalizeUrls = (
    hrefs: Set<string>,
    baseUrl: string,
    skipProtocols: string[] = ['mailto:', 'tel:', 'javascript:']
): Set<string> => {
    const normalizedUrls = new Set<string>();

    // Hrefs are compared in lowercase, so the prefixes must be lowercase too;
    // otherwise a caller passing e.g. 'MAILTO:' would never match anything.
    const skipPrefixes = skipProtocols.map((protocol) => protocol.toLowerCase());

    for (const href of hrefs) {
        // The URL parser ignores whitespace around an href, so classify the
        // trimmed form: "  #top" is still an anchor, not a link to the base page.
        const candidate = href.trim().toLowerCase();

        // Skip anchors and special protocols before attempting to resolve
        if (candidate.startsWith('#') || skipPrefixes.some((prefix) => candidate.startsWith(prefix))) {
            continue;
        }

        let url: URL;

        try {
            url = new URL(href, baseUrl);
        } catch {
            // Skip malformed URLs silently - they can't be validated anyway
            continue;
        }

        // Only HTTP(S) targets can be requested; admin-post actions are skipped
        if (!['http:', 'https:'].includes(url.protocol) || url.pathname.endsWith('/wp-admin/admin-post.php')) {
            continue;
        }

        url.hash = '';
        normalizedUrls.add(url.toString());
    }

    return normalizedUrls;
};
643+
/**
 * Requests each HTTP/HTTPS URL and collects the ones that should be treated as broken.
 *
 * Classification:
 * - 4xx responses are reported, except 401/403 from a host other than
 *   `currentHost` (gated external content).
 * - 5xx responses are only logged as warnings and are not reported.
 * - Failures thrown while requesting a URL (e.g. the 30s timeout or more than
 *   5 redirects) are logged as warnings AND reported.
 *
 * URLs are requested one at a time, in the iteration order of `urls`.
 *
 * @async
 * @param {Page} page - The Playwright page object
 * @param {Set<string>} urls - Set of absolute URLs to validate
 * @param {string} currentHost - Current host (compared against `URL.host`, which includes any non-default port) to distinguish internal from external URLs
 * @return {Promise<string[]>} - Broken link entries: "STATUS: url" for HTTP errors, "NETWORK: url (message)" for request failures
 */
export const validateLinks = async (page: Page, urls: Set<string>, currentHost: string): Promise<string[]> => {
    const brokenLinks: string[] = [];

    for (const url of urls) {
        try {
            const response = await page.request.get(url, { maxRedirects: 5, timeout: 30000 });
            const status = response.status();

            // Only the status code is needed. Release the buffered body now;
            // otherwise Playwright keeps it in memory until the context closes,
            // which adds up when many links are checked.
            await response.dispose();

            const isExternal = new URL(url).host !== currentHost;

            if (status >= 400 && status < 500) {
                // External 401/403 usually means gated content, not a dead link.
                if (!(isExternal && (status === 401 || status === 403))) {
                    brokenLinks.push(`${status}: ${url}`);
                }
            }

            if (status >= 500) {
                // eslint-disable-next-line no-console
                console.warn(`Warning: ${url} returned ${status} (server error, not failing test)`);
            }
        } catch (error: unknown) {
            const msg = error instanceof Error ? error.message : String(error);
            brokenLinks.push(`NETWORK: ${url} (${msg})`);
            // eslint-disable-next-line no-console
            console.warn(`Network error for ${url}: ${msg}`);
        }
    }

    return brokenLinks;
};
0 commit comments