@@ -7,6 +7,12 @@ const REDIRECT_CODES = [301, 302, 304, 307, 308]
// Non-standard status codes that are still accepted, e.g. the 999 that LinkedIn answers with.
const OTHER_CODES: number[] = [999]
99
/**
 * URL prefixes for which a 429 (rate limited) response is tolerated.
 * A link starting with one of these entries is marked as successful even if
 * it is still rate limited once the retries are used up.
 * Add more URLs here as needed.
 */
const ACCEPTED_RATE_LIMITED_URLS: readonly string[] = [
  'https://github.com/nitrictech/nitric',
]
15+
1016const IGNORED_URLS = [
1117 'googleads.g.doubleclick.net' ,
1218 'youtube.com/api' ,
@@ -36,16 +42,47 @@ const IGNORED_URLS = [
3642const rootBaseUrl = Cypress . config ( 'baseUrl' )
3743
/**
 * Tells whether a link points inside the site under test.
 *
 * A link counts as internal when it begins with the Cypress base URL or is
 * written as a './' or '../' relative path.
 *
 * @param url - link target to classify
 * @returns `true` for internal links, `false` otherwise
 */
const isInternalUrl = (url: string) => {
  const internalPrefixes = [rootBaseUrl, './', '../']
  return internalPrefixes.some((prefix) => url.startsWith(prefix))
}
49+
/**
 * Converts an internal link into the site-relative path that is looked up in
 * the pages fixture.
 *
 * - URLs that start with the base URL have that leading prefix removed, and
 *   any query string or hash fragment is cut off (so `/docs?x=1` is looked up
 *   as `/docs`).
 * - './' and '../' links are resolved against the directory of `currentPage`;
 *   the resulting pathname never carries a query string or hash.
 * - Anything else is returned unchanged.
 *
 * @param url - link target as found on the page
 * @param currentPage - site-relative path of the page that contains the link
 * @param baseUrl - base URL to strip and resolve against; defaults to the
 *   Cypress base URL
 * @returns the cleaned site-relative path, or `url` itself when it is neither
 *   base-URL-prefixed nor relative
 */
const getCleanInternalUrl = (
  url: string,
  currentPage: string,
  baseUrl: string = rootBaseUrl,
) => {
  if (url.startsWith(baseUrl)) {
    // Remove only the leading base URL, then drop everything from the first
    // '?' or '#' so query params do not break the fixture lookup.
    return url.slice(baseUrl.length).replace(/[?#].*$/, '')
  }

  // Handle relative paths
  if (url.startsWith('./') || url.startsWith('../')) {
    // Directory of the current page, including the trailing slash
    const currentDir = currentPage.substring(
      0,
      currentPage.lastIndexOf('/') + 1,
    )
    // `pathname` is already free of origin, query string and hash, so no
    // further stripping of the base URL is needed.
    return new URL(url, `${baseUrl}${currentDir}`).pathname
  }

  return url
}
4369
/**
 * Tells whether `url` points somewhere other than the locally served site.
 *
 * The decision is made on the parsed host name (`localhost` or a
 * `*.localhost` sub-domain) instead of searching the whole string, so an
 * external link such as `https://example.com/guides/localhost-setup` is not
 * mistaken for a local one. Values that cannot be parsed as absolute URLs
 * (for example relative paths) fall back to the plain substring check.
 *
 * @param url - link target to classify
 * @returns `true` when the link is not served from localhost
 */
const isExternalUrl = (url: string) => {
  let hostname: string
  try {
    hostname = new URL(url).hostname
  } catch {
    // Not an absolute URL: keep the legacy substring behaviour.
    return !url.includes('localhost')
  }

  return hostname !== 'localhost' && !hostname.endsWith('.localhost')
}
4773
48- const req = ( url : string , retryCount = 0 , followRedirect = false ) : any => {
/**
 * Reports whether `url` begins with one of the entries in
 * ACCEPTED_RATE_LIMITED_URLS.
 *
 * @param url - link target to look up
 * @returns `true` when some accepted entry is a prefix of `url`
 */
const isAcceptedRateLimitedUrl = (url: string) => {
  for (const acceptedPrefix of ACCEPTED_RATE_LIMITED_URLS) {
    if (url.startsWith(acceptedPrefix)) {
      return true
    }
  }
  return false
}
79+
80+ const req = (
81+ url : string ,
82+ retryCount = 0 ,
83+ followRedirect = false ,
84+ visitedLinks : Record < string , boolean > = { } ,
85+ ) : any => {
4986 return cy
5087 . request ( {
5188 url,
@@ -54,11 +91,34 @@ const req = (url: string, retryCount = 0, followRedirect = false): any => {
5491 gzip : false ,
5592 } )
5693 . then ( ( resp ) => {
57- // retry on timeout and too many requests
58- if ( [ 408 , 429 ] . includes ( resp . status ) && retryCount < 3 ) {
59- cy . log ( `request ${ url } timed out, retrying again...` )
60- cy . wait ( 500 )
61- return req ( url , retryCount + 1 )
94+ // Handle rate limiting (429) with exponential backoff
95+ if ( resp . status === 429 && retryCount < 3 ) {
96+ const retryAfter = resp . headers [ 'retry-after' ]
97+ ? parseInt (
98+ Array . isArray ( resp . headers [ 'retry-after' ] )
99+ ? resp . headers [ 'retry-after' ] [ 0 ]
100+ : resp . headers [ 'retry-after' ] ,
101+ )
102+ : null
103+ const waitTime = retryAfter
104+ ? retryAfter * 1000
105+ : Math . min ( 500 * Math . pow ( 2 , retryCount ) , 5000 )
106+
107+ cy . log (
108+ `Rate limited for ${ url } , waiting ${ waitTime } ms before retry ${ retryCount + 1 } /3` ,
109+ )
110+ cy . wait ( waitTime )
111+ return req ( url , retryCount + 1 , followRedirect , visitedLinks )
112+ }
113+
114+ // Handle timeouts with exponential backoff
115+ if ( resp . status === 408 && retryCount < 3 ) {
116+ const waitTime = Math . min ( 200 * Math . pow ( 2 , retryCount ) , 2000 )
117+ cy . log (
118+ `Request timeout for ${ url } , waiting ${ waitTime } ms before retry ${ retryCount + 1 } /3` ,
119+ )
120+ cy . wait ( waitTime )
121+ return req ( url , retryCount + 1 , followRedirect , visitedLinks )
62122 }
63123
64124 return resp
@@ -67,6 +127,7 @@ const req = (url: string, retryCount = 0, followRedirect = false): any => {
67127
68128describe ( 'Broken links test suite' , ( ) => {
69129 const VISITED_SUCCESSFUL_LINKS = { }
130+ const BATCH_SIZE = 10 // Process links in batches of 10
70131
71132 pages . forEach ( ( page ) => {
72133 it ( `Should visit page ${ page } and check all links` , ( ) => {
@@ -84,61 +145,103 @@ describe('Broken links test suite', () => {
84145 ( l ) => href ?. includes ( l ) || src ?. includes ( l ) ,
85146 )
86147 } )
87- . each ( ( link ) => {
88- cy . log ( `link: ${ link [ 0 ] . textContent } ` )
89- const baseUrl = link . prop ( 'href' ) || link . prop ( 'src' )
90-
91- const url = baseUrl . split ( '#' ) [ 0 ]
92-
93- if ( VISITED_SUCCESSFUL_LINKS [ url ] ) {
94- cy . log ( `link already checked` )
95- expect ( VISITED_SUCCESSFUL_LINKS [ url ] ) . to . be . true
96- } else {
97- // if the link is internal then check the link against the pages fixture (sitemap)
98- if ( isInternalUrl ( url ) ) {
99- // clean the url by removing the base url and query params
100- const rootBaseUrlRegex = new RegExp ( `^${ rootBaseUrl } ` )
101- let cleanUrl = url . replace ( rootBaseUrlRegex , '' )
102- const queryIndex = cleanUrl . indexOf ( '?' )
103- cleanUrl =
104- queryIndex !== - 1 ? cleanUrl . slice ( 0 , queryIndex ) : cleanUrl
105-
106- cy . log ( `checking internal link: ${ cleanUrl } ` )
107- if ( ! pages . includes ( cleanUrl ) ) {
108- assert . fail ( `${ cleanUrl } is not part of the pages fixture` )
109- } else {
110- VISITED_SUCCESSFUL_LINKS [ url ] = true
111- }
148+ . then ( ( $links ) => {
149+ const linkPromises = [ ]
150+ const linksToCheck = [ ]
112151
152+ $links . each ( ( _i , link ) => {
153+ const baseUrl =
154+ link . getAttribute ( 'href' ) || link . getAttribute ( 'src' )
155+ if ( ! baseUrl ) {
156+ cy . log ( 'Skipping link with no href/src:' , link )
113157 return
114158 }
115159
116- cy . wait ( 25 )
117-
118- req ( url ) . then ( ( res : Cypress . Response < any > ) => {
119- let acceptableCodes = CORRECT_CODES
120- if ( REDIRECT_CODES . includes ( res . status ) && ! isExternalUrl ( url ) ) {
121- assert . fail (
122- `${ url } returned ${ res . status } to ${ res . headers [ 'location' ] } ` ,
123- )
124- } else {
125- acceptableCodes = [
126- ...CORRECT_CODES ,
127- ...REDIRECT_CODES ,
128- ...OTHER_CODES ,
129- ]
160+ // Skip if the URL is just a hash fragment
161+ if ( baseUrl . startsWith ( '#' ) ) {
162+ cy . log ( 'Skipping hash fragment:' , baseUrl )
163+ return
164+ }
165+
166+ const url = baseUrl . split ( '#' ) [ 0 ]
167+ if ( ! url ) {
168+ cy . log ( 'Skipping empty URL from:' , baseUrl )
169+ return
170+ }
171+
172+ if ( VISITED_SUCCESSFUL_LINKS [ url ] ) {
173+ cy . log ( `Skipping already checked link: ${ url } ` )
174+ return
175+ }
176+
177+ linksToCheck . push ( url )
178+ } )
179+
180+ // Process links in batches
181+ for ( let i = 0 ; i < linksToCheck . length ; i += BATCH_SIZE ) {
182+ const batch = linksToCheck . slice ( i , i + BATCH_SIZE )
183+ const batchPromises = batch . map ( ( url ) => {
184+ if ( ! url ) {
185+ cy . log ( 'Skipping empty URL in batch' )
186+ return Promise . resolve ( )
130187 }
131188
132- if ( acceptableCodes . includes ( res . status ) ) {
189+ if ( isInternalUrl ( url ) ) {
190+ const cleanUrl = getCleanInternalUrl ( url , page )
191+ if ( ! pages . includes ( cleanUrl ) ) {
192+ assert . fail ( `${ cleanUrl } is not part of the pages fixture` )
193+ }
133194 VISITED_SUCCESSFUL_LINKS [ url ] = true
195+ return Promise . resolve ( )
134196 }
135197
136- expect ( res . status ) . oneOf (
137- acceptableCodes ,
138- `${ url } returned ${ res . status } ` ,
198+ return req ( url , 0 , false , VISITED_SUCCESSFUL_LINKS ) . then (
199+ ( res : Cypress . Response < any > ) => {
200+ let acceptableCodes = CORRECT_CODES
201+ if (
202+ REDIRECT_CODES . includes ( res . status ) &&
203+ ! isExternalUrl ( url )
204+ ) {
205+ assert . fail (
206+ `${ url } returned ${ res . status } to ${ res . headers [ 'location' ] } ` ,
207+ )
208+ } else if ( res . status === 429 ) {
209+ // After all retries, if we still get a 429, only mark as successful for accepted URLs
210+ if ( isAcceptedRateLimitedUrl ( url ) ) {
211+ cy . log (
212+ `Rate limited for accepted URL ${ url } after all retries, marking as successful` ,
213+ )
214+ VISITED_SUCCESSFUL_LINKS [ url ] = true
215+ return
216+ } else {
217+ assert . fail (
218+ `${ url } returned 429 (Rate Limited) and is not in the accepted list` ,
219+ )
220+ }
221+ } else {
222+ acceptableCodes = [
223+ ...CORRECT_CODES ,
224+ ...REDIRECT_CODES ,
225+ ...OTHER_CODES ,
226+ ]
227+ }
228+
229+ if ( acceptableCodes . includes ( res . status ) ) {
230+ VISITED_SUCCESSFUL_LINKS [ url ] = true
231+ }
232+
233+ expect ( res . status ) . oneOf (
234+ acceptableCodes ,
235+ `${ url } returned ${ res . status } ` ,
236+ )
237+ } ,
139238 )
140239 } )
240+
241+ linkPromises . push ( Promise . all ( batchPromises ) )
141242 }
243+
244+ return Promise . all ( linkPromises )
142245 } )
143246 } )
144247 } )
0 commit comments