@@ -7,6 +7,12 @@ const REDIRECT_CODES = [301, 302, 304, 307, 308]
7
7
// Non-standard status codes that are still treated as a live link
// (e.g. LinkedIn answers with 999).
const OTHER_CODES: number[] = [999]

// URL prefixes for which a persistent 429 (rate limited) response is tolerated.
// Append further prefixes to this list as needed.
const ACCEPTED_RATE_LIMITED_URLS: string[] = [
  'https://github.com/nitrictech/nitric',
]
15
+
10
16
const IGNORED_URLS = [
11
17
'googleads.g.doubleclick.net' ,
12
18
'youtube.com/api' ,
@@ -36,16 +42,47 @@ const IGNORED_URLS = [
36
42
const rootBaseUrl = Cypress . config ( 'baseUrl' )
37
43
38
44
/**
 * Reports whether a link points inside the site under test.
 *
 * A link counts as internal when it begins with the configured base URL or is
 * written relative to the current page (`./…` or `../…`).
 */
const isInternalUrl = (url: string) => {
  const internalPrefixes = [rootBaseUrl, './', '../']
  return internalPrefixes.some((prefix) => url.startsWith(prefix))
}
49
+
50
/**
 * Reduces an internal link to the site-relative path that is compared against
 * the pages fixture.
 *
 * @param url - Link value as found on the page; the caller has already removed
 *   any `#fragment`.
 * @param currentPage - Path of the page the link was found on, used to resolve
 *   `./` and `../` links.
 * @returns The path with the base URL and any query string removed. URLs that
 *   are neither absolute-internal nor relative are returned unchanged.
 */
const getCleanInternalUrl = (url: string, currentPage: string) => {
  // Query strings (and stray fragments) are stripped before the fixture lookup.
  const stripQueryAndHash = (value: string) => value.split(/[?#]/)[0]

  if (url.startsWith(rootBaseUrl)) {
    return stripQueryAndHash(url.slice(rootBaseUrl.length))
  }

  // Handle relative paths
  if (url.startsWith('./') || url.startsWith('../')) {
    // Directory of the current page, including the trailing slash
    const currentDir = currentPage.substring(
      0,
      currentPage.lastIndexOf('/') + 1,
    )
    // `pathname` already excludes the origin, query string and fragment
    const { pathname } = new URL(url, `${rootBaseUrl}${currentDir}`)

    // If the base URL carries its own path prefix, remove it so both branches
    // return paths of the same shape.
    const basePath = new URL(rootBaseUrl).pathname.replace(/\/+$/, '')
    return basePath && pathname.startsWith(`${basePath}/`)
      ? pathname.slice(basePath.length)
      : pathname
  }

  return url
}
43
69
44
70
/**
 * Reports whether a URL is external, i.e. does not mention `localhost`
 * anywhere in it.
 */
const isExternalUrl = (url: string) => {
  const mentionsLocalhost = url.indexOf('localhost') !== -1
  return !mentionsLocalhost
}
47
73
48
- const req = ( url : string , retryCount = 0 , followRedirect = false ) : any => {
74
/**
 * Reports whether a URL begins with one of the prefixes in
 * ACCEPTED_RATE_LIMITED_URLS, i.e. whether a 429 response is tolerated for it.
 */
const isAcceptedRateLimitedUrl = (url: string) => {
  for (const acceptedUrl of ACCEPTED_RATE_LIMITED_URLS) {
    if (url.startsWith(acceptedUrl)) {
      return true
    }
  }
  return false
}
79
+
80
+ const req = (
81
+ url : string ,
82
+ retryCount = 0 ,
83
+ followRedirect = false ,
84
+ visitedLinks : Record < string , boolean > = { } ,
85
+ ) : any => {
49
86
return cy
50
87
. request ( {
51
88
url,
@@ -54,11 +91,34 @@ const req = (url: string, retryCount = 0, followRedirect = false): any => {
54
91
gzip : false ,
55
92
} )
56
93
. then ( ( resp ) => {
57
- // retry on timeout and too many requests
58
- if ( [ 408 , 429 ] . includes ( resp . status ) && retryCount < 3 ) {
59
- cy . log ( `request ${ url } timed out, retrying again...` )
60
- cy . wait ( 500 )
61
- return req ( url , retryCount + 1 )
94
+ // Handle rate limiting (429) with exponential backoff
95
+ if ( resp . status === 429 && retryCount < 3 ) {
96
+ const retryAfter = resp . headers [ 'retry-after' ]
97
+ ? parseInt (
98
+ Array . isArray ( resp . headers [ 'retry-after' ] )
99
+ ? resp . headers [ 'retry-after' ] [ 0 ]
100
+ : resp . headers [ 'retry-after' ] ,
101
+ )
102
+ : null
103
+ const waitTime = retryAfter
104
+ ? retryAfter * 1000
105
+ : Math . min ( 500 * Math . pow ( 2 , retryCount ) , 5000 )
106
+
107
+ cy . log (
108
+ `Rate limited for ${ url } , waiting ${ waitTime } ms before retry ${ retryCount + 1 } /3` ,
109
+ )
110
+ cy . wait ( waitTime )
111
+ return req ( url , retryCount + 1 , followRedirect , visitedLinks )
112
+ }
113
+
114
+ // Handle timeouts with exponential backoff
115
+ if ( resp . status === 408 && retryCount < 3 ) {
116
+ const waitTime = Math . min ( 200 * Math . pow ( 2 , retryCount ) , 2000 )
117
+ cy . log (
118
+ `Request timeout for ${ url } , waiting ${ waitTime } ms before retry ${ retryCount + 1 } /3` ,
119
+ )
120
+ cy . wait ( waitTime )
121
+ return req ( url , retryCount + 1 , followRedirect , visitedLinks )
62
122
}
63
123
64
124
return resp
@@ -67,6 +127,7 @@ const req = (url: string, retryCount = 0, followRedirect = false): any => {
67
127
68
128
describe ( 'Broken links test suite' , ( ) => {
69
129
const VISITED_SUCCESSFUL_LINKS = { }
130
+ const BATCH_SIZE = 10 // Process links in batches of 10
70
131
71
132
pages . forEach ( ( page ) => {
72
133
it ( `Should visit page ${ page } and check all links` , ( ) => {
@@ -84,61 +145,103 @@ describe('Broken links test suite', () => {
84
145
( l ) => href ?. includes ( l ) || src ?. includes ( l ) ,
85
146
)
86
147
} )
87
.then(($links) => {
  // Distinct URLs found on this page that have not been verified yet.
  const linksToCheck: string[] = []

  $links.each((_i, link) => {
    const baseUrl = link.getAttribute('href') || link.getAttribute('src')
    if (!baseUrl) {
      cy.log('Skipping link with no href/src:', link)
      return
    }

    // Skip if the URL is just a hash fragment
    if (baseUrl.startsWith('#')) {
      cy.log('Skipping hash fragment:', baseUrl)
      return
    }

    // Fragments do not affect which resource is requested
    const url = baseUrl.split('#')[0]
    if (!url) {
      cy.log('Skipping empty URL from:', baseUrl)
      return
    }

    if (VISITED_SUCCESSFUL_LINKS[url]) {
      cy.log(`Skipping already checked link: ${url}`)
      return
    }

    // The same URL often appears several times on one page; check it once.
    if (linksToCheck.includes(url)) {
      return
    }

    linksToCheck.push(url)
  })

  // Cypress commands are not promises: they are enqueued here and executed
  // serially once this callback returns. Wrapping them in Promise.all and
  // returning that promise from cy.then() is rejected by Cypress, so the
  // checks are simply queued and nothing is returned. The batches only group
  // the work; they do not introduce parallelism.
  for (let i = 0; i < linksToCheck.length; i += BATCH_SIZE) {
    const batch = linksToCheck.slice(i, i + BATCH_SIZE)

    batch.forEach((url) => {
      // Internal links are validated against the pages fixture, not requested.
      if (isInternalUrl(url)) {
        const cleanUrl = getCleanInternalUrl(url, page)
        if (!pages.includes(cleanUrl)) {
          assert.fail(`${cleanUrl} is not part of the pages fixture`)
        }
        VISITED_SUCCESSFUL_LINKS[url] = true
        return
      }

      req(url, 0, false, VISITED_SUCCESSFUL_LINKS).then(
        (res: Cypress.Response<any>) => {
          let acceptableCodes = CORRECT_CODES
          if (REDIRECT_CODES.includes(res.status) && !isExternalUrl(url)) {
            // Redirects are only tolerated for external links
            assert.fail(
              `${url} returned ${res.status} to ${res.headers['location']}`,
            )
          } else if (res.status === 429) {
            // After all retries, if we still get a 429, only mark as successful for accepted URLs
            if (isAcceptedRateLimitedUrl(url)) {
              cy.log(
                `Rate limited for accepted URL ${url} after all retries, marking as successful`,
              )
              VISITED_SUCCESSFUL_LINKS[url] = true
              return
            } else {
              assert.fail(
                `${url} returned 429 (Rate Limited) and is not in the accepted list`,
              )
            }
          } else {
            acceptableCodes = [
              ...CORRECT_CODES,
              ...REDIRECT_CODES,
              ...OTHER_CODES,
            ]
          }

          // Remember successes so later pages can skip this URL
          if (acceptableCodes.includes(res.status)) {
            VISITED_SUCCESSFUL_LINKS[url] = true
          }

          expect(res.status).oneOf(
            acceptableCodes,
            `${url} returned ${res.status}`,
          )
        },
      )
    })
  }
})
143
246
} )
144
247
} )
0 commit comments