@@ -7,6 +7,12 @@ const REDIRECT_CODES = [301, 302, 304, 307, 308]
7
7
// Other non-standard status codes, such as the 999 returned by LinkedIn
8
8
const OTHER_CODES = [ 999 ]
9
9
10
/**
 * URLs for which a 429 (rate limited) response is accepted.
 * Add more URLs to this list as needed.
 */
const ACCEPTED_RATE_LIMITED_URLS: string[] = [
  'https://github.com/nitrictech/nitric',
]
15
+
10
16
const IGNORED_URLS = [
11
17
'googleads.g.doubleclick.net' ,
12
18
'youtube.com/api' ,
@@ -45,7 +51,18 @@ const isExternalUrl = (url: string) => {
45
51
return ! url . includes ( 'localhost' )
46
52
}
47
53
48
- const req = ( url : string , retryCount = 0 , followRedirect = false ) : any => {
54
/**
 * Reports whether `url` begins with one of the entries in
 * ACCEPTED_RATE_LIMITED_URLS.
 *
 * @param url - the link to test
 * @returns true when at least one accepted entry is a prefix of `url`,
 *   false otherwise
 */
const isAcceptedRateLimitedUrl = (url: string) => {
  for (const prefix of ACCEPTED_RATE_LIMITED_URLS) {
    // A prefix match is enough, so deeper paths under an entry also match.
    if (url.startsWith(prefix)) {
      return true
    }
  }
  return false
}
59
+
60
+ const req = (
61
+ url : string ,
62
+ retryCount = 0 ,
63
+ followRedirect = false ,
64
+ visitedLinks : Record < string , boolean > = { } ,
65
+ ) : any => {
49
66
return cy
50
67
. request ( {
51
68
url,
@@ -54,11 +71,34 @@ const req = (url: string, retryCount = 0, followRedirect = false): any => {
54
71
gzip : false ,
55
72
} )
56
73
. then ( ( resp ) => {
57
- // retry on timeout and too many requests
58
- if ( [ 408 , 429 ] . includes ( resp . status ) && retryCount < 3 ) {
59
- cy . log ( `request ${ url } timed out, retrying again...` )
60
- cy . wait ( 500 )
61
- return req ( url , retryCount + 1 )
74
+ // Handle rate limiting (429): wait for the Retry-After header (seconds) when present, otherwise use exponential backoff capped at 5s
75
+ if ( resp . status === 429 && retryCount < 5 ) {
76
+ const retryAfter = resp . headers [ 'retry-after' ]
77
+ ? parseInt (
78
+ Array . isArray ( resp . headers [ 'retry-after' ] )
79
+ ? resp . headers [ 'retry-after' ] [ 0 ]
80
+ : resp . headers [ 'retry-after' ] ,
81
+ )
82
+ : null
83
+ const waitTime = retryAfter
84
+ ? retryAfter * 1000
85
+ : Math . min ( 500 * Math . pow ( 2 , retryCount ) , 5000 )
86
+
87
+ cy . log (
88
+ `Rate limited for ${ url } , waiting ${ waitTime } ms before retry ${ retryCount + 1 } /5` ,
89
+ )
90
+ cy . wait ( waitTime )
91
+ return req ( url , retryCount + 1 , followRedirect , visitedLinks )
92
+ }
93
+
94
+ // Handle request timeouts (408) with exponential backoff capped at 2s
95
+ if ( resp . status === 408 && retryCount < 3 ) {
96
+ const waitTime = Math . min ( 200 * Math . pow ( 2 , retryCount ) , 2000 )
97
+ cy . log (
98
+ `Request timeout for ${ url } , waiting ${ waitTime } ms before retry ${ retryCount + 1 } /3` ,
99
+ )
100
+ cy . wait ( waitTime )
101
+ return req ( url , retryCount + 1 , followRedirect , visitedLinks )
62
102
}
63
103
64
104
return resp
@@ -67,6 +107,7 @@ const req = (url: string, retryCount = 0, followRedirect = false): any => {
67
107
68
108
describe ( 'Broken links test suite' , ( ) => {
69
109
const VISITED_SUCCESSFUL_LINKS = { }
110
+ const BATCH_SIZE = 10 // Process links in batches of 10
70
111
71
112
pages . forEach ( ( page ) => {
72
113
it ( `Should visit page ${ page } and check all links` , ( ) => {
@@ -84,61 +125,108 @@ describe('Broken links test suite', () => {
84
125
( l ) => href ?. includes ( l ) || src ?. includes ( l ) ,
85
126
)
86
127
} )
87
- . each ( ( link ) => {
88
- cy . log ( `link: ${ link [ 0 ] . textContent } ` )
89
- const baseUrl = link . prop ( 'href' ) || link . prop ( 'src' )
90
-
91
- const url = baseUrl . split ( '#' ) [ 0 ]
92
-
93
- if ( VISITED_SUCCESSFUL_LINKS [ url ] ) {
94
- cy . log ( `link already checked` )
95
- expect ( VISITED_SUCCESSFUL_LINKS [ url ] ) . to . be . true
96
- } else {
97
- // if the link is internal then check the link against the pages fixture (sitemap)
98
- if ( isInternalUrl ( url ) ) {
99
- // clean the url by removing the base url and query params
100
- const rootBaseUrlRegex = new RegExp ( `^${ rootBaseUrl } ` )
101
- let cleanUrl = url . replace ( rootBaseUrlRegex , '' )
102
- const queryIndex = cleanUrl . indexOf ( '?' )
103
- cleanUrl =
104
- queryIndex !== - 1 ? cleanUrl . slice ( 0 , queryIndex ) : cleanUrl
105
-
106
- cy . log ( `checking internal link: ${ cleanUrl } ` )
107
- if ( ! pages . includes ( cleanUrl ) ) {
108
- assert . fail ( `${ cleanUrl } is not part of the pages fixture` )
109
- } else {
110
- VISITED_SUCCESSFUL_LINKS [ url ] = true
111
- }
128
+ . then ( ( $links ) => {
129
+ const linkPromises = [ ]
130
+ const linksToCheck = [ ]
131
+
132
+ $links . each ( ( _i , link ) => {
133
+ const baseUrl =
134
+ link . getAttribute ( 'href' ) || link . getAttribute ( 'src' )
135
+ if ( ! baseUrl ) {
136
+ cy . log ( 'Skipping link with no href/src:' , link )
137
+ return
138
+ }
112
139
140
+ // Skip if the URL is just a hash fragment
141
+ if ( baseUrl . startsWith ( '#' ) ) {
142
+ cy . log ( 'Skipping hash fragment:' , baseUrl )
113
143
return
114
144
}
115
145
116
- cy . wait ( 25 )
117
-
118
- req ( url ) . then ( ( res : Cypress . Response < any > ) => {
119
- let acceptableCodes = CORRECT_CODES
120
- if ( REDIRECT_CODES . includes ( res . status ) && ! isExternalUrl ( url ) ) {
121
- assert . fail (
122
- `${ url } returned ${ res . status } to ${ res . headers [ 'location' ] } ` ,
123
- )
124
- } else {
125
- acceptableCodes = [
126
- ...CORRECT_CODES ,
127
- ...REDIRECT_CODES ,
128
- ...OTHER_CODES ,
129
- ]
146
+ const url = baseUrl . split ( '#' ) [ 0 ]
147
+ if ( ! url ) {
148
+ cy . log ( 'Skipping empty URL from:' , baseUrl )
149
+ return
150
+ }
151
+
152
+ if ( VISITED_SUCCESSFUL_LINKS [ url ] ) {
153
+ cy . log ( `Skipping already checked link: ${ url } ` )
154
+ return
155
+ }
156
+
157
+ linksToCheck . push ( url )
158
+ } )
159
+
160
+ // Process links in batches
161
+ for ( let i = 0 ; i < linksToCheck . length ; i += BATCH_SIZE ) {
162
+ const batch = linksToCheck . slice ( i , i + BATCH_SIZE )
163
+ const batchPromises = batch . map ( ( url ) => {
164
+ if ( ! url ) {
165
+ cy . log ( 'Skipping empty URL in batch' )
166
+ return Promise . resolve ( )
130
167
}
131
168
132
- if ( acceptableCodes . includes ( res . status ) ) {
169
+ if ( isInternalUrl ( url ) ) {
170
+ const rootBaseUrlRegex = new RegExp ( `^${ rootBaseUrl } ` )
171
+ let cleanUrl = url . replace ( rootBaseUrlRegex , '' )
172
+ const queryIndex = cleanUrl . indexOf ( '?' )
173
+ cleanUrl =
174
+ queryIndex !== - 1 ? cleanUrl . slice ( 0 , queryIndex ) : cleanUrl
175
+
176
+ if ( ! pages . includes ( cleanUrl ) ) {
177
+ assert . fail ( `${ cleanUrl } is not part of the pages fixture` )
178
+ }
133
179
VISITED_SUCCESSFUL_LINKS [ url ] = true
180
+ return Promise . resolve ( )
134
181
}
135
182
136
- expect ( res . status ) . oneOf (
137
- acceptableCodes ,
138
- `${ url } returned ${ res . status } ` ,
183
+ return req ( url , 0 , false , VISITED_SUCCESSFUL_LINKS ) . then (
184
+ ( res : Cypress . Response < any > ) => {
185
+ let acceptableCodes = CORRECT_CODES
186
+ if (
187
+ REDIRECT_CODES . includes ( res . status ) &&
188
+ ! isExternalUrl ( url )
189
+ ) {
190
+ assert . fail (
191
+ `${ url } returned ${ res . status } to ${ res . headers [ 'location' ] } ` ,
192
+ )
193
+ } else if ( res . status === 429 ) {
194
+ // After all retries, if we still get a 429, only mark as successful for accepted URLs
195
+ if ( isAcceptedRateLimitedUrl ( url ) ) {
196
+ cy . log (
197
+ `Rate limited for accepted URL ${ url } after all retries, marking as successful` ,
198
+ )
199
+ VISITED_SUCCESSFUL_LINKS [ url ] = true
200
+ return
201
+ } else {
202
+ assert . fail (
203
+ `${ url } returned 429 (Rate Limited) and is not in the accepted list` ,
204
+ )
205
+ }
206
+ } else {
207
+ acceptableCodes = [
208
+ ...CORRECT_CODES ,
209
+ ...REDIRECT_CODES ,
210
+ ...OTHER_CODES ,
211
+ ]
212
+ }
213
+
214
+ if ( acceptableCodes . includes ( res . status ) ) {
215
+ VISITED_SUCCESSFUL_LINKS [ url ] = true
216
+ }
217
+
218
+ expect ( res . status ) . oneOf (
219
+ acceptableCodes ,
220
+ `${ url } returned ${ res . status } ` ,
221
+ )
222
+ } ,
139
223
)
140
224
} )
225
+
226
+ linkPromises . push ( Promise . all ( batchPromises ) )
141
227
}
228
+
229
+ return Promise . all ( linkPromises )
142
230
} )
143
231
} )
144
232
} )
0 commit comments