@@ -149,41 +149,75 @@ async function checkLinks() {
149
149
}
150
150
151
151
/**
 * Fetches the external page at `externalPageLink` and gathers the data used
 * to validate links that point at it.
 *
 * A failed attempt (a thrown fetch/parse error, or one of the statuses listed
 * in `retryableStatuses`) is retried up to `maxRetries` times with an
 * exponentially growing delay (15s, 30s, 60s). Once the retries are used up
 * the failure is reported through `errorMessage`; this function always
 * resolves to an object and never to `undefined`.
 *
 * @returns {Promise<
 *   { ok: boolean, status: number, ids: string[], reactPartial: * } |
 *   { errorMessage: string }
 * >} Page data on success, otherwise a description of the broken link.
 */
const getPageData = async () => {
  const maxRetries = 3;
  // Delay before the first retry, in seconds; doubled for each later retry.
  const baseDelay = 15;
  // Statuses treated as rate limiting / temporary refusal, worth retrying.
  const retryableStatuses = [403, 429, 503, 508];

  // No loop condition on purpose: the catch block decides whether to retry
  // or to give up, so the final failure is always returned to the caller.
  for (let retryCount = 0; ; retryCount++) {
    try {
      const response = await nFetch(externalPageLink, {
        headers: {
          // Spoof a normal looking User-Agent to keep the servers happy
          // See https://github.com/JustinBeckwith/linkinator/blob/main/src/index.ts
          //
          // To better future-proof against the ua string being
          // responsible for any breakage, pull common, up-to-date strings
          // from a reliable source.
          // https://github.com/w3c/aria-practices/issues/3270
          'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/605.1.1',
        },
      });

      // Handle rate limit-related statuses
      if (retryableStatuses.includes(response.status)) {
        // Parsed inside the try so a malformed URL is reported as a broken
        // link instead of rejecting the whole promise.
        const domain = new URL(externalPageLink).hostname;
        throw new Error(
          response.status === 429
            ? `Rate limited by ${domain}`
            : `Unsuccessful response from ${domain} (${response.status})`
        );
      }

      const text = await response.text();
      const html = HTMLParser.parse(text);
      const ids = html
        .querySelectorAll('[id]')
        .map((idElement) => idElement.getAttribute('id'));

      // Handle GitHub README links.
      // These links are stored within a react-partial element
      const reactPartial = getReactPartial(hrefOrSrc, html);
      return {
        ok: response.ok,
        status: response.status,
        ids,
        reactPartial,
      };
    } catch (error) {
      if (retryCount >= maxRetries) {
        // Out of retries: report the failure rather than falling out of the
        // loop and resolving to undefined.
        return {
          errorMessage:
            `Found broken external link on ${htmlPath}:${lineNumber}:${columnNumber}\n` +
            ` ${error.stack}`,
        };
      }

      // Found the retry-after unit returned from response headers too
      // variable to use here, but ~15 seconds seems like a safe
      // initial default
      const delay = baseDelay * 1000 * 2 ** retryCount;
      console.info(
        `Error fetching ${externalPageLink}: ${error.message}, retrying in ${delay}ms`
      );
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
};
189
223
@@ -213,17 +247,6 @@ async function checkLinks() {
213
247
Object . entries ( externalPageLoaders ) . map (
214
248
async ( [ externalPageLink , getPageData ] ) => {
215
249
let pageData = await getPageData ( ) ;
216
- if ( pageData . errorMessage ) {
217
- console . info ( 'Retrying once' ) ;
218
- pageData = await getPageData ( ) ;
219
- }
220
- if ( pageData . errorMessage ) {
221
- await new Promise ( ( resolve ) => {
222
- setTimeout ( resolve , 2000 ) ;
223
- } ) ;
224
- console . info ( 'Retrying twice' ) ;
225
- pageData = await getPageData ( ) ;
226
- }
227
250
externalPageData [ externalPageLink ] = pageData ;
228
251
loadedCount += 1 ;
229
252
}
0 commit comments