Skip to content

Commit 00d33b1

Browse files
authored
Infrastructure: Support rate-limit retries in link-checker (#3273)
Handle the rate-limit-related response codes 403, 429, 503 and 508 in link-checker by retrying the request with exponential backoff. The initial delay is 15 seconds and doubles on each subsequent retry.
1 parent 7b7d576 commit 00d33b1

File tree

1 file changed

+69
-46
lines changed

1 file changed

+69
-46
lines changed

scripts/link-checker.js

Lines changed: 69 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -149,41 +149,75 @@ async function checkLinks() {
149149
}
150150

151151
/**
 * Fetches `externalPageLink` and collects the data needed to validate links
 * that point at it, retrying failed requests with exponential backoff.
 *
 * A response with status 403, 429, 503 or 508 is treated as a (likely)
 * rate-limit failure and retried, as is any error thrown while fetching or
 * parsing. The initial attempt is followed by at most `maxRetries` retries,
 * waiting 15s, 30s and 60s respectively before each one.
 *
 * @returns {Promise<
 *   { ok: boolean, status: number, ids: string[], reactPartial: * } |
 *   { errorMessage: string }
 * >} The page data on success, or an object carrying `errorMessage` once all
 *   retries have been exhausted. Always resolves to an object.
 */
const getPageData = async () => {
  const domain = new URL(externalPageLink).hostname;
  const maxRetries = 3;
  // Initial backoff delay, in seconds
  const baseDelay = 15;
  // Statuses that servers commonly return when rate limiting
  const rateLimitStatuses = [403, 429, 503, 508];

  for (let retryCount = 0; ; retryCount++) {
    try {
      const response = await nFetch(externalPageLink, {
        headers: {
          // Spoof a normal looking User-Agent to keep the servers happy
          // See https://github.com/JustinBeckwith/linkinator/blob/main/src/index.ts
          //
          // To better future-proof against the ua string being
          // responsible for any breakage, pull common, up-to-date strings
          // from a reliable source.
          // https://github.com/w3c/aria-practices/issues/3270
          'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/605.1.1',
        },
      });

      // Handle rate limit-related statuses by throwing, so that they share
      // the retry/backoff path in the catch block below
      if (rateLimitStatuses.includes(response.status)) {
        throw new Error(
          response.status === 429
            ? `Rate limited by ${domain}`
            : `Unsuccessful response from ${domain} (${response.status})`
        );
      }

      const text = await response.text();
      const html = HTMLParser.parse(text);
      const ids = html
        .querySelectorAll('[id]')
        .map((idElement) => idElement.getAttribute('id'));

      // Handle GitHub README links.
      // These links are stored within a react-partial element
      const reactPartial = getReactPartial(hrefOrSrc, html);
      return {
        ok: response.ok,
        status: response.status,
        ids,
        reactPartial,
      };
    } catch (error) {
      // Out of retries: report the failure instead of falling out of the
      // loop, which would resolve to `undefined`
      if (retryCount >= maxRetries) {
        return {
          errorMessage:
            `Found broken external link on ${htmlPath}:${lineNumber}:${columnNumber}\n` +
            ` ${error.stack}`,
        };
      }

      // Found the retry-after unit returned from response headers too
      // variable to use here, but ~15 seconds seems like a safe
      // initial default
      const delay = baseDelay * 1000 * Math.pow(2, retryCount);
      console.info(
        `Error fetching ${externalPageLink}: ${error.message}, retrying in ${delay}ms`
      );
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
};
189223

@@ -213,17 +247,6 @@ async function checkLinks() {
213247
Object.entries(externalPageLoaders).map(
214248
async ([externalPageLink, getPageData]) => {
215249
let pageData = await getPageData();
216-
if (pageData.errorMessage) {
217-
console.info('Retrying once');
218-
pageData = await getPageData();
219-
}
220-
if (pageData.errorMessage) {
221-
await new Promise((resolve) => {
222-
setTimeout(resolve, 2000);
223-
});
224-
console.info('Retrying twice');
225-
pageData = await getPageData();
226-
}
227250
externalPageData[externalPageLink] = pageData;
228251
loadedCount += 1;
229252
}

0 commit comments

Comments
 (0)