Skip to content

Commit 86bcd36

Browse files
authored
Add concurrency limit to link checker to prevent overwhelming external servers (#57514)
1 parent 4589da0 commit 86bcd36

File tree

1 file changed

+50
-14
lines changed

1 file changed

+50
-14
lines changed

src/links/scripts/rendered-content-link-checker.ts

Lines changed: 50 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,12 @@ type Options = {
7676
bail?: boolean
7777
commentLimitToExternalLinks?: boolean
7878
actionContext?: any
79+
concurrency?: number
7980
}
8081

82+
// Default concurrency limit for URL requests
83+
const DEFAULT_CONCURRENCY_LIMIT = 3
84+
8185
const STATIC_PREFIXES: Record<string, string> = {
8286
assets: path.resolve('assets'),
8387
public: path.resolve(path.join('src', 'graphql', 'data')),
@@ -114,6 +118,32 @@ const externalLinkCheckerDB = await JSONFilePreset<Data>(EXTERNAL_LINK_CHECKER_D
114118

115119
type DBType = typeof externalLinkCheckerDB
116120

121+
/**
 * Run `asyncFn` over `items` while capping how many calls are in flight at once.
 *
 * Items are started in array order, and the resolved array has the same order
 * as `items` regardless of which call finishes first.
 *
 * The cap applies to a single invocation only. When calls to this helper are
 * nested, each level enforces its own cap, so the total number of in-flight
 * tasks can be as large as the product of the nested limits.
 *
 * @param items - Values to process; an empty array resolves to `[]`.
 * @param asyncFn - Async worker invoked once per item.
 * @param limit - Maximum number of simultaneously pending `asyncFn` calls.
 *   `NaN` (e.g. the result of `parseInt` on a non-numeric string) falls back
 *   to 3, and values below 1 are treated as 1. `Infinity` disables the cap.
 * @returns The results of `asyncFn`, in the order of `items`.
 * @throws Rejects with the first rejection seen from `asyncFn`; once that
 *   happens no further items are started.
 */
async function limitConcurrency<T, R>(
  items: T[],
  asyncFn: (item: T) => Promise<R>,
  limit: number = 3,
): Promise<R[]> {
  // `size >= NaN` is always false, which would silently turn the limiter into
  // an unbounded `Promise.all`. Normalize the limit before using it.
  const maxInFlight = Number.isNaN(limit) ? 3 : Math.max(1, limit)

  const results: Promise<R>[] = []
  const executing = new Set<Promise<R>>()

  for (const item of items) {
    // Only successful completions are removed from the set. A rejected promise
    // stays in it, so the next `Promise.race` rejects and the loop stops
    // starting new work.
    const promise: Promise<R> = asyncFn(item).then((result) => {
      executing.delete(promise)
      return result
    })

    results.push(promise)
    executing.add(promise)

    if (executing.size >= maxInFlight) {
      // Wait until at least one in-flight call finishes before starting more.
      await Promise.race(executing)
    }
  }

  return Promise.all(results)
}
146+
117147
// Given a number and a percentage, return the same number with a *percentage*
118148
// max change of making a bit larger or smaller.
119149
// E.g. `jitter(55, 10)` will return a value between `[55 - 55/10: 55 + 55/10]`
@@ -156,6 +186,7 @@ if (import.meta.url.endsWith(process.argv[1])) {
156186
REPORT_LABEL,
157187
EXTERNAL_SERVER_ERRORS_AS_WARNINGS,
158188
CHECK_ANCHORS,
189+
CONCURRENCY,
159190
} = process.env
160191

161192
const octokit = github()
@@ -193,6 +224,7 @@ if (import.meta.url.endsWith(process.argv[1])) {
193224
reportAuthor: REPORT_AUTHOR,
194225
actionContext: getActionContext(),
195226
externalServerErrorsAsWarning: EXTERNAL_SERVER_ERRORS_AS_WARNINGS,
227+
concurrency: CONCURRENCY ? parseInt(CONCURRENCY, 10) : DEFAULT_CONCURRENCY_LIMIT,
196228
}
197229

198230
if (opts.shouldComment || opts.createReport) {
@@ -238,6 +270,7 @@ if (import.meta.url.endsWith(process.argv[1])) {
238270
* externalServerErrorsAsWarning {boolean} - Treat >=500 errors or temporary request errors as warning
239271
* filter {Array<string>} - strings to match the pages' relativePath
240272
* versions {Array<string>} - only certain pages' versions (e.g. )
273+
* concurrency {number} - Maximum number of concurrent URL requests (default: 3, env: CONCURRENCY)
241274
*
242275
*/
243276

@@ -263,6 +296,7 @@ async function main(
263296
reportRepository = 'github/docs-content',
264297
reportAuthor = 'docs-bot',
265298
reportLabel = 'broken link report',
299+
concurrency = DEFAULT_CONCURRENCY_LIMIT,
266300
} = opts
267301

268302
// Note! The reason we're using `warmServer()` in this script,
@@ -337,8 +371,9 @@ async function main(
337371

338372
debugTimeStart(core, 'processPages')
339373
const t0 = new Date().getTime()
340-
const flawsGroups = await Promise.all(
341-
pages.map((page: Page) =>
374+
const flawsGroups = await limitConcurrency(
375+
pages,
376+
(page: Page) =>
342377
processPage(
343378
core,
344379
page,
@@ -348,7 +383,7 @@ async function main(
348383
externalLinkCheckerDB,
349384
versions as string[],
350385
),
351-
),
386+
concurrency, // Limit concurrent page checks
352387
)
353388
const t1 = new Date().getTime()
354389
debugTimeEnd(core, 'processPages')
@@ -653,14 +688,13 @@ async function processPage(
653688
versions: string[],
654689
) {
655690
const { verbose, verboseUrl, bail } = opts
656-
const allFlawsEach = await Promise.all(
657-
page.permalinks
658-
.filter((permalink) => {
659-
return !versions.length || versions.includes(permalink.pageVersion)
660-
})
661-
.map((permalink) => {
662-
return processPermalink(core, permalink, page, pageMap, redirects, opts, db)
663-
}),
691+
const filteredPermalinks = page.permalinks.filter((permalink) => {
692+
return !versions.length || versions.includes(permalink.pageVersion)
693+
})
694+
const allFlawsEach = await limitConcurrency(
695+
filteredPermalinks,
696+
(permalink) => processPermalink(core, permalink, page, pageMap, redirects, opts, db),
697+
opts.concurrency || DEFAULT_CONCURRENCY_LIMIT, // Limit concurrent permalink checks per page
664698
)
665699

666700
const allFlaws = allFlawsEach.flat()
@@ -714,8 +748,9 @@ async function processPermalink(
714748
$('a[href]').each((i, link) => {
715749
links.push(link)
716750
})
717-
const newFlaws: LinkFlaw[] = await Promise.all(
718-
links.map(async (link) => {
751+
const newFlaws: LinkFlaw[] = await limitConcurrency(
752+
links,
753+
async (link) => {
719754
const { href } = (link as cheerio.TagElement).attribs
720755

721756
// The global cache can't be used for anchor links because they
@@ -756,7 +791,8 @@ async function processPermalink(
756791
globalHrefCheckCache.set(href, flaw)
757792
}
758793
}
759-
}),
794+
},
795+
opts.concurrency || DEFAULT_CONCURRENCY_LIMIT, // Limit concurrent link checks per permalink
760796
)
761797

762798
for (const flaw of newFlaws) {

0 commit comments

Comments
 (0)