@@ -76,8 +76,12 @@ type Options = {
7676 bail ?: boolean
7777 commentLimitToExternalLinks ?: boolean
7878 actionContext ?: any
79+ concurrency ?: number
7980}
8081
82+ // Default concurrency limit for URL requests
83+ const DEFAULT_CONCURRENCY_LIMIT = 3
84+
8185const STATIC_PREFIXES : Record < string , string > = {
8286 assets : path . resolve ( 'assets' ) ,
8387 public : path . resolve ( path . join ( 'src' , 'graphql' , 'data' ) ) ,
@@ -114,6 +118,32 @@ const externalLinkCheckerDB = await JSONFilePreset<Data>(EXTERNAL_LINK_CHECKER_D
114118
115119type DBType = typeof externalLinkCheckerDB
116120
/**
 * Run `asyncFn` over every element of `items` while keeping at most `limit`
 * calls in flight at the same time.
 *
 * Items are started in input order and the resolved array is in input order,
 * regardless of the order in which the individual calls settle.
 *
 * @param items - Values to process.
 * @param asyncFn - Called once per item.
 * @param limit - Maximum number of simultaneous calls. Values below 1 are
 *   treated as 1, fractional values are rounded up, `Infinity` means no limit,
 *   and `NaN` falls back to the default of 3.
 * @returns The results of `asyncFn`, positioned to match `items`.
 * @throws Rejects with the first error raised by `asyncFn`. No further items
 *   are started after a failure; calls already in flight are not cancelled.
 */
async function limitConcurrency<T, R>(
  items: T[],
  asyncFn: (item: T) => Promise<R>,
  limit: number = 3,
): Promise<R[]> {
  // Mirrors the default of the `limit` parameter.
  const fallbackLimit = 3

  // `parseInt` on a malformed value yields NaN, and every comparison against
  // NaN is false, which would silently turn the limiter off. Fall back to the
  // default instead of running everything at once.
  const requested = Number.isNaN(limit) ? fallbackLimit : limit
  const workerCount = Math.min(items.length, Math.max(1, Math.ceil(requested)))

  const results = new Array<R>(items.length)
  // A single iterator shared by all workers: each `next()` hands out the next
  // unprocessed [index, item] pair, so no item is picked up twice.
  const queue = items.entries()
  let failed = false

  const worker = async (): Promise<void> => {
    for (const [index, item] of queue) {
      // Another worker already failed; the caller is going to see a rejection,
      // so do not start more work.
      if (failed) return
      try {
        results[index] = await asyncFn(item)
      } catch (error: unknown) {
        failed = true
        throw error
      }
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()))
  return results
}
146+
117147// Given a number and a percentage, return the same number with a *percentage*
118148// max change of making a bit larger or smaller.
119149// E.g. `jitter(55, 10)` will return a value between `[55 - 55/10: 55 + 55/10]`
@@ -156,6 +186,7 @@ if (import.meta.url.endsWith(process.argv[1])) {
156186 REPORT_LABEL ,
157187 EXTERNAL_SERVER_ERRORS_AS_WARNINGS ,
158188 CHECK_ANCHORS ,
189+ CONCURRENCY ,
159190 } = process . env
160191
161192 const octokit = github ( )
@@ -193,6 +224,7 @@ if (import.meta.url.endsWith(process.argv[1])) {
193224 reportAuthor : REPORT_AUTHOR ,
194225 actionContext : getActionContext ( ) ,
195226 externalServerErrorsAsWarning : EXTERNAL_SERVER_ERRORS_AS_WARNINGS ,
227+ concurrency : CONCURRENCY ? parseInt ( CONCURRENCY , 10 ) : DEFAULT_CONCURRENCY_LIMIT ,
196228 }
197229
198230 if ( opts . shouldComment || opts . createReport ) {
@@ -238,6 +270,7 @@ if (import.meta.url.endsWith(process.argv[1])) {
238270 * externalServerErrorsAsWarning {boolean} - Treat >=500 errors or temporary request errors as warning
239271 * filter {Array<string>} - strings to match the pages' relativePath
240272 * versions {Array<string>} - only certain pages' versions (e.g. )
273+ * concurrency {number} - Maximum number of concurrent tasks at each level: pages, permalinks per page, and links per permalink (default: 3, env: CONCURRENCY)
241274 *
242275 */
243276
@@ -263,6 +296,7 @@ async function main(
263296 reportRepository = 'github/docs-content' ,
264297 reportAuthor = 'docs-bot' ,
265298 reportLabel = 'broken link report' ,
299+ concurrency = DEFAULT_CONCURRENCY_LIMIT ,
266300 } = opts
267301
268302 // Note! The reason we're using `warmServer()` in this script,
@@ -337,8 +371,9 @@ async function main(
337371
338372 debugTimeStart ( core , 'processPages' )
339373 const t0 = new Date ( ) . getTime ( )
340- const flawsGroups = await Promise . all (
341- pages . map ( ( page : Page ) =>
374+ const flawsGroups = await limitConcurrency (
375+ pages ,
376+ ( page : Page ) =>
342377 processPage (
343378 core ,
344379 page ,
@@ -348,7 +383,7 @@ async function main(
348383 externalLinkCheckerDB ,
349384 versions as string [ ] ,
350385 ) ,
351- ) ,
386+ concurrency , // Limit concurrent page checks
352387 )
353388 const t1 = new Date ( ) . getTime ( )
354389 debugTimeEnd ( core , 'processPages' )
@@ -653,14 +688,13 @@ async function processPage(
653688 versions : string [ ] ,
654689) {
655690 const { verbose, verboseUrl, bail } = opts
656- const allFlawsEach = await Promise . all (
657- page . permalinks
658- . filter ( ( permalink ) => {
659- return ! versions . length || versions . includes ( permalink . pageVersion )
660- } )
661- . map ( ( permalink ) => {
662- return processPermalink ( core , permalink , page , pageMap , redirects , opts , db )
663- } ) ,
691+ const filteredPermalinks = page . permalinks . filter ( ( permalink ) => {
692+ return ! versions . length || versions . includes ( permalink . pageVersion )
693+ } )
694+ const allFlawsEach = await limitConcurrency (
695+ filteredPermalinks ,
696+ ( permalink ) => processPermalink ( core , permalink , page , pageMap , redirects , opts , db ) ,
697+ opts . concurrency || DEFAULT_CONCURRENCY_LIMIT , // Limit concurrent permalink checks per page
664698 )
665699
666700 const allFlaws = allFlawsEach . flat ( )
@@ -714,8 +748,9 @@ async function processPermalink(
714748 $ ( 'a[href]' ) . each ( ( i , link ) => {
715749 links . push ( link )
716750 } )
717- const newFlaws : LinkFlaw [ ] = await Promise . all (
718- links . map ( async ( link ) => {
751+ const newFlaws : LinkFlaw [ ] = await limitConcurrency (
752+ links ,
753+ async ( link ) => {
719754 const { href } = ( link as cheerio . TagElement ) . attribs
720755
721756 // The global cache can't be used for anchor links because they
@@ -756,7 +791,8 @@ async function processPermalink(
756791 globalHrefCheckCache . set ( href , flaw )
757792 }
758793 }
759- } ) ,
794+ } ,
795+ opts . concurrency || DEFAULT_CONCURRENCY_LIMIT , // Limit concurrent link checks per permalink
760796 )
761797
762798 for ( const flaw of newFlaws ) {
0 commit comments