@@ -11,7 +11,7 @@ var ALLOWED_RESOURCES = ['document', 'stylesheet', 'image', 'media', 'font', 'ot
1111const ALLOWED_STATUSES = [ 200 , 201 ] ;
1212const REQUEST_TIMEOUT = 180000 ;
1313const MIN_VIEWPORT_HEIGHT = 1080 ;
14- const MAX_RETRY_WAIT_TIME = 60000 ;
14+ const MAX_WAIT_FOR_REQUEST_CALL = 60000 ;
1515
1616export async function prepareSnapshot ( snapshot : Snapshot , ctx : Context ) : Promise < Record < string , any > > {
1717 let processedOptions : Record < string , any > = { } ;
@@ -300,7 +300,7 @@ export default async function processSnapshot(snapshot: Snapshot, ctx: Context):
300300 }
301301 }
302302
303- const pendingResources = new Map < string , any > ( ) ;
303+ const pendingRequests = new Set < string > ( ) ;
304304
305305 // Use route to intercept network requests and discover resources
306306 await page . route ( '**/*' , async ( route , request ) => {
@@ -360,32 +360,27 @@ export default async function processSnapshot(snapshot: Snapshot, ctx: Context):
360360 body = globalCache . get ( requestUrl ) . body ;
361361 } else {
362362 ctx . log . debug ( `Resource not found in cache or global cache ${ requestUrl } fetching from server` ) ;
363- pendingResources . set ( requestUrl , request ) ;
363+ pendingRequests . add ( requestUrl ) ;
364364 response = await page . request . fetch ( request , requestOptions ) ;
365365 body = await response . body ( ) ;
366+ pendingRequests . delete ( requestUrl ) ;
366367 }
367368
368369 // handle response
369370 if ( ! body ) {
370371 ctx . log . debug ( `Handling request ${ requestUrl } \n - skipping no response` ) ;
371- pendingResources . delete ( requestUrl ) ;
372372 } else if ( ! body . length ) {
373373 ctx . log . debug ( `Handling request ${ requestUrl } \n - skipping empty response` ) ;
374- pendingResources . delete ( requestUrl ) ;
375374 } else if ( requestUrl === snapshot . url ) {
376375 ctx . log . debug ( `Handling request ${ requestUrl } \n - skipping root resource` ) ;
377- pendingResources . delete ( requestUrl ) ;
378376 } else if ( ! ctx . config . allowedHostnames . includes ( requestHostname ) ) {
379377 ctx . log . debug ( `Handling request ${ requestUrl } \n - skipping remote resource` ) ;
380- pendingResources . delete ( requestUrl ) ;
381378 } else if ( cache [ requestUrl ] ) {
382379 ctx . log . debug ( `Handling request ${ requestUrl } \n - skipping already cached resource` ) ;
383380 } else if ( body . length > MAX_RESOURCE_SIZE ) {
384381 ctx . log . debug ( `Handling request ${ requestUrl } \n - skipping resource larger than 15MB` ) ;
385- pendingResources . delete ( requestUrl ) ;
386382 } else if ( ! ALLOWED_RESOURCES . includes ( request . resourceType ( ) ) ) {
387383 ctx . log . debug ( `Handling request ${ requestUrl } \n - skipping disallowed resource type [${ request . resourceType ( ) } ]` ) ;
388- pendingResources . delete ( requestUrl ) ;
389384 } else if ( ! ALLOWED_STATUSES . includes ( response . status ( ) ) ) {
390385 ctx . log . debug ( `${ globalViewport } Handling request ${ requestUrl } \n - skipping disallowed status [${ response . status ( ) } ]` ) ;
391386
@@ -396,9 +391,10 @@ export default async function processSnapshot(snapshot: Snapshot, ctx: Context):
396391
397392 let responseOfRetry , bodyOfRetry
398393 ctx . log . debug ( `Resource had a disallowed status ${ requestUrl } fetching from server again` ) ;
394+ pendingRequests . add ( requestUrl ) ;
399395 responseOfRetry = await page . request . fetch ( request , requestOptions ) ;
400396 bodyOfRetry = await responseOfRetry . body ( ) ;
401-
397+ pendingRequests . delete ( requestUrl ) ;
402398 if ( responseOfRetry && responseOfRetry . status ( ) && ALLOWED_STATUSES . includes ( responseOfRetry . status ( ) ) ) {
403399 ctx . log . debug ( `Handling request after retry ${ requestUrl } \n - content-type ${ responseOfRetry . headers ( ) [ 'content-type' ] } ` ) ;
404400 cache [ requestUrl ] = {
@@ -411,7 +407,6 @@ export default async function processSnapshot(snapshot: Snapshot, ctx: Context):
411407 type : responseOfRetry . headers ( ) [ 'content-type' ]
412408 } ) ;
413409 }
414- pendingResources . delete ( requestUrl ) ;
415410 route . fulfill ( {
416411 status : responseOfRetry . status ( ) ,
417412 headers : responseOfRetry . headers ( ) ,
@@ -460,8 +455,6 @@ export default async function processSnapshot(snapshot: Snapshot, ctx: Context):
460455 body : body . toString ( 'base64' ) ,
461456 type : response . headers ( ) [ 'content-type' ]
462457 }
463-
464- pendingResources . delete ( requestUrl ) ;
465458 }
466459
467460
@@ -671,52 +664,23 @@ export default async function processSnapshot(snapshot: Snapshot, ctx: Context):
671664 ctx . log . debug ( `Network idle failed due to ${ error } ` ) ;
672665 }
673666
674- // Retry all pending resources
675- for ( const [ pendingUrl , pendingRequest ] of pendingResources ) {
676- ctx . log . debug ( `Retrying resource ${ pendingUrl } with ${ MAX_RETRY_WAIT_TIME } s timeout...` ) ;
677- try {
678-
679- const retryOptions : Record < string , any > = {
680- timeout : MAX_RETRY_WAIT_TIME ,
681- headers : {
682- ...constants . REQUEST_HEADERS
683- }
684- } ;
685- if ( ctx . config . basicAuthorization ) {
686- ctx . log . debug ( `Adding basic authorization to the headers for root url` ) ;
687- let token = Buffer . from ( `${ ctx . config . basicAuthorization . username } :${ ctx . config . basicAuthorization . password } ` ) . toString ( 'base64' ) ;
688- retryOptions . headers . Authorization = `Basic ${ token } ` ;
689- }
690- if ( ctx . config . requestHeaders && Array . isArray ( ctx . config . requestHeaders ) ) {
691- ctx . config . requestHeaders . forEach ( ( headerObj ) => {
692- Object . entries ( headerObj ) . forEach ( ( [ key , value ] ) => {
693- retryOptions . headers [ key ] = value ;
694- } ) ;
695- } ) ;
696- }
697- const retryResponse = await page . request . fetch ( pendingRequest , retryOptions ) ;
698- const retryBody = await retryResponse . body ( ) ;
699-
700- if ( retryResponse && retryResponse . status ( ) && ALLOWED_STATUSES . includes ( retryResponse . status ( ) ) ) {
701- ctx . log . debug ( `Retry successful for ${ pendingUrl } ` ) ;
702- cache [ pendingUrl ] = {
703- body : retryBody . toString ( 'base64' ) ,
704- type : retryResponse . headers ( ) [ 'content-type' ]
705- } ;
706-
707- if ( ctx . config . useGlobalCache ) {
708- globalCache . set ( pendingUrl , {
709- body : retryBody . toString ( 'base64' ) ,
710- type : retryResponse . headers ( ) [ 'content-type' ]
711- } ) ;
712- }
713- }
714- pendingResources . delete ( pendingUrl ) ;
715- } catch ( retryError ) {
716- ctx . log . debug ( `Retry failed for ${ pendingUrl } : ${ retryError } ` ) ;
717- pendingResources . delete ( pendingUrl ) ;
667+ // Wait for pending requests to complete
/**
 * Polls `pendingRequests` until it is empty or until
 * `MAX_WAIT_FOR_REQUEST_CALL` milliseconds have elapsed, whichever comes first.
 *
 * The set is re-checked roughly once per second; each sleep is clamped to the
 * time remaining so the overall wait never runs past the deadline.
 *
 * Logging contract:
 *  - while waiting, the currently pending URLs are logged at debug level;
 *  - on timeout, only the timeout message is logged (the set is still
 *    non-empty, so "No pending requests." would be misleading);
 *  - when the set drains (or was empty to begin with), "No pending requests."
 *    is logged.
 *
 * @returns a promise that resolves once waiting has finished; it does not
 *          report whether the wait ended by drain or by timeout.
 */
const checkPending = async (): Promise<void> => {
    const startTime = Date.now();
    while (pendingRequests.size > 0) {
        const elapsedTime = Date.now() - startTime;
        if (elapsedTime >= MAX_WAIT_FOR_REQUEST_CALL) {
            ctx.log.debug(`Timeout reached (${MAX_WAIT_FOR_REQUEST_CALL / 1000}s). Stopping wait for pending requests.`);
            // Return instead of break: requests are still outstanding, so the
            // "No pending requests." message below must not be emitted.
            return;
        }
        ctx.log.debug('Pending requests:', Array.from(pendingRequests));
        // Sleep for at most one second, but never beyond the remaining budget.
        await page.waitForTimeout(Math.min(1000, MAX_WAIT_FOR_REQUEST_CALL - elapsedTime));
    }
    ctx.log.debug('No pending requests.');
};
681+
682+ await checkPending ( ) ;
683+
720684
721685
722686 if ( ctx . config . allowedAssets && ctx . config . allowedAssets . length ) {
0 commit comments