@@ -11,6 +11,7 @@ var ALLOWED_RESOURCES = ['document', 'stylesheet', 'image', 'media', 'font', 'ot
// HTTP status codes a fetched response must have for the resource to be accepted; any other status is handled as "disallowed status".
1111const ALLOWED_STATUSES = [ 200 , 201 ] ;
// NOTE(review): presumably a request timeout in milliseconds (1800000 ms = 30 min); its usage is not visible in this excerpt — confirm.
1212const REQUEST_TIMEOUT = 1800000 ;
// NOTE(review): presumably the minimum viewport height in pixels; its usage is not visible in this excerpt — confirm.
1313const MIN_VIEWPORT_HEIGHT = 1080 ;
// Passed as the `timeout` option when still-pending resources are re-fetched; Playwright interprets request timeouts in milliseconds (60000 ms = 60 s).
14+ const MAX_RETRY_WAIT_TIME = 60000 ;
1415
1516export async function prepareSnapshot ( snapshot : Snapshot , ctx : Context ) : Promise < Record < string , any > > {
1617 let processedOptions : Record < string , any > = { } ;
@@ -299,6 +300,8 @@ export default async function processSnapshot(snapshot: Snapshot, ctx: Context):
299300 }
300301 }
301302
303+ const pendingResources = new Map < string , any > ( ) ;
304+
302305 // Use route to intercept network requests and discover resources
303306 await page . route ( '**/*' , async ( route , request ) => {
304307 const requestUrl = request . url ( )
@@ -357,25 +360,32 @@ export default async function processSnapshot(snapshot: Snapshot, ctx: Context):
357360 body = globalCache . get ( requestUrl ) . body ;
358361 } else {
359362 ctx . log . debug ( `Resource not found in cache or global cache ${ requestUrl } fetching from server` ) ;
363+ pendingResources . set ( requestUrl , request ) ;
360364 response = await page . request . fetch ( request , requestOptions ) ;
361365 body = await response . body ( ) ;
362366 }
363367
364368 // handle response
365369 if ( ! body ) {
366370 ctx . log . debug ( `Handling request ${ requestUrl } \n - skipping no response` ) ;
371+ pendingResources . delete ( requestUrl ) ;
367372 } else if ( ! body . length ) {
368373 ctx . log . debug ( `Handling request ${ requestUrl } \n - skipping empty response` ) ;
374+ pendingResources . delete ( requestUrl ) ;
369375 } else if ( requestUrl === snapshot . url ) {
370376 ctx . log . debug ( `Handling request ${ requestUrl } \n - skipping root resource` ) ;
377+ pendingResources . delete ( requestUrl ) ;
371378 } else if ( ! ctx . config . allowedHostnames . includes ( requestHostname ) ) {
372379 ctx . log . debug ( `Handling request ${ requestUrl } \n - skipping remote resource` ) ;
380+ pendingResources . delete ( requestUrl ) ;
373381 } else if ( cache [ requestUrl ] ) {
374382 ctx . log . debug ( `Handling request ${ requestUrl } \n - skipping already cached resource` ) ;
375383 } else if ( body . length > MAX_RESOURCE_SIZE ) {
376384 ctx . log . debug ( `Handling request ${ requestUrl } \n - skipping resource larger than 15MB` ) ;
385+ pendingResources . delete ( requestUrl ) ;
377386 } else if ( ! ALLOWED_RESOURCES . includes ( request . resourceType ( ) ) ) {
378387 ctx . log . debug ( `Handling request ${ requestUrl } \n - skipping disallowed resource type [${ request . resourceType ( ) } ]` ) ;
388+ pendingResources . delete ( requestUrl ) ;
379389 } else if ( ! ALLOWED_STATUSES . includes ( response . status ( ) ) ) {
380390 ctx . log . debug ( `${ globalViewport } Handling request ${ requestUrl } \n - skipping disallowed status [${ response . status ( ) } ]` ) ;
381391
@@ -395,6 +405,13 @@ export default async function processSnapshot(snapshot: Snapshot, ctx: Context):
395405 body : bodyOfRetry . toString ( 'base64' ) ,
396406 type : responseOfRetry . headers ( ) [ 'content-type' ]
397407 }
408+ if ( ctx . config . useGlobalCache ) {
409+ globalCache . set ( requestUrl , {
410+ body : bodyOfRetry . toString ( 'base64' ) ,
411+ type : responseOfRetry . headers ( ) [ 'content-type' ]
412+ } ) ;
413+ }
414+ pendingResources . delete ( requestUrl ) ;
398415 route . fulfill ( {
399416 status : responseOfRetry . status ( ) ,
400417 headers : responseOfRetry . headers ( ) ,
@@ -443,8 +460,11 @@ export default async function processSnapshot(snapshot: Snapshot, ctx: Context):
443460 body : body . toString ( 'base64' ) ,
444461 type : response . headers ( ) [ 'content-type' ]
445462 }
463+
464+ pendingResources . delete ( requestUrl ) ;
446465 }
447466
467+
448468 // Continue the request with the fetched response
449469 route . fulfill ( {
450470 status : response . status ( ) ,
@@ -651,6 +671,53 @@ export default async function processSnapshot(snapshot: Snapshot, ctx: Context):
651671 ctx . log . debug ( `Network idle failed due to ${ error } ` ) ;
652672 }
653673
// Retry every resource that is still registered in `pendingResources`, i.e. whose
// fetch inside the route handler was started but never reached one of the branches
// that removes the entry. Each retry is a fresh request with its own timeout; a
// successful response is stored in `cache` (and in `globalCache` when enabled).
// Entries are removed after a single attempt, whether it succeeded or not.
// Deleting from a Map while iterating it with for...of is well-defined in JavaScript.
for (const [pendingUrl, pendingRequest] of pendingResources) {
    // MAX_RETRY_WAIT_TIME is handed to Playwright as a millisecond timeout, so report it in ms.
    ctx.log.debug(`Retrying resource ${pendingUrl} with ${MAX_RETRY_WAIT_TIME}ms timeout...`);
    try {
        // Rebuild the request options: default headers first, then optional basic
        // auth, then user-configured headers (which may override the former).
        const retryOptions: Record<string, any> = {
            timeout: MAX_RETRY_WAIT_TIME,
            headers: {
                ...constants.REQUEST_HEADERS
            }
        };
        if (ctx.config.basicAuthorization) {
            ctx.log.debug(`Adding basic authorization to the headers for root url`);
            const token = Buffer.from(`${ctx.config.basicAuthorization.username}:${ctx.config.basicAuthorization.password}`).toString('base64');
            retryOptions.headers.Authorization = `Basic ${token}`;
        }
        if (ctx.config.requestHeaders && Array.isArray(ctx.config.requestHeaders)) {
            for (const headerObj of ctx.config.requestHeaders) {
                for (const [key, value] of Object.entries(headerObj)) {
                    retryOptions.headers[key] = value;
                }
            }
        }

        // fetch() either resolves with a response or throws, so no null check is needed.
        const retryResponse = await page.request.fetch(pendingRequest, retryOptions);

        if (!ALLOWED_STATUSES.includes(retryResponse.status())) {
            ctx.log.debug(`Retry for ${pendingUrl} returned disallowed status [${retryResponse.status()}]`);
            continue;
        }

        const retryBody = await retryResponse.body();
        // Apply the same body checks the route handler uses before caching a resource.
        if (!retryBody || !retryBody.length || retryBody.length > MAX_RESOURCE_SIZE) {
            ctx.log.debug(`Retry for ${pendingUrl} returned an empty or oversized body - skipping`);
            continue;
        }

        ctx.log.debug(`Retry successful for ${pendingUrl}`);
        // Encode once; build separate objects so the two caches never share a reference.
        const encodedBody = retryBody.toString('base64');
        const contentType = retryResponse.headers()['content-type'];
        cache[pendingUrl] = { body: encodedBody, type: contentType };
        if (ctx.config.useGlobalCache) {
            globalCache.set(pendingUrl, { body: encodedBody, type: contentType });
        }
    } catch (retryError) {
        ctx.log.debug(`Retry failed for ${pendingUrl}: ${retryError}`);
    } finally {
        // One attempt per resource: drop the entry on success, skip (continue) and failure alike.
        pendingResources.delete(pendingUrl);
    }
}
720+
654721
655722 if ( ctx . config . allowedAssets && ctx . config . allowedAssets . length ) {
656723 for ( let assetUrl of ctx . config . allowedAssets ) {
0 commit comments