@@ -44,6 +44,7 @@ import {
4444 KeyValueStore ,
4545 LogLevel ,
4646 mergeCookies ,
47+ NavigationSkippedError ,
4748 NonRetryableError ,
4849 purgeDefaultStorages ,
4950 RequestHandlerError ,
@@ -914,13 +915,13 @@ export class BasicCrawler<
914915 try {
915916 await this . basicContextPipeline
916917 . chain ( this . contextPipeline )
917- . call ( crawlingContext , ( ctx ) => this . handleRequest ( ctx , source ) ) ;
918+ . call ( crawlingContext , ( ctx ) => this . handleRequest ( ctx , source , request ) ) ;
918919 } catch ( error ) {
919920 // ContextPipelineInterruptedError means the request was intentionally skipped
920921 // (e.g., doesn't match enqueue strategy after redirect). Just return gracefully.
921922 if ( error instanceof ContextPipelineInterruptedError ) {
922923 await this . _timeoutAndRetry (
923- async ( ) => this . requestManager ?. markRequestHandled ( crawlingContext . request ! ) ,
924+ async ( ) => this . requestManager ?. markRequestHandled ( request ) ,
924925 this . internalTimeoutMillis ,
925926 `Marking request ${ crawlingContext . request . url } (${ crawlingContext . request . id } ) as handled timed out after ${
926927 this . internalTimeoutMillis / 1e3
@@ -939,6 +940,7 @@ export class BasicCrawler<
939940 await this . _requestFunctionErrorHandler (
940941 unwrappedError ,
941942 crawlingContext as CrawlingContext ,
943+ request ,
942944 this . requestManager ! ,
943945 ) ;
944946 crawlingContext . session ?. markBad ( ) ;
@@ -1838,9 +1840,7 @@ export class BasicCrawler<
18381840 }
18391841
18401842 /** Handles a single request - runs the request handler with retries, error handling, and lifecycle management. */
1841- protected async handleRequest ( crawlingContext : ExtendedContext , requestSource : IRequestManager ) {
1842- const { request } = crawlingContext ;
1843-
1843+ protected async handleRequest ( crawlingContext : ExtendedContext , requestSource : IRequestManager , request : Request ) {
18441844 const statisticsId = request . id || request . uniqueKey ;
18451845 this . stats . startJob ( statisticsId ) ;
18461846
@@ -1871,7 +1871,7 @@ export class BasicCrawler<
18711871 try {
18721872 request . state = RequestState . ERROR_HANDLER ;
18731873 await addTimeoutToPromise (
1874- async ( ) => this . _requestFunctionErrorHandler ( err , crawlingContext , requestSource ) ,
1874+ async ( ) => this . _requestFunctionErrorHandler ( err , crawlingContext , request , requestSource ) ,
18751875 this . internalTimeoutMillis ,
18761876 `Handling request failure of ${ request . url } (${ request . id } ) timed out after ${
18771877 this . internalTimeoutMillis / 1e3
@@ -2052,13 +2052,15 @@ export class BasicCrawler<
20522052
20532053 /**
20542054 * Handles errors thrown by user provided requestHandler()
2055+ *
2056+ * @param request The request object, passed separately to circumvent potential dynamic logic in crawlingContext.request
20552057 */
20562058 protected async _requestFunctionErrorHandler (
20572059 error : Error ,
20582060 crawlingContext : CrawlingContext ,
2061+ request : Request ,
20592062 source : IRequestList | IRequestManager ,
20602063 ) : Promise < void > {
2061- const { request } = crawlingContext ;
20622064 request . pushErrorMessage ( error ) ;
20632065
20642066 if ( error instanceof CriticalError ) {
@@ -2256,6 +2258,18 @@ export class BasicCrawler<
22562258 }
22572259
22582260 private requestMatchesEnqueueStrategy ( request : Request ) {
2261+ // If `skipNavigation` was used, reading `loadedUrl` throws `NavigationSkippedError` - treat the request as matching and return `true`
2262+ try {
2263+ // eslint-disable-next-line @typescript-eslint/no-unused-expressions
2264+ request . loadedUrl ;
2265+ } catch ( err ) {
2266+ if ( err instanceof NavigationSkippedError ) {
2267+ return true ;
2268+ }
2269+
2270+ throw err ;
2271+ }
2272+
22592273 const { url, loadedUrl } = request ;
22602274
22612275 // eslint-disable-next-line dot-notation -- private access
0 commit comments