33from copy import deepcopy
44import datetime
55import os
6+ import traceback
67
78from scrapy import signals
89from scrapy .crawler import CrawlerRunner , Crawler
@@ -109,6 +110,7 @@ def __init__(self, spider_name, request_kwargs,
109110 self .items = []
110111 self .items_dropped = []
111112 self .errors = []
113+ self .user_error = None
112114 self .max_requests = int (max_requests ) if max_requests else None
113115 self .timeout_limit = int (app_settings .TIMEOUT_LIMIT )
114116 self .request_count = 0
@@ -120,7 +122,7 @@ def __init__(self, spider_name, request_kwargs,
120122 # because we need to know if spider has method available
121123 self .callback_name = request_kwargs .pop ('callback' , None ) or 'parse'
122124 # do the same for errback
123- self .errback_name = request_kwargs .pop ('errback' , None ) or 'parse'
125+ self .errback_name = request_kwargs .pop ('errback' , None ) or app_settings . DEFAULT_ERRBACK_NAME
124126
125127 if request_kwargs .get ("url" ):
126128 self .request = self .create_spider_request (deepcopy (request_kwargs ))
@@ -171,17 +173,30 @@ def spider_idle(self, spider):
171173
172174 """
173175 if spider is self .crawler .spider and self .request and not self ._request_scheduled :
174- callback = getattr (self .crawler .spider , self .callback_name )
175- assert callable (callback ), 'Invalid callback'
176- self .request = self .request .replace (callback = callback )
176+ try :
177+ callback = getattr (self .crawler .spider , self .callback_name )
178+ assert callable (callback ), 'Invalid callback'
179+ self .request = self .request .replace (callback = callback )
180+ except (AssertionError , AttributeError ):
181+ msg = f"Invalid spider callback { self .callback_name } , callback not callable or not a method of a spider { self .spider_name } "
182+ self .user_error = Error (400 , message = msg )
183+ try :
184+ if self .errback_name :
185+ errback = getattr (self .crawler .spider , self .errback_name )
186+ assert callable (errback ), 'Invalid errback'
187+ self .request = self .request .replace (errback = errback )
188+ except (AssertionError , AttributeError ):
189+ msg = f"Invalid spider errback { self .errback_name } , errback not callable or not a method of a spider { self .spider_name } "
190+ self .user_error = Error (400 , message = msg )
191+ if self .user_error :
192+ log .msg (self .user_error .message , level = log .ERROR )
193+ return
177194
178- errback = getattr (self .crawler .spider , self .errback_name )
179- assert callable (errback ), 'Invalid errback'
180- self .request = self .request .replace (errback = errback )
181195 modify_request = getattr (
182196 self .crawler .spider , "modify_realtime_request" , None )
183197 if callable (modify_request ):
184198 self .request = modify_request (self .request )
199+
185200 spider .crawler .engine .crawl (self .request )
186201 self ._request_scheduled = True
187202 raise DontCloseSpider
@@ -238,6 +253,9 @@ def return_items(self, result):
238253 "stats" : stats ,
239254 "spider_name" : self .spider_name ,
240255 }
256+
257+ results ["user_error" ] = self .user_error
258+
241259 if self .debug :
242260 results ["errors" ] = self .errors
243261 return results
0 commit comments