@@ -102,7 +102,11 @@ def _assert_enabled(self, spider,
102102 # check for not banning before maxbans for bancode
103103 for x in range (maxbans + 1 ):
104104 self .assertEqual (crawler .engine .fake_spider_closed_result , None )
105- res = Response ('http://ban.me/%d' % x , status = self .bancode )
105+ res = Response (
106+ 'http://ban.me/%d' % x ,
107+ status = self .bancode ,
108+ headers = {'X-Crawlera-Error' : 'banned' },
109+ )
106110 assert mw .process_response (req , res , spider ) is res
107111
108112 # max bans reached and close_spider called
@@ -219,15 +223,30 @@ def test_delay_adjustment(self):
219223 slot = MockedSlot (self .spider .download_delay )
220224 crawler .engine .downloader .slots [slot_key ] = slot
221225
222- # ban
226+ # no ban
223227 req = Request (url , meta = {'download_slot' : slot_key })
224- res = Response (ban_url , status = self .bancode , request = req )
228+ headers = {'X-Crawlera-Error' : 'no_proxies' }
229+ res = Response (
230+ ban_url , status = self .bancode , headers = headers , request = req )
225231 mw .process_response (req , res , self .spider )
226232 self .assertEqual (slot .delay , delay )
227233 self .assertEqual (self .spider .download_delay , delay )
228234
235+ # ban without retry-after
236+ req = Request (url , meta = {'download_slot' : slot_key })
237+ headers = {'X-Crawlera-Error' : 'banned' }
238+ res = Response (
239+ ban_url , status = self .bancode , headers = headers , request = req )
240+ mw .process_response (req , res , self .spider )
241+ self .assertEqual (slot .delay , delay )
242+ self .assertEqual (self .spider .download_delay , delay )
243+
244+ # ban with retry-after
229245 retry_after = 1.5
230- headers = {'retry-after' : str (retry_after )}
246+ headers = {
247+ 'retry-after' : str (retry_after ),
248+ 'X-Crawlera-Error' : 'banned'
249+ }
231250 res = Response (
232251 ban_url , status = self .bancode , headers = headers , request = req )
233252 mw .process_response (req , res , self .spider )
@@ -335,8 +354,12 @@ def test_stats(self):
335354 assert mw .process_response (req , res , spider ) is res
336355 self .assertEqual (crawler .stats .get_value ('crawlera/response' ), 2 )
337356 self .assertEqual (crawler .stats .get_value ('crawlera/response/status/{}' .format (mw .ban_code )), 1 )
338- self .assertEqual (crawler .stats .get_value ('crawlera/response/banned' ), 1 )
339357 self .assertEqual (crawler .stats .get_value ('crawlera/response/error/somethingbad' ), 1 )
358+ res = Response (req .url , status = mw .ban_code , headers = {'X-Crawlera-Error' : 'banned' })
359+ assert mw .process_response (req , res , spider ) is res
360+ self .assertEqual (crawler .stats .get_value ('crawlera/response' ), 3 )
361+ self .assertEqual (crawler .stats .get_value ('crawlera/response/status/{}' .format (mw .ban_code )), 2 )
362+ self .assertEqual (crawler .stats .get_value ('crawlera/response/banned' ), 1 )
340363
341364 def _make_fake_request (self , spider , crawlera_enabled ):
342365 spider .crawlera_enabled = crawlera_enabled
@@ -441,3 +464,16 @@ def test_dont_proxy_false_does_nothing(self):
441464 req .meta ['dont_proxy' ] = False
442465 assert mw .process_request (req , spider ) is None
443466 self .assertIsNotNone (req .meta .get ('proxy' ))
467+
def test_is_banned(self):
    """Exercise the middleware's ``_is_banned`` on three responses.

    A plain 200 response and a 503 response whose ``X-Crawlera-Error``
    header is ``no_proxies`` are both expected to be reported as not
    banned; a 503 response whose ``X-Crawlera-Error`` header is
    ``banned`` is expected to be reported as banned.
    """
    spider = self.spider
    spider.crawlera_enabled = True
    mw = self.mwcls.from_crawler(self._mock_crawler(spider, self.settings))
    mw.open_spider(spider)
    url = self._make_fake_request(spider, crawlera_enabled=True).url

    # Successful response: no error status, no error header.
    ok_response = Response(url, status=200)
    self.assertFalse(mw._is_banned(ok_response))

    # 503 status, but the error header reports something other than a ban.
    no_proxies_response = Response(
        url, status=503, headers={'X-Crawlera-Error': 'no_proxies'})
    self.assertFalse(mw._is_banned(no_proxies_response))

    # 503 status together with the explicit 'banned' error header.
    banned_response = Response(
        url, status=503, headers={'X-Crawlera-Error': 'banned'})
    self.assertTrue(mw._is_banned(banned_response))
0 commit comments