@@ -27,6 +27,7 @@ class CrawleraMiddlewareTestCase(TestCase):
2727
2828 mwcls = CrawleraMiddleware
2929 bancode = 503
30+ auth_error_code = 407
3031
3132 def setUp (self ):
3233 self .spider = Spider ('foo' )
@@ -474,7 +475,6 @@ def test_noslaves_delays(self, random_uniform_patch):
474475 crawler = self ._mock_crawler (self .spider , self .settings )
475476 mw = self .mwcls .from_crawler (crawler )
476477 mw .open_spider (self .spider )
477- mw .noslaves_max_delay = max_delay
478478
479479 slot = MockedSlot ()
480480 crawler .engine .downloader .slots [slot_key ] = slot
@@ -514,6 +514,77 @@ def test_noslaves_delays(self, random_uniform_patch):
514514 mw .process_response (good_req , good_res , self .spider )
515515 self .assertEqual (slot .delay , default_delay )
516516
@patch('random.uniform')
def test_auth_error_retries(self, random_uniform_patch):
    """Check retry and backoff handling of Crawlera proxy-auth errors.

    A response with status ``self.auth_error_code`` and the
    ``X-Crawlera-Error: bad_proxy_auth`` header is expected to:

    * come back from ``process_response`` as a retry request whose
      ``crawlera_auth_retry_times`` meta value grows by one per attempt,
    * push the download slot delay up exponentially, capped at
      ``CRAWLERA_BACKOFF_MAX``,
    * be handed back as a plain ``Response`` once
      ``max_auth_retry_times`` attempts have been used.

    The same status without the Crawlera error header must not be
    retried at all.
    """
    # mock random.uniform to just return the max delay
    random_uniform_patch.side_effect = lambda x, y: y

    slot_key = 'www.scrapytest.org'
    url = 'http://www.scrapytest.org'
    ban_url = 'http://auth.error'
    max_delay = 70
    backoff_step = 15
    max_retries = 4

    self.settings['CRAWLERA_BACKOFF_STEP'] = backoff_step
    self.settings['CRAWLERA_BACKOFF_MAX'] = max_delay

    self.spider.crawlera_enabled = True
    crawler = self._mock_crawler(self.spider, self.settings)
    mw = self.mwcls.from_crawler(crawler)
    mw.open_spider(self.spider)
    mw.max_auth_retry_times = max_retries

    slot = MockedSlot()
    crawler.engine.downloader.slots[slot_key] = slot

    auth_error_req = Request(url, meta={'download_slot': slot_key})
    auth_error_headers = {'X-Crawlera-Error': 'bad_proxy_auth'}
    auth_error_response = Response(
        ban_url,
        status=self.auth_error_code,
        request=auth_error_req,
        headers=auth_error_headers,
    )

    # delays grow exponentially (capped at max_delay), retry times
    # increase accordingly
    expected_delays = [
        backoff_step,
        backoff_step * 2 ** 1,
        backoff_step * 2 ** 2,
        max_delay,
    ]
    self.assertEqual(len(expected_delays), max_retries)
    for attempt, expected_delay in enumerate(expected_delays, start=1):
        req = mw.process_response(
            auth_error_req, auth_error_response, self.spider)
        self.assertEqual(slot.delay, expected_delay)
        retry_times = req.meta["crawlera_auth_retry_times"]
        self.assertEqual(retry_times, attempt)
        # Carry the counter over to the request used for the next call.
        # The response is bound to this request, so its ``meta`` is this
        # very dict; writing to the request directly makes that explicit.
        auth_error_req.meta["crawlera_auth_retry_times"] = retry_times

    # Should return a response after the max number of retries
    res = mw.process_response(
        auth_error_req, auth_error_response, self.spider)
    self.assertIsInstance(res, Response)

    # non crawlera 407 is not retried.
    # Use a fresh request here: ``auth_error_req`` already carries an
    # exhausted retry counter, so reusing it would yield a Response even
    # if the middleware wrongly treated a header-less 407 as retryable.
    non_crawlera_req = Request(url, meta={'download_slot': slot_key})
    non_crawlera_407_response = Response(
        ban_url,
        status=self.auth_error_code,
        request=non_crawlera_req,
    )
    res = mw.process_response(
        non_crawlera_req, non_crawlera_407_response, self.spider)
    self.assertIsInstance(res, Response)
587+
517588 @patch ('scrapy_crawlera.middleware.logging' )
518589 def test_open_spider_logging (self , mock_logger ):
519590 spider = self .spider
0 commit comments