14 | 14 | from scrapy_crawlera import CrawleraMiddleware |
15 | 15 | import os |
16 | 16 |
| 17 | +from scrapy_crawlera.utils import exp_backoff |
| 18 | + |
17 | 19 |
18 | 20 | class MockedSlot(object): |
19 | 21 |
@@ -223,15 +225,6 @@ def test_delay_adjustment(self): |
223 | 225 | slot = MockedSlot(self.spider.download_delay) |
224 | 226 | crawler.engine.downloader.slots[slot_key] = slot |
225 | 227 |
226 | | - # no ban |
227 | | - req = Request(url, meta={'download_slot': slot_key}) |
228 | | - headers = {'X-Crawlera-Error': 'no_proxies'} |
229 | | - res = Response( |
230 | | - ban_url, status=self.bancode, headers=headers, request=req) |
231 | | - mw.process_response(req, res, self.spider) |
232 | | - self.assertEqual(slot.delay, delay) |
233 | | - self.assertEqual(self.spider.download_delay, delay) |
234 | | - |
235 | 228 | # ban without retry-after |
236 | 229 | req = Request(url, meta={'download_slot': slot_key}) |
237 | 230 | headers = {'X-Crawlera-Error': 'banned'} |
@@ -473,7 +466,66 @@ def test_is_banned(self): |
473 | 466 | req = self._make_fake_request(self.spider, crawlera_enabled=True) |
474 | 467 | res = Response(req.url, status=200) |
475 | 468 | self.assertFalse(mw._is_banned(res)) |
476 | | - res = Response(req.url, status=503, headers={'X-Crawlera-Error': 'no_proxies'}) |
| 469 | + res = Response(req.url, status=503, headers={'X-Crawlera-Error': 'noslaves'}) |
477 | 470 | self.assertFalse(mw._is_banned(res)) |
478 | 471 | res = Response(req.url, status=503, headers={'X-Crawlera-Error': 'banned'}) |
479 | 472 | self.assertTrue(mw._is_banned(res)) |
| 473 | + |
| 474 | + @patch('random.uniform') |
| 475 | + def test_noslaves_delays(self, random_uniform_patch): |
| 476 | + # mock random.uniform to just return the max delay |
| 477 | + random_uniform_patch.side_effect = lambda x, y: y |
| 478 | + |
| 479 | + slot_key = 'www.scrapytest.org' |
| 480 | + url = 'http://www.scrapytest.org' |
| 481 | + ban_url = 'http://ban.me' |
| 482 | + max_delay = 70 |
| 483 | + backoff_step = 15 |
| 484 | + default_delay = 0 |
| 485 | + |
| 486 | + self.settings['CRAWLERA_BACKOFF_STEP'] = backoff_step |
| 487 | + self.settings['CRAWLERA_BACKOFF_MAX'] = max_delay |
| 488 | + |
| 489 | + self.spider.crawlera_enabled = True |
| 490 | + crawler = self._mock_crawler(self.spider, self.settings) |
| 491 | + mw = self.mwcls.from_crawler(crawler) |
| 492 | + mw.open_spider(self.spider) |
| 493 | + mw.noslaves_max_delay = max_delay |
| 494 | + |
| 495 | + slot = MockedSlot() |
| 496 | + crawler.engine.downloader.slots[slot_key] = slot |
| 497 | + |
| 498 | + noslaves_req = Request(url, meta={'download_slot': slot_key}) |
| 499 | + headers = {'X-Crawlera-Error': 'noslaves'} |
| 500 | + noslaves_res = Response( |
| 501 | + ban_url, status=self.bancode, headers=headers, request=noslaves_req) |
| 502 | + |
| 503 | + # delays grow exponentially |
| 504 | + mw.process_response(noslaves_req, noslaves_res, self.spider) |
| 505 | + self.assertEqual(slot.delay, backoff_step) |
| 506 | + |
| 507 | + mw.process_response(noslaves_req, noslaves_res, self.spider) |
| 508 | + self.assertEqual(slot.delay, backoff_step * 2 ** 1) |
| 509 | + |
| 510 | + mw.process_response(noslaves_req, noslaves_res, self.spider) |
| 511 | + self.assertEqual(slot.delay, backoff_step * 2 ** 2) |
| 512 | + |
| 513 | + mw.process_response(noslaves_req, noslaves_res, self.spider) |
| 514 | + self.assertEqual(slot.delay, max_delay) |
| 515 | + |
| 516 | + # other responses reset delay |
| 517 | + ban_req = Request(url, meta={'download_slot': slot_key}) |
| 518 | + ban_headers = {'X-Crawlera-Error': 'banned'} |
| 519 | + ban_res = Response( |
| 520 | + ban_url, status=self.bancode, headers=ban_headers, request=ban_req) |
| 521 | + mw.process_response(ban_req, ban_res, self.spider) |
| 522 | + self.assertEqual(slot.delay, default_delay) |
| 523 | + |
| 524 | + mw.process_response(noslaves_req, noslaves_res, self.spider) |
| 525 | + self.assertEqual(slot.delay, backoff_step) |
| 526 | + |
| 527 | + good_req = Request(url, meta={'download_slot': slot_key}) |
| 528 | + good_res = Response( |
| 529 | + url, status=200, request=good_req) |
| 530 | + mw.process_response(good_req, good_res, self.spider) |
| 531 | + self.assertEqual(slot.delay, default_delay) |
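
The new test exercises the `exp_backoff` helper imported at the top of the diff, but the helper itself isn't shown here. A minimal sketch that is consistent with the assertions above (an assumption, not the library's actual code: a full-jitter generator whose upper bound doubles each attempt and is capped at `CRAWLERA_BACKOFF_MAX`, which is why the test patches `random.uniform` to return the upper bound):

```python
import random
from itertools import count


def exp_backoff(step, max_delay):
    """Illustrative sketch of an exponential-backoff delay generator.

    Assumed behaviour: the upper bound doubles on every attempt
    (step, step * 2, step * 4, ...) and is capped at max_delay; each
    delay is drawn uniformly from [0, bound], so with random.uniform
    patched to return its second argument the test sees 15, 30, 60 and
    then the 70-second cap.
    """
    for attempt in count(0):
        # min() keeps the doubling bound from exceeding the configured cap
        yield random.uniform(0, min(max_delay, step * 2 ** attempt))
```

The "other responses reset delay" block then relies on the middleware discarding the current generator (and restoring the slot's default delay) whenever a non-noslaves response arrives, so the next noslaves error starts the backoff again at `backoff_step`.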