|
1 | 1 | from unittest import TestCase |
| 2 | +try: |
| 3 | + from unittest.mock import patch |
| 4 | +except ImportError: |
| 5 | + from mock import patch |
2 | 6 |
|
3 | 7 | from w3lib.http import basic_auth_header |
4 | 8 | from scrapy.http import Request, Response |
@@ -362,3 +366,67 @@ def test_clean_headers_when_enabled(self): |
362 | 366 | self.assertIn(b'X-Crawlera-Debug', req.headers) |
363 | 367 | self.assertIn(b'X-Crawlera-Profile', req.headers) |
364 | 368 | self.assertIn(b'User-Agent', req.headers) |
| 369 | + |
def test_crawlera_default_headers(self):
    """Check that CRAWLERA_DEFAULT_HEADERS entries end up on the request.

    First verifies that a configured header value is set on an outgoing
    request, then that an entry whose value is ``None`` is left off the
    request while the remaining entries are still applied.
    """
    target_url = 'http://www.scrapytest.org/other'
    spider = self.spider
    self.spider.crawlera_enabled = True

    def run_request(default_headers):
        # Build a fresh middleware for the given setting and push a single
        # request through it, returning the (mutated) request.
        self.settings['CRAWLERA_DEFAULT_HEADERS'] = default_headers
        crawler = self._mock_crawler(spider, self.settings)
        middleware = self.mwcls.from_crawler(crawler)
        middleware.open_spider(spider)
        request = Request(target_url)
        assert middleware.process_request(request, spider) is None
        return request

    with_profile = run_request({'X-Crawlera-Profile': 'desktop'})
    self.assertEqual(with_profile.headers['X-Crawlera-Profile'], b'desktop')

    # test ignore None headers
    with_none = run_request({
        'X-Crawlera-Profile': None,
        'X-Crawlera-Cookies': 'disable',
    })
    self.assertEqual(with_none.headers['X-Crawlera-Cookies'], b'disable')
    self.assertNotIn('X-Crawlera-Profile', with_none.headers)
| 396 | + |
@patch('scrapy_crawlera.middleware.logging')
def test_crawlera_default_headers_conflicting_headers(self, mock_logger):
    """Check the warning logged for conflicting Crawlera headers.

    With ``X-Crawlera-Profile`` configured as a default header, a request
    that carries ``X-Crawlera-UA`` keeps both headers and a warning is
    logged. The same is checked for a lower-case spelling of the request
    header.

    ``mock_logger`` is the mock substituted for the ``logging`` name in
    ``scrapy_crawlera.middleware`` by the ``patch`` decorator.
    """
    spider = self.spider
    self.spider.crawlera_enabled = True

    self.settings['CRAWLERA_DEFAULT_HEADERS'] = {
        'X-Crawlera-Profile': 'desktop'
    }
    crawler = self._mock_crawler(spider, self.settings)
    mw = self.mwcls.from_crawler(crawler)
    mw.open_spider(spider)

    expected_warning = (
        "The headers ('X-Crawlera-Profile', 'X-Crawlera-UA') are conflictin"
        "g on request http://www.scrapytest.org/other. X-Crawlera-UA will b"
        "e ignored. Please check https://doc.scrapinghub.com/crawlera.html "
        "for more information"
    )

    # The second spelling tests that the header name is matched
    # case-insensitively.
    for ua_header in ('X-Crawlera-UA', 'x-crawlera-ua'):
        # assert_called_with only inspects the most recent call, so clear
        # the recorded calls first; otherwise the warning from the previous
        # request would satisfy the assertion for this one.
        mock_logger.warn.reset_mock()

        req = Request('http://www.scrapytest.org/other',
                      headers={ua_header: 'desktop'})
        self.assertIsNone(mw.process_request(req, spider))
        self.assertEqual(req.headers['X-Crawlera-UA'], b'desktop')
        self.assertEqual(req.headers['X-Crawlera-Profile'], b'desktop')
        mock_logger.warn.assert_called_with(expected_warning)
0 commit comments