@@ -1,3 +1,4 @@
+import binascii
 import os
 import pytest
 from random import choice
@@ -8,6 +9,7 @@
 from mock import call, patch
 
 from w3lib.http import basic_auth_header
+from scrapy.downloadermiddlewares.httpproxy import HttpProxyMiddleware
 from scrapy.http import Request, Response
 from scrapy.spiders import Spider
 from scrapy.utils.test import get_crawler
@@ -84,17 +86,19 @@ def _assert_disabled(self, spider, settings=None):
     def _assert_enabled(self, spider,
                         settings=None,
                         proxyurl='http://proxy.zyte.com:8011',
+                        proxyurlcreds='http://apikey:@proxy.zyte.com:8011',
                         proxyauth=basic_auth_header('apikey', ''),
                         maxbans=400,
                         download_timeout=190):
         crawler = self._mock_crawler(spider, settings)
         mw = self.mwcls.from_crawler(crawler)
         mw.open_spider(spider)
+        assert mw.url == proxyurl
         req = Request('http://www.scrapytest.org')
         assert mw.process_request(req, spider) is None
-        self.assertEqual(req.meta.get('proxy'), proxyurl)
+        self.assertEqual(req.meta.get('proxy'), proxyurlcreds)
         self.assertEqual(req.meta.get('download_timeout'), download_timeout)
-        self.assertEqual(req.headers.get('Proxy-Authorization'), proxyauth)
+        self.assertNotIn(b'Proxy-Authorization', req.headers)
         res = self._mock_zyte_smartproxy_response(req.url)
         assert mw.process_response(req, res, spider) is res
 
@@ -169,31 +173,31 @@ def test_apikey(self):
         self.spider.zyte_smartproxy_enabled = True
         self.settings['ZYTE_SMARTPROXY_APIKEY'] = apikey = 'apikey'
         proxyauth = basic_auth_header(apikey, '')
-        self._assert_enabled(self.spider, self.settings, proxyauth=proxyauth)
+        self._assert_enabled(self.spider, self.settings, proxyauth=proxyauth, proxyurlcreds='http://apikey:@proxy.zyte.com:8011')
 
         self.spider.zyte_smartproxy_apikey = apikey = 'notfromsettings'
         proxyauth = basic_auth_header(apikey, '')
-        self._assert_enabled(self.spider, self.settings, proxyauth=proxyauth)
+        self._assert_enabled(self.spider, self.settings, proxyauth=proxyauth, proxyurlcreds='http://notfromsettings:@proxy.zyte.com:8011')
 
     def test_proxyurl(self):
         self.spider.zyte_smartproxy_enabled = True
         self.settings['ZYTE_SMARTPROXY_URL'] = 'http://localhost:8011'
-        self._assert_enabled(self.spider, self.settings, proxyurl='http://localhost:8011')
+        self._assert_enabled(self.spider, self.settings, proxyurl='http://localhost:8011', proxyurlcreds='http://apikey:@localhost:8011')
 
     def test_proxyurl_no_protocol(self):
         self.spider.zyte_smartproxy_enabled = True
         self.settings['ZYTE_SMARTPROXY_URL'] = 'localhost:8011'
-        self._assert_enabled(self.spider, self.settings, proxyurl='http://localhost:8011')
+        self._assert_enabled(self.spider, self.settings, proxyurl='http://localhost:8011', proxyurlcreds='http://apikey:@localhost:8011')
 
     def test_proxyurl_https(self):
         self.spider.zyte_smartproxy_enabled = True
         self.settings['ZYTE_SMARTPROXY_URL'] = 'https://localhost:8011'
-        self._assert_enabled(self.spider, self.settings, proxyurl='https://localhost:8011')
+        self._assert_enabled(self.spider, self.settings, proxyurl='https://localhost:8011', proxyurlcreds='https://apikey:@localhost:8011')
 
     def test_proxyurl_including_noconnect(self):
         self.spider.zyte_smartproxy_enabled = True
         self.settings['ZYTE_SMARTPROXY_URL'] = 'http://localhost:8011?noconnect'
-        self._assert_enabled(self.spider, self.settings, proxyurl='http://localhost:8011?noconnect')
+        self._assert_enabled(self.spider, self.settings, proxyurl='http://localhost:8011?noconnect', proxyurlcreds='http://apikey:@localhost:8011?noconnect')
 
     def test_maxbans(self):
         self.spider.zyte_smartproxy_enabled = True
@@ -218,7 +222,7 @@ def test_download_timeout(self):
         self._assert_enabled(self.spider, self.settings, download_timeout=120)
 
     def test_hooks(self):
-        proxyauth = b'Basic Foo'
+        proxyauth = basic_auth_header('foo', '')
 
         class _ECLS(self.mwcls):
             def is_enabled(self, spider):
@@ -241,7 +245,7 @@ def get_proxyauth(self, spider):
         wascalled[:] = []  # reset
         enabled = True
         self.spider.zyte_smartproxy_enabled = False
-        self._assert_enabled(self.spider, self.settings, proxyauth=proxyauth)
+        self._assert_enabled(self.spider, self.settings, proxyauth=proxyauth, proxyurlcreds='http://foo:@proxy.zyte.com:8011')
         self.assertEqual(wascalled, ['is_enabled', 'get_proxyauth'])
 
     def test_delay_adjustment(self):
@@ -909,3 +913,72 @@ def test_client_header(self):
             req.headers.get('X-Crawlera-Client').decode('utf-8'),
             'scrapy-zyte-smartproxy/%s' % __version__
         )
+
+    def test_scrapy_httpproxy_integration(self):
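+        # The middleware is expected to embed the API key in the proxy URL
+        # userinfo so that Scrapy's built-in HttpProxyMiddleware turns it
+        # into the Proxy-Authorization header, both on the first pass and
+        # on later passes (e.g. after a retry or redirect).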
+        self.spider.zyte_smartproxy_enabled = True
+        crawler = self._mock_crawler(self.spider, self.settings)
+        smartproxy = self.mwcls.from_crawler(crawler)
+        smartproxy.open_spider(self.spider)
+        httpproxy = HttpProxyMiddleware.from_crawler(crawler)
+        request = Request('https://example.com')
+        auth_header = basic_auth_header('apikey', '')
+
+        # 1st pass
+        self.assertEqual(smartproxy.process_request(request, self.spider), None)
+        self.assertEqual(httpproxy.process_request(request, self.spider), None)
+        self.assertEqual(request.meta['proxy'], 'http://proxy.zyte.com:8011')
+        self.assertEqual(request.headers[b'Proxy-Authorization'], auth_header)
+
+        # 2nd pass (e.g. retry or redirect)
+        self.assertEqual(smartproxy.process_request(request, self.spider), None)
+        self.assertEqual(httpproxy.process_request(request, self.spider), None)
+        self.assertEqual(request.meta['proxy'], 'http://proxy.zyte.com:8011')
+        self.assertEqual(request.headers[b'Proxy-Authorization'], auth_header)
+
+    def test_subclass_non_basic_header(self):
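+        # open_spider() should refuse a Proxy-Authorization value that does
+        # not use the Basic scheme.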
+
+        class Subclass(self.mwcls):
+            def get_proxyauth(self, spider):
+                return b'Non-Basic foo'
+
+        self.spider.zyte_smartproxy_enabled = True
+        crawler = self._mock_crawler(self.spider, self.settings)
+        smartproxy = Subclass.from_crawler(crawler)
+        with pytest.raises(ValueError):
+            smartproxy.open_spider(self.spider)
+
+    def test_subclass_basic_header_non_base64(self):
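+        # open_spider() should fail on a Basic credential that is not valid
+        # base64 (TypeError or binascii.Error, depending on the Python
+        # version).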
+
+        class Subclass(self.mwcls):
+            def get_proxyauth(self, spider):
+                return b'Basic foo'
+
+        self.spider.zyte_smartproxy_enabled = True
+        crawler = self._mock_crawler(self.spider, self.settings)
+        smartproxy = Subclass.from_crawler(crawler)
+        with pytest.raises((TypeError, binascii.Error)):
+            smartproxy.open_spider(self.spider)
+
+    def test_subclass_basic_header_nonurlsafe_base64(self):
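+        # 'YWF+Og==' uses the standard base64 alphabet ('+') and decodes to
+        # the credentials 'aa~:', which should end up in the proxy URL.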
+
+        class Subclass(self.mwcls):
+            def get_proxyauth(self, spider):
+                return b'Basic YWF+Og=='
+
+        self.spider.zyte_smartproxy_enabled = True
+        crawler = self._mock_crawler(self.spider, self.settings)
+        smartproxy = Subclass.from_crawler(crawler)
+        smartproxy.open_spider(self.spider)
+        self.assertEqual(smartproxy._auth_url, "http://aa~:@proxy.zyte.com:8011")
+
+    def test_subclass_basic_header_urlsafe_base64(self):
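+        # 'YWF-Og==' is the URL-safe base64 spelling ('-' instead of '+') of
+        # the same 'aa~:' credentials and should yield the same proxy URL.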
+
+        class Subclass(self.mwcls):
+            def get_proxyauth(self, spider):
+                return b'Basic YWF-Og=='
+
+        self.spider.zyte_smartproxy_enabled = True
+        crawler = self._mock_crawler(self.spider, self.settings)
+        smartproxy = Subclass.from_crawler(crawler)
+        smartproxy.open_spider(self.spider)
+        self.assertEqual(smartproxy._auth_url, "http://aa~:@proxy.zyte.com:8011")