1010DEFAULT_SCRIPT = """
1111function main(splash)
1212 splash:init_cookies(splash.args.cookies)
13- assert( splash:go{
13+ splash:go{
1414 splash.args.url,
1515 headers=splash.args.headers,
1616 http_method=splash.args.http_method,
1717 body=splash.args.body,
18- })
19- assert(splash:wait(0.5))
18+ }
19+ local wait = tonumber(splash.args.wait or 0.5)
20+ assert(splash:wait(wait))
2021
2122 local entries = splash:history()
2223 local last_response = entries[#entries].response
@@ -40,6 +41,11 @@ class HelloWorld(HtmlResource):
4041 extra_headers = {'X-MyHeader' : 'my value' , 'Set-Cookie' : 'sessionid=ABCD' }
4142
4243
class Http400Resource(HtmlResource):
    """Test resource used as a website that answers with HTTP 400.

    Only overrides two class attributes; how they are turned into the
    actual response is up to ``HtmlResource`` (not shown here).
    """
    # Page body for this resource.
    html = "Website returns HTTP 400 error"
    # HTTP status for this resource.
    status_code = 400
47+
48+
4349
4450class ManyCookies (Resource , object ):
4551 class SetMyCookie (HtmlResource ):
@@ -94,6 +100,9 @@ def parse(self, response):
94100 resp = items [0 ]['response' ]
95101 assert resp .url == url
96102 assert resp .css ('body::text' ).get ().strip () == "hello world!"
103+ assert resp .status == resp .splash_response_status == 200
104+ assert resp .headers == resp .splash_response_headers
105+ assert resp .splash_response_headers ['Content-Type' ] == b"text/html; charset=utf-8"
97106
98107 resp2 = items [1 ]['response' ]
99108 assert resp2 .body == resp .body
@@ -118,12 +127,78 @@ def start_requests(self):
118127 assert len (items ) == 1
119128 resp = items [0 ]['response' ]
120129 assert resp .url == url + "/#foo"
130+ assert resp .status == resp .splash_response_status == 200
121131 assert resp .css ('body::text' ).get ().strip () == "hello world!"
122132 assert resp .data ['jsvalue' ] == 3
123133 assert resp .headers ['X-MyHeader' ] == b'my value'
134+ assert resp .headers ['Content-Type' ] == b'text/html'
135+ assert resp .splash_response_headers ['Content-Type' ] == b'application/json'
124136 assert resp .data ['args' ]['foo' ] == 'bar'
125137
126138
@requires_splash
@inlineCallbacks
def test_bad_request(settings):
    """Check ``status`` vs ``splash_response_status`` on HTTP 400 responses.

    Two crawls are performed:

    * a request with a non-numeric ``wait`` argument against ``HelloWorld``,
      where both ``status`` and ``splash_response_status`` must be 400;
    * a regular request against ``Http400Resource``, where ``status`` must
      be 400 while ``splash_response_status`` must be 200.
    """
    class BadRequestSpider(ResponseSpider):
        # Let 4xx responses reach the spider callback instead of being dropped.
        custom_settings = {'HTTPERROR_ALLOW_ALL': True}

        def start_requests(self):
            # 'wait' is deliberately not a number.
            lua_args = {'lua_source': DEFAULT_SCRIPT, 'wait': 'bar'}
            yield SplashRequest(self.url, endpoint='execute', args=lua_args)

    class GoodRequestSpider(ResponseSpider):
        # Let 4xx responses reach the spider callback instead of being dropped.
        custom_settings = {'HTTPERROR_ALLOW_ALL': True}

        def start_requests(self):
            lua_args = {'lua_source': DEFAULT_SCRIPT}
            yield SplashRequest(self.url, endpoint='execute', args=lua_args)

    # Case 1: bad 'wait' argument; 400 is expected for both status fields.
    bad_items, _url, _crawler = yield crawl_items(
        BadRequestSpider, HelloWorld, settings)
    bad_resp = bad_items[0]['response']
    assert bad_resp.status == 400
    assert bad_resp.splash_response_status == 400

    # Case 2: the website resource is Http400Resource; the response status
    # is 400 while the Splash response status is 200.
    good_items, _url, _crawler = yield crawl_items(
        GoodRequestSpider, Http400Resource, settings)
    good_resp = good_items[0]['response']
    assert good_resp.status == 400
    assert good_resp.splash_response_status == 200
168+
169+
@requires_splash
@inlineCallbacks
def test_cache_args(settings):
    """Check request bodies when ``lua_source`` is listed in ``cache_args``.

    The spider issues the same Splash request twice (the second one from
    ``parse``, with a ``#foo`` fragment appended). The first request body
    must contain the Lua script text; the second must not. The ``x``
    argument value (``yy``) must be present in both bodies.
    """

    class CacheArgsSpider(ResponseSpider):
        def _request(self, url):
            # 'lua_source' is marked as a cached argument; 'x' is not.
            splash_args = {'lua_source': DEFAULT_SCRIPT, 'x': 'yy'}
            return SplashRequest(url, endpoint='execute',
                                 args=splash_args,
                                 cache_args=['lua_source'])

        def start_requests(self):
            yield self._request(self.url)

        def parse(self, response):
            # Record the response, then schedule the follow-up request.
            yield {'response': response}
            yield self._request(self.url + "#foo")

    items, _url, _crawler = yield crawl_items(
        CacheArgsSpider, HelloWorld, settings)
    assert len(items) == 2

    # First request: script text and the 'x' value are both in the body.
    first = items[0]['response']
    first_body = first.request.body
    assert b"function main(splash)" in first_body
    assert b"yy" in first_body
    print(first.body, first.request.body)

    # Second request: script text is absent, the 'x' value is still there.
    second = items[1]['response']
    second_body = second.request.body
    assert b"function main(splash)" not in second_body
    assert b"yy" in second_body
    print(second.body, second.request.body)
200+
201+
127202@requires_splash
128203@inlineCallbacks
129204def test_cookies (settings ):
@@ -171,7 +246,6 @@ def parse_3(self, response):
171246 args = {'lua_source' : DEFAULT_SCRIPT },
172247 cookies = {'bomb' : BOMB })
173248
174-
175249 def parse_4 (self , response ):
176250 yield {'response' : response }
177251
@@ -185,19 +259,19 @@ def _cookie_dict(har_cookies):
185259
186260 # cookie should be sent to remote website, not to Splash
187261 resp = items [0 ]['response' ]
188- splash_headers = resp .request .headers
262+ splash_request_headers = resp .request .headers
189263 cookies = resp .data ['args' ]['cookies' ]
190- print (splash_headers )
264+ print (splash_request_headers )
191265 print (cookies )
192266 assert _cookie_dict (cookies ) == {
193267 # 'login': '1', # FIXME
194268 'x-set-splash' : '1'
195269 }
196- assert splash_headers .get (b'Cookie' ) is None
270+ assert splash_request_headers .get (b'Cookie' ) is None
197271
198272 # new cookie should be also sent to remote website, not to Splash
199273 resp2 = items [1 ]['response' ]
200- splash_headers = resp2 .request .headers
274+ splash_request_headers = resp2 .request .headers
201275 headers = resp2 .data ['args' ]['headers' ]
202276 cookies = resp2 .data ['args' ]['cookies' ]
203277 assert canonicalize_url (headers ['Referer' ]) == canonicalize_url (url )
@@ -206,29 +280,29 @@ def _cookie_dict(har_cookies):
206280 'x-set-splash' : '1' ,
207281 'sessionid' : 'ABCD'
208282 }
209- print (splash_headers )
283+ print (splash_request_headers )
210284 print (headers )
211285 print (cookies )
212- assert splash_headers .get (b'Cookie' ) is None
286+ assert splash_request_headers .get (b'Cookie' ) is None
213287
214288 # TODO/FIXME: Cookies fetched when working with Splash should be picked up
215289 # by Scrapy
216290 resp3 = items [2 ]['response' ]
217- splash_headers = resp3 .request .headers
218- cookie_header = splash_headers .get (b'Cookie' )
291+ splash_request_headers = resp3 .request .headers
292+ cookie_header = splash_request_headers .get (b'Cookie' )
219293 assert b'x-set-scrapy=1' in cookie_header
220294 assert b'login=1' in cookie_header
221295 assert b'x-set-splash=1' in cookie_header
222296 # assert b'sessionid=ABCD' in cookie_header # FIXME
223297
224298 # cookie bomb shouldn't cause problems
225299 resp4 = items [3 ]['response' ]
226- splash_headers = resp4 .request .headers
300+ splash_request_headers = resp4 .request .headers
227301 cookies = resp4 .data ['args' ]['cookies' ]
228302 assert _cookie_dict (cookies ) == {
229303 # 'login': '1',
230304 'x-set-splash' : '1' ,
231305 'sessionid' : 'ABCD' ,
232306 'bomb' : BOMB ,
233307 }
234- assert splash_headers .get (b'Cookie' ) is None
308+ assert splash_request_headers .get (b'Cookie' ) is None
0 commit comments