Skip to content

Commit 74b2d6e

Browse files
committed
Don't handle HTTP 498 responses from remote websites; add more tests for cache_args.
It requires a proper Lua script to work.
1 parent 99066e2 commit 74b2d6e

File tree

2 files changed

+34
-2
lines changed

2 files changed

+34
-2
lines changed

scrapy_splash/middleware.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ def process_response(self, request, response, spider):
380380

381381
# handle save_args/load_args
382382
self._process_x_splash_saved_arguments(request, response)
383-
if response.status == 498:
383+
if get_splash_status(response) == 498:
384384
logger.debug("Got HTTP 498 response for {}; "
385385
"sending arguments again.".format(request),
386386
extra={'spider': spider})
@@ -424,7 +424,7 @@ def _log_400(self, request, response, spider):
424424

425425
def _process_x_splash_saved_arguments(self, request, response):
426426
""" Keep track of arguments saved by Splash. """
427-
saved_args = response.headers.get(b'X-Splash-Saved-Arguments')
427+
saved_args = get_splash_headers(response).get(b'X-Splash-Saved-Arguments')
428428
if not saved_args:
429429
return
430430
saved_args = parse_x_splash_saved_arguments_header(saved_args)

tests/test_integration.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,38 @@ def start_requests(self):
167167
assert resp.splash_response_status == 200
168168

169169

170+
@requires_splash
171+
@inlineCallbacks
172+
def test_cache_args(settings):
173+
174+
class CacheArgsSpider(ResponseSpider):
175+
def _request(self, url):
176+
return SplashRequest(url, endpoint='execute',
177+
args={'lua_source': DEFAULT_SCRIPT, 'x': 'yy'},
178+
cache_args=['lua_source'])
179+
180+
def start_requests(self):
181+
yield self._request(self.url)
182+
183+
def parse(self, response):
184+
yield {'response': response}
185+
yield self._request(self.url + "#foo")
186+
187+
188+
items, url, crawler = yield crawl_items(CacheArgsSpider, HelloWorld,
189+
settings)
190+
assert len(items) == 2
191+
resp = items[0]['response']
192+
assert b"function main(splash)" in resp.request.body
193+
assert b"yy" in resp.request.body
194+
print(resp.body, resp.request.body)
195+
196+
resp = items[1]['response']
197+
assert b"function main(splash)" not in resp.request.body
198+
assert b"yy" in resp.request.body
199+
print(resp.body, resp.request.body)
200+
201+
170202
@requires_splash
171203
@inlineCallbacks
172204
def test_cookies(settings):

0 commit comments

Comments
 (0)