Skip to content

Commit bfba9ad

Browse files
committed
response.splash_response_status and response.splash_response_headers
1 parent ee5000d commit bfba9ad

File tree

4 files changed

+48
-21
lines changed

4 files changed

+48
-21
lines changed

README.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,9 @@ to set ``meta['splash']['args']`` use ``SplashRequest(..., args=myargs)``.
271271
and ``assert(splash:go(..))`` fails with an HTTP error
272272
response.status is also set to HTTP error code.
273273

274+
Original URL, status and headers are available as ``response.real_url``,
275+
``response.splash_response_status`` and ``response.splash_response_headers``.
276+
274277
This option is set to True by default if you use SplashRequest.
275278
``render.json`` and ``execute`` endpoints may not have all the necessary
276279
keys/values in the response.
@@ -631,7 +634,9 @@ aware of:
631634

632635
3. As seen by Scrapy, response.url is a URL of the Splash server.
633636
scrapy-splash fixes it to be a URL of a requested page.
634-
"Real" URL is still available as ``response.real_url``.
637+
"Real" URL is still available as ``response.real_url``. scrapy-splash also
638+
allows handling ``response.status`` and ``response.headers`` transparently
639+
on Scrapy side.
635640

636641
4. Some options depend on each other - for example, if you use timeout_
637642
Splash option then you may want to set ``download_timeout``

scrapy_splash/response.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,23 @@ def __init__(self, url, *args, **kwargs):
3030
if _url is not None:
3131
self.real_url = url
3232
url = _url
33+
self.splash_response_status = kwargs.pop('splash_response_status',
34+
None)
35+
self.splash_response_headers = kwargs.pop('splash_response_headers',
36+
None)
3337
super(_SplashResponseMixin, self).__init__(url, *args, **kwargs)
38+
if self.splash_response_status is None:
39+
self.splash_response_status = self.status
40+
if self.splash_response_headers is None:
41+
self.splash_response_headers = self.headers.copy()
3442

3543
def replace(self, *args, **kwargs):
3644
"""Create a new Response with the same attributes except for those
3745
given new values.
3846
"""
3947
for x in ['url', 'status', 'headers', 'body', 'request', 'flags',
40-
'real_url']:
48+
'real_url', 'splash_response_status',
49+
'splash_response_headers']:
4150
kwargs.setdefault(x, getattr(self, x))
4251
cls = kwargs.pop('cls', self.__class__)
4352
return cls(*args, **kwargs)
@@ -80,11 +89,14 @@ class SplashJsonResponse(SplashResponse):
8089
(['splash']['magic_response'] is not False), several other response
8190
attributes (headers, body, url, status code) are set automatically:
8291
83-
* response.headers are filled from 'headers' keys;
84-
* response.url is set to the value of 'url' key;
92+
* response.url is set to the value of 'url' key, original url is
93+
available as ``response.real_url``;
94+
* response.headers are filled from 'headers' keys; original headers are
95+
available as ``response.splash_response_headers``;
96+
* response.status is set from the value of 'http_status' key; original
97+
status is available as ``response.splash_response_status``;
8598
* response.body is set to the value of 'html' key,
8699
or to base64-decoded value of 'body' key;
87-
* response.status is set from the value of 'http_status' key.
88100
"""
89101
def __init__(self, *args, **kwargs):
90102
self.cookiejar = None

tests/test_integration.py

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,14 @@
1010
DEFAULT_SCRIPT = """
1111
function main(splash)
1212
splash:init_cookies(splash.args.cookies)
13-
assert(splash:go{
13+
splash:go{
1414
splash.args.url,
1515
headers=splash.args.headers,
1616
http_method=splash.args.http_method,
1717
body=splash.args.body,
18-
})
19-
assert(splash:wait(0.5))
18+
}
19+
local wait = tonumber(splash.args.wait or 0.5)
20+
assert(splash:wait(wait))
2021
2122
local entries = splash:history()
2223
local last_response = entries[#entries].response
@@ -94,6 +95,9 @@ def parse(self, response):
9495
resp = items[0]['response']
9596
assert resp.url == url
9697
assert resp.css('body::text').get().strip() == "hello world!"
98+
assert resp.status == resp.splash_response_status == 200
99+
assert resp.headers == resp.splash_response_headers
100+
assert resp.splash_response_headers['Content-Type'] == b"text/html; charset=utf-8"
97101

98102
resp2 = items[1]['response']
99103
assert resp2.body == resp.body
@@ -118,9 +122,12 @@ def start_requests(self):
118122
assert len(items) == 1
119123
resp = items[0]['response']
120124
assert resp.url == url + "/#foo"
125+
assert resp.status == resp.splash_response_status == 200
121126
assert resp.css('body::text').get().strip() == "hello world!"
122127
assert resp.data['jsvalue'] == 3
123128
assert resp.headers['X-MyHeader'] == b'my value'
129+
assert resp.headers['Content-Type'] == b'text/html'
130+
assert resp.splash_response_headers['Content-Type'] == b'application/json'
124131
assert resp.data['args']['foo'] == 'bar'
125132

126133

@@ -171,7 +178,6 @@ def parse_3(self, response):
171178
args={'lua_source': DEFAULT_SCRIPT},
172179
cookies={'bomb': BOMB})
173180

174-
175181
def parse_4(self, response):
176182
yield {'response': response}
177183

@@ -185,19 +191,19 @@ def _cookie_dict(har_cookies):
185191

186192
# cookie should be sent to remote website, not to Splash
187193
resp = items[0]['response']
188-
splash_headers = resp.request.headers
194+
splash_request_headers = resp.request.headers
189195
cookies = resp.data['args']['cookies']
190-
print(splash_headers)
196+
print(splash_request_headers)
191197
print(cookies)
192198
assert _cookie_dict(cookies) == {
193199
# 'login': '1', # FIXME
194200
'x-set-splash': '1'
195201
}
196-
assert splash_headers.get(b'Cookie') is None
202+
assert splash_request_headers.get(b'Cookie') is None
197203

198204
# new cookie should be also sent to remote website, not to Splash
199205
resp2 = items[1]['response']
200-
splash_headers = resp2.request.headers
206+
splash_request_headers = resp2.request.headers
201207
headers = resp2.data['args']['headers']
202208
cookies = resp2.data['args']['cookies']
203209
assert canonicalize_url(headers['Referer']) == canonicalize_url(url)
@@ -206,29 +212,29 @@ def _cookie_dict(har_cookies):
206212
'x-set-splash': '1',
207213
'sessionid': 'ABCD'
208214
}
209-
print(splash_headers)
215+
print(splash_request_headers)
210216
print(headers)
211217
print(cookies)
212-
assert splash_headers.get(b'Cookie') is None
218+
assert splash_request_headers.get(b'Cookie') is None
213219

214220
# TODO/FIXME: Cookies fetched when working with Splash should be picked up
215221
# by Scrapy
216222
resp3 = items[2]['response']
217-
splash_headers = resp3.request.headers
218-
cookie_header = splash_headers.get(b'Cookie')
223+
splash_request_headers = resp3.request.headers
224+
cookie_header = splash_request_headers.get(b'Cookie')
219225
assert b'x-set-scrapy=1' in cookie_header
220226
assert b'login=1' in cookie_header
221227
assert b'x-set-splash=1' in cookie_header
222228
# assert b'sessionid=ABCD' in cookie_header # FIXME
223229

224230
# cookie bomb shouldn't cause problems
225231
resp4 = items[3]['response']
226-
splash_headers = resp4.request.headers
232+
splash_request_headers = resp4.request.headers
227233
cookies = resp4.data['args']['cookies']
228234
assert _cookie_dict(cookies) == {
229235
# 'login': '1',
230236
'x-set-splash': '1',
231237
'sessionid': 'ABCD',
232238
'bomb': BOMB,
233239
}
234-
assert splash_headers.get(b'Cookie') is None
240+
assert splash_request_headers.get(b'Cookie') is None

tests/test_middleware.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,8 @@ def cb():
188188
assert response2.text == response2.body_as_unicode() == res_body
189189
assert response2.encoding == 'utf8'
190190
assert response2.headers == {b'Content-Type': [b'application/json']}
191-
assert response2.status == 200
191+
assert response2.splash_response_headers == response2.headers
192+
assert response2.status == response2.splash_response_status == 200
192193

193194

194195
def test_magic_response():
@@ -233,7 +234,9 @@ def test_magic_response():
233234
b'X-My-Header': [b'foo'],
234235
b'Set-Cookie': [b'bar=baz'],
235236
}
237+
assert resp2.splash_response_headers == {b'Content-Type': [b'application/json']}
236238
assert resp2.status == 404
239+
assert resp2.splash_response_status == 200
237240
assert resp2.url == "http://exmaple.com/#id42"
238241
assert len(resp2.cookiejar) == 3
239242
cookies = [c for c in resp2.cookiejar]
@@ -359,7 +362,8 @@ def test_magic_response2():
359362
assert resp2.data == resp_data
360363
assert resp2.body == b'binary data'
361364
assert resp2.headers == {b'Content-Type': [b'text/plain']}
362-
assert resp2.status == 200
365+
assert resp2.splash_response_headers == {b'Content-Type': [b'application/json']}
366+
assert resp2.status == resp2.splash_response_status == 200
363367
assert resp2.url == "http://example.com/"
364368

365369

0 commit comments

Comments
 (0)