|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +import scrapy |
| 3 | +from pytest_twisted import inlineCallbacks |
| 4 | + |
| 5 | +from scrapy_splash import SplashRequest |
| 6 | +from .utils import crawl_items, requires_splash, HtmlResource |
| 7 | + |
# Lua script mirroring the README example: it re-applies the request's
# cookies, loads the page with the original headers/method/body, waits
# briefly for JS to run, then returns the final URL, the last response's
# headers and status, the cookie jar, the rendered HTML, the args that
# were passed in, and a sample evaljs() result ("1+2").
DEFAULT_SCRIPT = """
function main(splash)
    splash:init_cookies(splash.args.cookies)
    assert(splash:go{
        splash.args.url,
        headers=splash.args.headers,
        http_method=splash.args.http_method,
        body=splash.args.body,
    })
    assert(splash:wait(0.5))

    local entries = splash:history()
    local last_response = entries[#entries].response
    return {
        url = splash:url(),
        headers = last_response.headers,
        http_status = last_response.status,
        cookies = splash:get_cookies(),
        html = splash:html(),
        args = splash.args,
        jsvalue = splash:evaljs("1+2"),
    }
end
"""
| 32 | + |
| 33 | + |
class HelloWorld(HtmlResource):
    """Static test page whose visible body text is produced by inline JS,
    so it only reads "hello world!" after Splash has rendered it."""
    extra_headers = {'X-MyHeader': 'my value'}
    html = """
    <html><body><script>document.write('hello world!');</script></body></html>
    """
| 39 | + |
| 40 | + |
class ResponseSpider(scrapy.Spider):
    """Fetch ``url`` through Splash and emit the resulting response."""
    url = None

    def start_requests(self):
        # A single Splash-rendered request to the configured URL.
        yield SplashRequest(self.url)

    def parse(self, response):
        item = {'response': response}
        yield item
| 50 | + |
| 51 | + |
class ReloadSpider(ResponseSpider):
    """Request the URL twice and store both responses.

    The second request differs only by a ``#foo`` fragment, so this
    exercises both start_requests and parse, and checks that the
    dupefilter takes the fragment into account."""

    def parse(self, response):
        yield {'response': response}
        followup = self.url + '#foo'
        yield SplashRequest(followup)
| 60 | + |
| 61 | + |
class LuaScriptSpider(ResponseSpider):
    """Run the README-style Lua script via the ``execute`` endpoint."""

    def start_requests(self):
        script_args = {'lua_source': DEFAULT_SCRIPT, 'foo': 'bar'}
        yield SplashRequest(self.url + "#foo", endpoint='execute',
                            args=script_args)
| 68 | + |
| 69 | + |
@requires_splash
@inlineCallbacks
def test_basic(settings):
    """A plain SplashRequest produces exactly one rendered response."""
    items, url, crawler = yield crawl_items(ResponseSpider, HelloWorld,
                                            settings)
    assert len(items) == 1
    response = items[0]['response']
    assert response.url == url
    body_text = response.css('body::text').get()
    assert body_text.strip() == "hello world!"
| 79 | + |
| 80 | + |
@requires_splash
@inlineCallbacks
def test_reload(settings):
    """Two requests differing only by URL fragment both get through,
    while an exact duplicate is filtered once by the dupefilter."""
    items, url, crawler = yield crawl_items(ReloadSpider, HelloWorld, settings)
    assert len(items) == 2
    assert crawler.stats.get_value('dupefilter/filtered') == 1

    first = items[0]['response']
    assert first.url == url
    assert first.css('body::text').get().strip() == "hello world!"

    second = items[1]['response']
    assert second.body == first.body
    assert second is not first
    assert second.url == first.url + "#foo"
| 95 | + |
| 96 | + |
@requires_splash
@inlineCallbacks
def test_basic_lua(settings):
    """The Lua script returns rendered HTML, headers, a JS value and
    the extra args that were passed to the endpoint."""
    items, url, crawler = yield crawl_items(LuaScriptSpider, HelloWorld,
                                            settings)
    assert len(items) == 1
    response = items[0]['response']
    assert response.url == url + "/#foo"
    assert response.css('body::text').get().strip() == "hello world!"
    assert response.data['jsvalue'] == 3
    assert response.headers['X-MyHeader'] == b'my value'
    assert response.data['args']['foo'] == 'bar'
0 commit comments