
Commit c23eefb

TST basic integration tests
1 parent aa07ba0

8 files changed: +295 −29 lines

.travis.yml

Lines changed: 21 additions & 13 deletions
@@ -1,27 +1,35 @@
 language: python
-sudo: false
+sudo: required
+
+services:
+  - docker

 branches:
   only:
   - master
   - /^\d\.\d+$/
+
 matrix:
-  include:
-  - python: 2.7
-    env: TOXENV=py27
-  - python: 3.4
-    env: TOXENV=py34
-  - python: 3.5
-    env: TOXENV=py35
-  - python: 3.6
-    env: TOXENV=py36
-  - python: 2.7
-    env: TOXENV=py27-scrapy10
+  include:
+  - python: 2.7
+    env: TOXENV=py27
+  - python: 3.4
+    env: TOXENV=py34
+  - python: 3.5
+    env: TOXENV=py35
+  - python: 3.6
+    env: TOXENV=py36
+  - python: 2.7
+    env: TOXENV=py27-scrapy10
+
+before_install:
+  - docker pull scrapinghub/splash
+  - docker run --rm -d -p 8050:8050 --network host scrapinghub/splash

 install:
   - pip install -U tox codecov

-script: tox
+script: SPLASH_URL=http://127.0.0.1:8050 tox

 after_success:
   - codecov

README.rst

Lines changed: 6 additions & 0 deletions
@@ -679,3 +679,9 @@ https://github.com/scrapy-plugins/scrapy-splash

 To run tests, install "tox" Python package and then run ``tox`` command
 from the source checkout.
+
+To run integration tests, start Splash and set SPLASH_URL env variable
+to Splash address before running ``tox`` command::
+
+    docker run -d --rm -p8050:8050 scrapinghub/splash:3.0
+    SPLASH_URL=http://127.0.0.1:8050 tox -e py36
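
Not part of the commit, but a quick sanity check before running ``tox`` can save a confusing test run. A minimal sketch, assuming the Splash container from the README is up and that a healthy instance answers a plain GET on its root URL with HTTP 200:

    import os
    import urllib.request  # Python 3 here; the test suite itself still targets Python 2.7 as well

    splash_url = os.environ.get('SPLASH_URL', 'http://127.0.0.1:8050')
    # Assumption: Splash serves its UI at the root URL and returns status 200 when healthy.
    with urllib.request.urlopen(splash_url, timeout=5) as resp:
        assert resp.status == 200, "Splash is not reachable at %s" % splash_url
    print("Splash reachable at", splash_url)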

requirements-test.txt

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+pytest >= 3.3.2
+pytest-cov >= 2.5.1
+pytest-twisted >= 1.6
+hypothesis >= 3.44.14
+hypothesis-pytest

tests/conftest.py

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
+import os
+
+import pytest
+from scrapy.settings import Settings
+
+
+@pytest.fixture()
+def settings(request):
+    """ Default scrapy-splash settings """
+    s = dict(
+        # collect scraped items to .collected_items attribute
+        ITEM_PIPELINES={
+            'tests.utils.CollectorPipeline': 100,
+        },
+
+        # scrapy-splash settings
+        SPLASH_URL=os.environ.get('SPLASH_URL'),
+        DOWNLOADER_MIDDLEWARES={
+            # Engine side
+            'scrapy_splash.SplashCookiesMiddleware': 723,
+            'scrapy_splash.SplashMiddleware': 725,
+            'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,
+            # Downloader side
+        },
+        SPIDER_MIDDLEWARES={
+            'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,
+        },
+        DUPEFILTER_CLASS='scrapy_splash.SplashAwareDupeFilter',
+        HTTPCACHE_STORAGE='scrapy_splash.SplashAwareFSCacheStorage',
+    )
+    return Settings(s)
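
For orientation, and not part of the commit: the fixture returns a regular scrapy.settings.Settings object, so any test that names a ``settings`` argument gets it injected by pytest. A hypothetical consumer (the test name is made up):

    import os

    def test_settings_fixture(settings):
        # dict-style access on scrapy Settings
        assert settings['DUPEFILTER_CLASS'] == 'scrapy_splash.SplashAwareDupeFilter'
        # SPLASH_URL simply mirrors the environment variable (None when unset)
        assert settings['SPLASH_URL'] == os.environ.get('SPLASH_URL')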

tests/mockserver.py

Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+import argparse, socket, sys, time
+from subprocess import Popen, PIPE
+from importlib import import_module
+
+from twisted.internet import reactor
+from twisted.web.server import Site
+
+
+def get_ephemeral_port():
+    s = socket.socket()
+    s.bind(("", 0))
+    return s.getsockname()[1]
+
+
+class MockServer():
+    def __init__(self, resource, port=None):
+        self.resource = '{}.{}'.format(resource.__module__, resource.__name__)
+        self.proc = None
+        host = socket.gethostbyname(socket.gethostname())
+        self.port = port or get_ephemeral_port()
+        self.root_url = 'http://%s:%d' % (host, self.port)
+
+    def __enter__(self):
+        self.proc = Popen(
+            [sys.executable, '-u', '-m', 'tests.mockserver',
+             self.resource, '--port', str(self.port)],
+            stdout=PIPE)
+        self.proc.stdout.readline()
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.proc.kill()
+        self.proc.wait()
+        time.sleep(0.2)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('resource')
+    parser.add_argument('--port', type=int)
+    args = parser.parse_args()
+    module_name, name = args.resource.rsplit('.', 1)
+    sys.path.append('.')
+    resource = getattr(import_module(module_name), name)()
+    http_port = reactor.listenTCP(args.port, Site(resource))
+    def print_listening():
+        host = http_port.getHost()
+        print('Mock server {} running at http://{}:{}'.format(
+            resource, host.host, host.port))
+    reactor.callWhenRunning(print_listening)
+    reactor.run()
+
+
+if __name__ == "__main__":
+    main()
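
A sketch of how this helper is meant to be used; the ``Hello`` resource below is hypothetical (the real tests define theirs in tests/test_integration.py). Entering the context manager runs ``python -m tests.mockserver <module.Class> --port <port>`` as a subprocess and blocks on its first stdout line, so the resource class must live in a module the child process can import:

    from twisted.web.resource import Resource
    from tests.mockserver import MockServer

    class Hello(Resource):              # hypothetical; must be importable by the child process
        isLeaf = True
        def render_GET(self, request):
            return b"<html><body>hi</body></html>"

    def demo():
        with MockServer(Hello) as server:
            print(server.root_url)      # e.g. http://<local-ip>:<ephemeral-port>
            # issue HTTP requests against server.root_url here
        # on exit the child process is killed and reaped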

tests/test_integration.py

Lines changed: 108 additions & 0 deletions
@@ -0,0 +1,108 @@
+# -*- coding: utf-8 -*-
+import scrapy
+from pytest_twisted import inlineCallbacks
+
+from scrapy_splash import SplashRequest
+from .utils import crawl_items, requires_splash, HtmlResource
+
+DEFAULT_SCRIPT = """
+function main(splash)
+    splash:init_cookies(splash.args.cookies)
+    assert(splash:go{
+        splash.args.url,
+        headers=splash.args.headers,
+        http_method=splash.args.http_method,
+        body=splash.args.body,
+    })
+    assert(splash:wait(0.5))
+
+    local entries = splash:history()
+    local last_response = entries[#entries].response
+    return {
+        url = splash:url(),
+        headers = last_response.headers,
+        http_status = last_response.status,
+        cookies = splash:get_cookies(),
+        html = splash:html(),
+        args = splash.args,
+        jsvalue = splash:evaljs("1+2"),
+    }
+end
+"""
+
+
+class HelloWorld(HtmlResource):
+    html = """
+    <html><body><script>document.write('hello world!');</script></body></html>
+    """
+    extra_headers = {'X-MyHeader': 'my value'}
+
+
+class ResponseSpider(scrapy.Spider):
+    """ Make a request to URL, return Scrapy response """
+    url = None
+
+    def start_requests(self):
+        yield SplashRequest(self.url)
+
+    def parse(self, response):
+        yield {'response': response}
+
+
+class ReloadSpider(ResponseSpider):
+    """ Make two requests to URL, store both responses.
+    This spider activates both start_requests and parse methods,
+    and checks that dupefilter takes fragment into account. """
+
+    def parse(self, response):
+        yield {'response': response}
+        yield SplashRequest(self.url + '#foo')
+
+
+class LuaScriptSpider(ResponseSpider):
+    """ Make a request using a Lua script similar to the one from README """
+
+    def start_requests(self):
+        yield SplashRequest(self.url + "#foo", endpoint='execute',
+                            args={'lua_source': DEFAULT_SCRIPT, 'foo': 'bar'})
+
+
+@requires_splash
+@inlineCallbacks
+def test_basic(settings):
+    items, url, crawler = yield crawl_items(ResponseSpider, HelloWorld,
+                                            settings)
+    assert len(items) == 1
+    resp = items[0]['response']
+    assert resp.url == url
+    assert resp.css('body::text').get().strip() == "hello world!"
+
+
+@requires_splash
+@inlineCallbacks
+def test_reload(settings):
+    items, url, crawler = yield crawl_items(ReloadSpider, HelloWorld, settings)
+    assert len(items) == 2
+    assert crawler.stats.get_value('dupefilter/filtered') == 1
+    resp = items[0]['response']
+    assert resp.url == url
+    assert resp.css('body::text').get().strip() == "hello world!"
+
+    resp2 = items[1]['response']
+    assert resp2.body == resp.body
+    assert resp2 is not resp
+    assert resp2.url == resp.url + "#foo"
+
+
+@requires_splash
+@inlineCallbacks
+def test_basic_lua(settings):
+    items, url, crawler = yield crawl_items(LuaScriptSpider, HelloWorld,
+                                            settings)
+    assert len(items) == 1
+    resp = items[0]['response']
+    assert resp.url == url + "/#foo"
+    assert resp.css('body::text').get().strip() == "hello world!"
+    assert resp.data['jsvalue'] == 3
+    assert resp.headers['X-MyHeader'] == b'my value'
+    assert resp.data['args']['foo'] == 'bar'

tests/utils.py

Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+import os
+import pytest
+from pytest_twisted import inlineCallbacks
+from twisted.web.resource import Resource
+from scrapy.crawler import CrawlerRunner
+from scrapy.utils.python import to_bytes
+from tests.mockserver import MockServer
+
+
+requires_splash = pytest.mark.skipif(
+    not os.environ.get('SPLASH_URL', ''),
+    reason="set SPLASH_URL environment variable to run integrational tests"
+)
+
+
+class HtmlResource(Resource):
+    isLeaf = True
+    content_type = 'text/html'
+    html = ''
+    extra_headers = {}
+
+    def render_GET(self, request):
+        request.setHeader(b'content-type', to_bytes(self.content_type))
+        for name, value in self.extra_headers.items():
+            request.setHeader(to_bytes(name), to_bytes(value))
+        return to_bytes(self.html)
+
+
+@inlineCallbacks
+def crawl_items(spider_cls, resource_cls, settings, spider_kwargs=None):
+    """ Use spider_cls to crawl resource_cls. URL of the resource is passed
+    to the spider as ``url`` argument.
+    Return ``(items, resource_url, crawler)`` tuple.
+    """
+    spider_kwargs = {} if spider_kwargs is None else spider_kwargs
+    crawler = make_crawler(spider_cls, settings)
+    with MockServer(resource_cls) as s:
+        root_url = s.root_url
+        yield crawler.crawl(url=root_url, **spider_kwargs)
+    return crawler.spider.collected_items, s.root_url, crawler
+
+
+def make_crawler(spider_cls, settings):
+    if not getattr(spider_cls, 'name', None):
+        class Spider(spider_cls):
+            name = 'test_spider'
+        Spider.__name__ = spider_cls.__name__
+        Spider.__module__ = spider_cls.__module__
+        spider_cls = Spider
+    return CrawlerRunner(settings).create_crawler(spider_cls)
+
+
+class CollectorPipeline:
+    def process_item(self, item, spider):
+        if not hasattr(spider, 'collected_items'):
+            spider.collected_items = []
+        spider.collected_items.append(item)
+        return item
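
One non-obvious detail in these helpers: ``make_crawler`` injects a ``name`` into spiders that do not define one, which is why the spiders in tests/test_integration.py can stay anonymous. A hypothetical illustration, not part of the commit:

    import scrapy
    from scrapy.settings import Settings
    from tests.utils import make_crawler

    class NamelessSpider(scrapy.Spider):   # hypothetical; deliberately has no ``name``
        pass

    crawler = make_crawler(NamelessSpider, Settings())
    # a subclass with name='test_spider' is substituted, original identity preserved
    assert crawler.spidercls.name == 'test_spider'
    assert crawler.spidercls.__name__ == 'NamelessSpider'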

tox.ini

Lines changed: 7 additions & 16 deletions
@@ -4,19 +4,13 @@
 # and then run "tox" from this directory.

 [tox]
-envlist = py27,py34,py35,py36,py27-scrapy10
-
-[base]
-deps =
-    pytest
-    pytest-cov
-    hypothesis
-    hypothesis-pytest
+envlist = py27,py34,py35,py36,py27-scrapy11

 [testenv]
+passenv = SPLASH_URL
 deps =
+    -rrequirements-test.txt
     -rrequirements.txt
-    {[base]deps}

 commands =
     pip install -e .
@@ -25,11 +19,8 @@ commands =
 [testenv:py34]
 basepython = python3.4
 deps =
+    -rrequirements-test.txt
     -rrequirements-py3.txt
-    pytest
-    pytest-cov
-    hypothesis
-    hypothesis-pytest

 [testenv:py35]
 basepython = python3.5
@@ -39,8 +30,8 @@ deps = {[testenv:py34]deps}
 basepython = python3.6
 deps = {[testenv:py34]deps}

-[testenv:py27-scrapy10]
+[testenv:py27-scrapy11]
 deps =
-    scrapy < 1.1
+    -rrequirements-test.txt
+    scrapy == 1.1.4
     service_identity
-    {[base]deps}
