Skip to content

Commit 9caf83d

Browse files
committed
Fail gracefully on SPLASH_URL without protocol
1 parent e40ca4f commit 9caf83d

File tree

2 files changed

+20
-0
lines changed

2 files changed

+20
-0
lines changed

scrapy_splash/middleware.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import copy
55
import json
66
import logging
7+
import re
78
import warnings
89
from collections import defaultdict
910

@@ -233,6 +234,11 @@ def __init__(self, crawler, splash_base_url, slot_policy, log_400):
233234
def from_crawler(cls, crawler):
234235
splash_base_url = crawler.settings.get('SPLASH_URL',
235236
cls.default_splash_url)
237+
if not re.match('^https?://', splash_base_url):
238+
raise NotConfigured(
239+
'The SPLASH_URL setting does not start with http:// or '
240+
'https://: {}'.format(splash_base_url)
241+
)
236242
log_400 = crawler.settings.getbool('SPLASH_LOG_400', True)
237243
slot_policy = crawler.settings.get('SPLASH_SLOT_POLICY',
238244
cls.default_policy)

tests/test_middleware.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44
import json
55
import base64
66

7+
from pytest import raises
78
import scrapy
89
from scrapy.core.engine import ExecutionEngine
10+
from scrapy.exceptions import NotConfigured
911
from scrapy.utils.test import get_crawler
1012
from scrapy.http import Response, TextResponse
1113
from scrapy.downloadermiddlewares.httpcache import HttpCacheMiddleware
@@ -765,3 +767,15 @@ def test_adjust_timeout():
765767
})
766768
req2 = mw.process_request(req2, None)
767769
assert req2.meta['download_timeout'] == 30
770+
771+
772+
def test_bad_splash_url():
773+
crawler = _get_crawler({'SPLASH_URL': 'localhost:1234'})
774+
with raises(NotConfigured):
775+
mw = SplashMiddleware.from_crawler(crawler)
776+
777+
778+
def test_bad_slot_policy():
779+
crawler = _get_crawler({'SPLASH_SLOT_POLICY': 'asdf'})
780+
with raises(NotConfigured):
781+
mw = SplashMiddleware.from_crawler(crawler)

0 commit comments

Comments (0)