Skip to content

Commit 58ccf93

Browse files
committed
Fix handling of URLs without a hostname
For unusual URLs such as "http:something", the host is None even though the scheme is "http". I believe we should not fail on such URLs. Before this patch, constructing a Referer from one raised the following exception:

```
Traceback (most recent call last):
  File "referer_parser/test/__init__.py", line 204, in test_no_host
    r = Referer("http:some/path")
  File "referer_parser/__init__.py", line 57, in __init__
    referer = self._lookup_referer(ref_host, ref_uri.path, True)
  File "referer_parser/__init__.py", line 79, in _lookup_referer
    referer = self.referers[ref_host + ref_path]
TypeError: unsupported operand type(s) for +: 'NoneType' and 'str'
```
1 parent dbc3cec commit 58ccf93

File tree

2 files changed

+16
-6
lines changed

2 files changed

+16
-6
lines changed

python/referer_parser/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ def __init__(self, ref_url, curr_url=None, referers=REFERERS):
4141

4242
ref_uri = urlparse(ref_url)
4343
ref_host = ref_uri.hostname
44-
self.known = ref_uri.scheme in {'http', 'https'}
44+
self.known = (ref_uri.scheme in {'http', 'https'} and
45+
ref_host is not None)
4546
self.uri = ref_uri
4647

4748
if not self.known:
@@ -63,7 +64,7 @@ def __init__(self, ref_url, curr_url=None, referers=REFERERS):
6364

6465
self.referer = referer['name']
6566
self.medium = referer['medium']
66-
67+
6768
if referer['medium'] == 'search':
6869
if 'params' not in referer or not referer['params']:
6970
return

python/referer_parser/test/__init__.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def test_google_minimal(self):
2323
self.assertEqual(r.referer, 'Google')
2424
self.assertIsNone(r.search_term)
2525
self.assertEqual(r.medium, 'search')
26-
26+
2727
def test_google_term(self):
2828
""" Google search #2
2929
"""
@@ -115,7 +115,7 @@ def test_ask_search(self):
115115
dm=ctry&atb=sysid%3D406%3Aappid%3D113%3Auid%3D8f40f651e7b608b5%3Auc%3D1346336505%3Aqu%3D\
116116
pendulums%3Asrc%3Dcrt%3Ao%3D1921&locale=en_GB""")
117117
self.check_equals(r, 'Ask', 'pendulums', 'search')
118-
118+
119119
def test_mailru_search(self):
120120
""" Mail.ru search
121121
"""
@@ -184,7 +184,7 @@ def test_outlookcom_mail(self):
184184
"""
185185
r = Referer("http://co106w.col106.mail.live.com/default.aspx?rru=inbox")
186186
self.check_no_term(r, 'Outlook.com', 'email')
187-
187+
188188
def test_orange_webmail(self):
189189
""" Orange Webmail
190190
"""
@@ -193,12 +193,21 @@ def test_orange_webmail(self):
193193
self.check_no_term(r, 'Orange Webmail', 'email')
194194

195195
def test_internal(self):
196-
r = Referer("http://www.snowplowanalytics.com/about/team",
196+
r = Referer("http://www.snowplowanalytics.com/about/team",
197197
"http://www.snowplowanalytics.com/account/profile")
198198
self.assertTrue(r.known)
199199
self.assertEqual(r.medium, 'internal')
200200
self.assertIsNone(r.search_term)
201201
self.assertIsNone(r.referer)
202202

203+
def test_no_host(self):
204+
r = Referer("http:some/path")
205+
self.assertFalse(r.known)
206+
self.assertIsNone(r.uri.hostname)
207+
self.assertEqual(r.medium, 'unknown')
208+
self.assertIsNone(r.search_term)
209+
self.assertIsNone(r.referer)
210+
211+
203212
if __name__ == '__main__':
204213
unittest.main()

0 commit comments

Comments
 (0)