Skip to content

Commit f618587

Browse files
committed
Merge pull request #1 from donspaulding/master
Refactor the loading of the JSON files
2 parents 6af29d9 + 5759b53 commit f618587

File tree

3 files changed: 52 additions & 35 deletions

python/referer_parser/__init__.py

Lines changed: 39 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -1,44 +1,49 @@
11
import os
22
import json
3-
from urlparse import urlparse, parse_qsl
43

4+
try:
5+
from urlparse import urlparse, parse_qsl
6+
iteritems = lambda dikt: dikt.iteritems()
7+
text_type = unicode
8+
except ImportError: # urlparse was renamed urllib.parse in Python 3
9+
from urllib.parse import urlparse, parse_qsl
10+
iteritems = lambda dikt: dikt.items()
11+
text_type = str
512

6-
JSON_FILE = os.path.join(os.path.dirname(__file__), 'data', 'referers.json')
7-
REFERERS = {}
8-
9-
with open(JSON_FILE) as json_content:
10-
for medium, conf_list in json.load(json_content).iteritems():
11-
for ref, config in conf_list.iteritems():
12-
make_ref = None
13-
if 'parameters' in config:
14-
def make_ref_params(config_dict):
15-
return {
16-
'name': ref,
17-
'params': map(unicode.lower, config_dict['parameters']),
18-
'medium': medium,
13+
def load_referers(json_file):
14+
referers_dict = {}
15+
with open(json_file) as json_content:
16+
for medium, conf_list in iteritems(json.load(json_content)):
17+
for referer_name, config in iteritems(conf_list):
18+
params = None
19+
if 'parameters' in config:
20+
params = list(map(text_type.lower, config['parameters']))
21+
for domain in config['domains']:
22+
referers_dict[domain] = {
23+
'name': referer_name,
24+
'medium': medium
1925
}
20-
make_ref = make_ref_params
21-
else:
22-
make_ref = lambda _: {'name': ref, 'medium': medium}
23-
for domain in config['domains']:
24-
REFERERS[domain] = make_ref(config)
26+
if params is not None:
27+
referers_dict[domain]['params'] = params
28+
return referers_dict
2529

30+
JSON_FILE = os.path.join(os.path.dirname(__file__), 'data', 'referers.json')
31+
REFERERS = load_referers(JSON_FILE)
2632

2733
class Referer(object):
28-
def __init__(self, ref_url, curr_url=None):
34+
def __init__(self, ref_url, curr_url=None, referers=REFERERS):
2935
self.known = False
3036
self.referer = None
3137
self.medium = 'unknown'
3238
self.search_parameter = None
3339
self.search_term = None
40+
self.referers = referers
3441

3542
ref_uri = urlparse(ref_url)
3643
ref_host = ref_uri.hostname
3744
self.known = ref_uri.scheme in {'http', 'https'}
3845
self.uri = ref_uri
3946

40-
# print "Scheme: %s" % ref_uri.scheme
41-
4247
if not self.known:
4348
return
4449

@@ -49,48 +54,47 @@ def __init__(self, ref_url, curr_url=None):
4954
self.medium = 'internal'
5055
return
5156

52-
# print "Getting referer with path"
53-
referer = self.__lookup_referer(ref_host, ref_uri.path, True)
54-
# print "Got %s" % referer
57+
referer = self._lookup_referer(ref_host, ref_uri.path, True)
5558
if not referer:
56-
# print "Getting referer without path"
57-
referer = self.__lookup_referer(ref_host, ref_uri.path, False)
58-
# print "Got %s" % referer
59+
referer = self._lookup_referer(ref_host, ref_uri.path, False)
5960
if not referer:
6061
self.medium = 'unknown'
6162
return
6263

63-
# print "Assigning name %s" % referer['name']
6464
self.referer = referer['name']
6565
self.medium = referer['medium']
6666

6767
if referer['medium'] == 'search':
6868
if 'params' not in referer or not referer['params']:
69-
# print "Returning"
7069
return
7170
for param, val in parse_qsl(ref_uri.query):
7271
if param.lower() in referer['params']:
7372
self.search_parameter = param
7473
self.search_term = val
7574

76-
def __lookup_referer(self, ref_host, ref_path, include_path):
75+
def _lookup_referer(self, ref_host, ref_path, include_path):
7776
referer = None
7877
try:
79-
referer = REFERERS[ref_host + ref_path] if include_path else REFERERS[ref_host]
78+
if include_path:
79+
referer = self.referers[ref_host + ref_path]
80+
else:
81+
referer = self.referers[ref_host]
8082
except KeyError:
8183
if include_path:
8284
path_parts = ref_path.split('/')
8385
if len(path_parts) > 1:
8486
try:
85-
referer = REFERERS[ref_host + '/' + path_parts[1]]
87+
referer = self.referers[ref_host + '/' + path_parts[1]]
8688
except KeyError:
8789
pass
8890
if not referer:
8991
try:
9092
idx = ref_host.index('.')
91-
return self.__lookup_referer(ref_host[idx + 1:], ref_path, include_path)
93+
return self._lookup_referer(
94+
ref_host[idx + 1:],
95+
ref_path, include_path
96+
)
9297
except ValueError:
9398
return None
9499
else:
95100
return referer
96-

python/tox.ini

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,13 @@
1+
# Tox (http://tox.testrun.org/) is a tool for running tests
2+
# in multiple virtualenvs. This configuration file will run the
3+
# test suite on all supported python versions. To use it, "pip install tox"
4+
# and then run "tox" from this directory.
5+
6+
[tox]
7+
#envlist = py27, py33, pypy, jython
8+
envlist = py27
9+
10+
[testenv]
11+
commands = nosetests referer_parser.test
12+
deps =
13+
nose

0 commit comments

Comments
 (0)