1
1
import os
2
2
import json
3
- from urlparse import urlparse , parse_qsl
4
3
4
# Python 2/3 compatibility shim.
#
# Exposes four names to the rest of the module regardless of interpreter:
#   urlparse, parse_qsl - URL helpers (moved to urllib.parse in Python 3)
#   iteritems(mapping)  - iterate over (key, value) pairs of a dict
#   text_type           - the text string type (unicode on 2, str on 3)
#
# Only the import is inside the ``try`` so an unrelated failure in the
# helper definitions can never be mistaken for "running on Python 3".
try:
    from urlparse import urlparse, parse_qsl
except ImportError:  # urlparse was renamed urllib.parse in Python 3
    from urllib.parse import urlparse, parse_qsl

    text_type = str

    def iteritems(dikt):
        """Return an iterable of the ``(key, value)`` pairs of *dikt*."""
        return dikt.items()
else:
    text_type = unicode  # noqa: F821 - only evaluated on Python 2

    def iteritems(dikt):
        """Return an iterator over the ``(key, value)`` pairs of *dikt*."""
        return dikt.iteritems()
5
12
6
def load_referers(json_file):
    """Build the domain lookup table from a referers JSON file.

    The JSON document is read as a mapping of ``medium`` to a mapping of
    ``referer name`` to a config dict. Each config must contain a
    ``'domains'`` list and may contain a ``'parameters'`` list.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        A dict keyed by domain. Each value is a dict with the keys
        ``'name'`` and ``'medium'``, plus ``'params'`` (the lower-cased
        ``'parameters'`` list) when the config declares parameters.

    Raises:
        IOError/OSError: If the file cannot be opened.
        ValueError: If the file is not valid JSON.
        KeyError: If a config has no ``'domains'`` entry.
    """
    # Function-scope import keeps this block self-contained. io.open accepts
    # ``encoding`` on both Python 2 and 3, unlike the Python 2 builtin open().
    import io

    # JSON is UTF-8 by specification; do not depend on the locale default,
    # which breaks on non-ASCII domains on some platforms.
    with io.open(json_file, encoding='utf-8') as json_content:
        referers_by_medium = json.load(json_content)

    referers_dict = {}
    for medium, conf_list in referers_by_medium.items():
        for referer_name, config in conf_list.items():
            params = None
            if 'parameters' in config:
                # Call .lower() on each value instead of an unbound
                # ``text_type.lower`` so both byte and text strings work.
                params = [param.lower() for param in config['parameters']]
            for domain in config['domains']:
                entry = {'name': referer_name, 'medium': medium}
                if params is not None:
                    # The same list object is shared by every domain of
                    # this referer.
                    entry['params'] = params
                referers_dict[domain] = entry
    return referers_dict
25
29
30
# Location of the referer database: data/referers.json next to this module.
JSON_FILE = os.path.join(
    os.path.dirname(__file__),
    'data',
    'referers.json',
)

# Lookup table parsed once, when the module is imported.
REFERERS = load_referers(JSON_FILE)
26
32
27
33
class Referer (object ):
28
- def __init__ (self , ref_url , curr_url = None ):
34
+ def __init__ (self , ref_url , curr_url = None , referers = REFERERS ):
29
35
self .known = False
30
36
self .referer = None
31
37
self .medium = 'unknown'
32
38
self .search_parameter = None
33
39
self .search_term = None
40
+ self .referers = referers
34
41
35
42
ref_uri = urlparse (ref_url )
36
43
ref_host = ref_uri .hostname
37
44
self .known = ref_uri .scheme in {'http' , 'https' }
38
45
self .uri = ref_uri
39
46
40
- # print "Scheme: %s" % ref_uri.scheme
41
-
42
47
if not self .known :
43
48
return
44
49
@@ -49,48 +54,47 @@ def __init__(self, ref_url, curr_url=None):
49
54
self .medium = 'internal'
50
55
return
51
56
52
- # print "Getting referer with path"
53
- referer = self .__lookup_referer (ref_host , ref_uri .path , True )
54
- # print "Got %s" % referer
57
+ referer = self ._lookup_referer (ref_host , ref_uri .path , True )
55
58
if not referer :
56
- # print "Getting referer without path"
57
- referer = self .__lookup_referer (ref_host , ref_uri .path , False )
58
- # print "Got %s" % referer
59
+ referer = self ._lookup_referer (ref_host , ref_uri .path , False )
59
60
if not referer :
60
61
self .medium = 'unknown'
61
62
return
62
63
63
- # print "Assigning name %s" % referer['name']
64
64
self .referer = referer ['name' ]
65
65
self .medium = referer ['medium' ]
66
66
67
67
if referer ['medium' ] == 'search' :
68
68
if 'params' not in referer or not referer ['params' ]:
69
- # print "Returning"
70
69
return
71
70
for param , val in parse_qsl (ref_uri .query ):
72
71
if param .lower () in referer ['params' ]:
73
72
self .search_parameter = param
74
73
self .search_term = val
75
74
76
def _lookup_referer(self, ref_host, ref_path, include_path):
    """Find the entry in ``self.referers`` that matches a host (and path).

    The host is tried as given and then with its leading labels removed
    one at a time (``a.b.example.com`` -> ``b.example.com`` -> ...), until
    an entry is found or no dot is left.

    Args:
        ref_host: Host name of the referring URL. ``None`` or an empty
            string yields ``None``.
        ref_path: Path of the referring URL.
        include_path: When true, keys are ``host + path`` and then
            ``host + '/' + <first path segment>``. When false, the key is
            the bare host.

    Returns:
        The matching entry from ``self.referers``, or ``None``.
    """
    # urlparse(...).hostname is None for URLs such as 'http:///x'. Without
    # this guard the string concatenation below raises TypeError.
    if not ref_host:
        return None

    # The first path segment does not depend on the host, so compute it once.
    first_segment = None
    if include_path:
        path_parts = ref_path.split('/')
        if len(path_parts) > 1:
            first_segment = path_parts[1]

    host = ref_host
    while True:
        if include_path:
            candidates = [host + ref_path]
            if first_segment is not None:
                candidates.append(host + '/' + first_segment)
        else:
            candidates = [host]

        for key in candidates:
            try:
                referer = self.referers[key]
            except KeyError:
                continue
            if referer:
                return referer

        # No match: drop the left-most label and retry with the parent domain.
        dot = host.find('.')
        if dot == -1:
            return None
        host = host[dot + 1:]
96
-
0 commit comments