13
13
# Copyright:: Copyright (c) 2014 Inside Systems Inc
14
14
# License:: Apache License Version 2.0
15
15
16
+ # frozen_string_literal: true
17
+
16
18
require 'uri'
17
19
require 'cgi'
18
20
19
21
module RefererParser
20
22
class Parser
21
- DefaultFile = File . expand_path ( File . join ( File . dirname ( __FILE__ ) , '..' , '..' , 'data' , 'referers.json' ) )
23
+ DefaultFile = File . expand_path ( File . join ( File . dirname ( __FILE__ ) , '..' , '..' , 'data' , 'referers.json' ) ) . freeze
22
24
23
25
# Create a new parser from one or more filenames/uris, defaults to ../data/referers.json
24
- def initialize ( uris = DefaultFile )
26
+ def initialize ( uris = DefaultFile )
25
27
@domain_index ||= { }
26
28
@name_hash ||= { }
27
29
@@ -39,74 +41,73 @@ def update(uris)
39
41
40
42
# Clean out the database.
#
# Replaces both the domain index and the name hash with fresh, empty hashes,
# discarding every referer that was previously loaded or added.
#
# Always returns true.
def clear!
  @domain_index = {}
  @name_hash = {}

  true
end
46
49
47
50
# Add a referer to the database.
#
# medium     - medium the referer belongs to; combined with name to form the key
# name       - source name of the referer
# domains    - a domain or (possibly nested) array of domains, each optionally
#              followed by a path, e.g. 'www.example.com/search'
# parameters - a query parameter name or array of names, stored as an array
#
# Any existing entry for the same name/medium pair in the name hash is replaced.
# If called manually and a domain is added to an existing entry with a path,
# you may need to call optimize_index! afterwards.
#
# Returns the flattened list of domains that was processed.
def add_referer(medium, name, domains, parameters = nil)
  # The same name can be used with multiple mediums so we make a key here
  name_key = "#{name}-#{medium}"

  # Update the name hash with the parameter and medium data
  @name_hash[name_key] = { source: name, medium: medium, parameters: [parameters].flatten }

  # Update the domain to name index
  [domains].flatten.each do |domain_url|
    domain, *path = domain_url.split('/')

    # Index hosts without a leading "www." and in lower case
    domain = Regexp.last_match(1) if domain =~ /\Awww\.(.*)\z/i
    domain = domain.downcase

    # With no path segments the join is empty, so this collapses to '/'
    (@domain_index[domain] ||= []) << ["/#{path.join('/')}", name_key]
  end
end
73
74
74
75
# Prune duplicate entries and sort with the most specific path first if there
# is more than one entry for a domain.
#
# Specificity is approximated by path length: the longest path string sorts
# first, which works fine for this data.
#
# Each entry list in the domain index is modified in place.
def optimize_index!
  @domain_index.each_value do |entries|
    # Longest path first
    entries.sort_by! { |path, _name_key| -path.size }
    # uniq! returns nil when nothing was removed, so it is not chained
    entries.uniq!
  end
end
84
85
85
86
# Given a string or URI, return a hash of data
86
87
def parse ( obj )
87
88
url = obj . is_a? ( URI ) ? obj : URI . parse ( obj . to_s )
88
89
89
- if ! [ 'android-app' , 'http' , 'https' ] . include? ( url . scheme )
90
- raise InvalidUriError . new ( "Only Android-App, HTTP, and HTTPS schemes are supported -- #{ url . scheme } " )
90
+ unless [ 'android-app' , 'http' , 'https' ] . include? ( url . scheme )
91
+ raise InvalidUriError , "Only Android-App, HTTP, and HTTPS schemes are supported -- #{ url . scheme } "
91
92
end
92
93
93
- data = { : known => false , : uri => url . to_s }
94
+ data = { known : false , uri : url . to_s }
94
95
95
96
domain , name_key = domain_and_name_key_for ( url )
96
- if domain and name_key
97
+ if domain && name_key
97
98
referer_data = @name_hash [ name_key ]
98
99
data [ :known ] = true
99
100
data [ :source ] = referer_data [ :source ]
100
101
data [ :medium ] = referer_data [ :medium ]
101
102
data [ :domain ] = domain
102
103
103
104
# Parse parameters if the referer uses them
104
- if url . query and referer_data [ :parameters ]
105
+ if url . query && referer_data [ :parameters ]
105
106
query_params = CGI . parse ( url . query )
106
107
referer_data [ :parameters ] . each do |param |
107
108
# If there is a matching parameter, get the first non-blank value
108
- if ! ( values = query_params [ param ] ) . empty?
109
- data [ :term ] = values . select { |v | v . strip != "" } . first
109
+ unless ( values = query_params [ param ] ) . empty?
110
+ data [ :term ] = values . reject { |v | v . strip == '' } . first
110
111
break if data [ :term ]
111
112
end
112
113
end
@@ -115,15 +116,15 @@ def parse(obj)
115
116
116
117
data
117
118
rescue URI ::InvalidURIError
118
- raise InvalidUriError . new ( "Unable to parse URI, not a URI? -- #{ obj . inspect } " , $! )
119
+ raise InvalidUriError . new ( "Unable to parse URI, not a URI? -- #{ obj . inspect } " , $ERROR_INFO )
119
120
end
120
121
121
122
protected
122
123
123
124
# Determine the correct name_key for this host and path
124
125
def domain_and_name_key_for ( uri )
125
126
# Create a proc that will return immediately
126
- check = Proc . new do |domain |
127
+ check = proc do |domain |
127
128
domain . downcase!
128
129
if paths = @domain_index [ domain ]
129
130
paths . each do |path , name_key |
@@ -134,16 +135,16 @@ def domain_and_name_key_for(uri)
134
135
135
136
# First check hosts with and without the www prefix with the path
136
137
if uri . host =~ /\A www\. (.+)\z /i
137
- check . call $1
138
+ check . call Regexp . last_match ( 1 )
138
139
else
139
140
check . call uri . host
140
141
end
141
142
142
143
# Strip leading subdomains one at a time, re-checking each shorter host, until only two labels are left (probably good enough)
143
- host_arr = uri . host . split ( "." )
144
- while host_arr . size > 2 do
144
+ host_arr = uri . host . split ( '.' )
145
+ while host_arr . size > 2
145
146
host_arr . shift
146
- check . call host_arr . join ( "." )
147
+ check . call host_arr . join ( '.' )
147
148
end
148
149
149
150
nil
@@ -152,32 +153,37 @@ def domain_and_name_key_for(uri)
152
153
# Deserialize raw referer data and load it into the index.
#
# data - the raw file contents
# ext  - the file extension including the dot; '.yml', '.yaml' and '.json'
#        are supported
#
# Returns whatever parse_referer_data returns.
# Raises UnsupportedFormatError for any other extension, and
# CorruptReferersError (wrapping the original error) when the deserialized
# data cannot be loaded.
def deserialize_referer_data(data, ext)
  # Parse the loaded data with the correct parser
  deserialized_data =
    case ext
    when '.yml', '.yaml' then deserialize_yaml(data)
    when '.json' then deserialize_json(data)
    else
      # Report the offending extension so the message is actionable
      raise UnsupportedFormatError, "Only yaml and json file formats are currently supported -- #{ext}"
    end

  begin
    parse_referer_data deserialized_data
  rescue StandardError => e
    raise CorruptReferersError.new("Unable to parse data file -- #{e.class} #{e}", e)
  end
end
168
169
169
170
# Deserialize a YAML document.
#
# data - a string containing YAML
#
# The yaml library is loaded lazily, on first use. The document is read with
# YAML.safe_load.
#
# Returns the deserialized Ruby structure.
# Raises CorruptReferersError (wrapping the original error) when the document
# cannot be loaded.
def deserialize_yaml(data)
  require 'yaml'
  YAML.safe_load(data)
rescue StandardError => e
  # StandardError rather than Exception, so interrupts and exits are not swallowed
  raise CorruptReferersError.new("Unable to parse YAML file -- #{e}", e)
end
175
176
176
177
# Deserialize a JSON document.
#
# data - a string containing JSON
#
# Uses the oj gem when it can be loaded and falls back to the json library
# otherwise. Each parser's own parse error is rescued next to the call that
# can raise it, so the rescue never refers to a library that was not loaded.
#
# Returns the deserialized Ruby structure.
# Raises CorruptReferersError (wrapping the original error) on malformed JSON.
def deserialize_json(data)
  oj_loaded =
    begin
      require 'oj'
      true
    rescue LoadError
      false
    end

  if oj_loaded
    begin
      # Strict mode yields plain JSON types only; Oj's default object mode can
      # instantiate arbitrary classes, which is unsafe for externally supplied data
      Oj.load(data, mode: :strict)
    rescue Oj::ParseError => e
      raise CorruptReferersError.new("Unable to parse JSON file -- #{e}", e)
    end
  else
    require 'json'
    begin
      JSON.parse(data)
    rescue JSON::ParserError => e
      raise CorruptReferersError.new("Unable to parse JSON file -- #{e}", e)
    end
  end
end
182
188
183
189
def read_referer_data ( uri )
@@ -187,7 +193,7 @@ def read_referer_data(uri)
187
193
begin
188
194
open ( uri ) . read
189
195
rescue OpenURI ::HTTPError
190
- raise InvalidUriError . new ( "Cannot load referer data from URI #{ uri } -- #{ $! . to_s } " , $! )
196
+ raise InvalidUriError . new ( "Cannot load referer data from URI #{ uri } -- #{ $ERROR_INFO } " , $ERROR_INFO )
191
197
end
192
198
else
193
199
File . read ( uri )
@@ -208,8 +214,8 @@ def parse_referer_data(data)
208
214
end
209
215
210
216
optimize_index!
211
- rescue
212
- raise CorruptReferersError . new ( " Unable to parse referer data" , $! )
217
+ rescue StandardError
218
+ raise CorruptReferersError . new ( ' Unable to parse referer data' , $ERROR_INFO )
213
219
end
214
220
end
215
221
end
0 commit comments