1313# Copyright:: Copyright (c) 2014 Inside Systems Inc
1414# License:: Apache License Version 2.0
1515
16+ # frozen_string_literal: true
17+
1618require 'uri'
1719require 'cgi'
1820
1921module RefererParser
2022 class Parser
# Absolute path of the bundled referer database: data/referers.json, located
# two directories above the directory containing this file. Frozen so the
# shared default path cannot be mutated in place.
DefaultFile = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'data', 'referers.json')).freeze
2224
2325 # Create a new parser from one or more filenames/uris, defaults to ../data/referers.json
24- def initialize ( uris = DefaultFile )
26+ def initialize ( uris = DefaultFile )
2527 @domain_index ||= { }
2628 @name_hash ||= { }
2729
@@ -39,74 +41,73 @@ def update(uris)
3941
4042 # Clean out the database
# Reset both lookup tables (domain index and name hash) to empty hashes.
#
# @return [true] always
def clear!
  @domain_index = {}
  @name_hash = {}

  true
end
4649
4750 # Add a referer to the database with medium, name, domain or array of domains, and a parameter or array of parameters
4851 # If called manually and a domain is added to an existing entry with a path, you may need to call optimize_index! afterwards.
# @param medium [String] medium the referer belongs to (part of the lookup key)
# @param name [String] referer name, stored as :source
# @param domains [String, Array<String>] one or more "host" or "host/path" entries
# @param parameters [String, Array<String>, nil] query parameter name(s), stored as :parameters
# @return [Array<String>] the flattened list of domain entries that were indexed
def add_referer(medium, name, domains, parameters = nil)
  # The same name can be used with multiple mediums so we make a key here
  name_key = "#{name}-#{medium}"

  # Update the name hash with the parameter and medium data
  @name_hash[name_key] = { source: name, medium: medium, parameters: [parameters].flatten }

  # Update the domain to name index
  [domains].flatten.each do |domain_url|
    domain, *path = domain_url.split('/')

    # Index hosts without a leading "www." and in lower case
    domain = Regexp.last_match(1) if domain =~ /\Awww\.(.*)\z/i
    domain = domain.downcase

    # An entry without a path yields an empty join, i.e. the root path "/"
    (@domain_index[domain] ||= []) << ["/#{path.join('/')}", name_key]
  end
end
7374
7475 # Prune duplicate entries and sort with the most specific path first if there is more than one entry
7576 # In this case, sorting by the longest string works fine
# Sort every domain's [path, name_key] list by descending path length and
# drop duplicate pairs, mutating the stored arrays in place.
#
# @return [Hash] the domain index
def optimize_index!
  @domain_index.each_value do |entries|
    # Longest path first
    entries.sort! { |a, b| b[0].size <=> a[0].size }
    # Kept as a separate statement: uniq! returns nil when nothing was removed
    entries.uniq!
  end
end
8485
8586 # Given a string or URI, return a hash of data
8687 def parse ( obj )
8788 url = obj . is_a? ( URI ) ? obj : URI . parse ( obj . to_s )
8889
89- if ! [ 'android-app' , 'http' , 'https' ] . include? ( url . scheme )
90- raise InvalidUriError . new ( "Only Android-App, HTTP, and HTTPS schemes are supported -- #{ url . scheme } " )
90+ unless [ 'android-app' , 'http' , 'https' ] . include? ( url . scheme )
91+ raise InvalidUriError , "Only Android-App, HTTP, and HTTPS schemes are supported -- #{ url . scheme } "
9192 end
9293
93- data = { : known => false , : uri => url . to_s }
94+ data = { known : false , uri : url . to_s }
9495
9596 domain , name_key = domain_and_name_key_for ( url )
96- if domain and name_key
97+ if domain && name_key
9798 referer_data = @name_hash [ name_key ]
9899 data [ :known ] = true
99100 data [ :source ] = referer_data [ :source ]
100101 data [ :medium ] = referer_data [ :medium ]
101102 data [ :domain ] = domain
102103
103104 # Parse parameters if the referer uses them
104- if url . query and referer_data [ :parameters ]
105+ if url . query && referer_data [ :parameters ]
105106 query_params = CGI . parse ( url . query )
106107 referer_data [ :parameters ] . each do |param |
107108 # If there is a matching parameter, get the first non-blank value
108- if ! ( values = query_params [ param ] ) . empty?
109- data [ :term ] = values . select { |v | v . strip != "" } . first
109+ unless ( values = query_params [ param ] ) . empty?
110+ data [ :term ] = values . reject { |v | v . strip == '' } . first
110111 break if data [ :term ]
111112 end
112113 end
@@ -115,15 +116,15 @@ def parse(obj)
115116
116117 data
117118 rescue URI ::InvalidURIError
118- raise InvalidUriError . new ( "Unable to parse URI, not a URI? -- #{ obj . inspect } " , $! )
119+ raise InvalidUriError . new ( "Unable to parse URI, not a URI? -- #{ obj . inspect } " , $ERROR_INFO )
119120 end
120121
121122 protected
122123
123124 # Determine the correct name_key for this host and path
124125 def domain_and_name_key_for ( uri )
125126 # Create a proc that will return immediately
126- check = Proc . new do |domain |
127+ check = proc do |domain |
127128 domain . downcase!
128129 if paths = @domain_index [ domain ]
129130 paths . each do |path , name_key |
@@ -134,16 +135,16 @@ def domain_and_name_key_for(uri)
134135
135136 # First check hosts with and without the www prefix with the path
136137 if uri . host =~ /\A www\. (.+)\z /i
137- check . call $1
138+ check . call Regexp . last_match ( 1 )
138139 else
139140 check . call uri . host
140141 end
141142
142143 # Remove subdomains until only two labels are left (probably good enough)
143- host_arr = uri . host . split ( "." )
144- while host_arr . size > 2 do
144+ host_arr = uri . host . split ( '.' )
145+ while host_arr . size > 2
145146 host_arr . shift
146- check . call host_arr . join ( "." )
147+ check . call host_arr . join ( '.' )
147148 end
148149
149150 nil
@@ -152,32 +153,37 @@ def domain_and_name_key_for(uri)
# Deserialize raw referer data with the parser matching the file extension,
# then feed the result to parse_referer_data.
#
# @param data [String] raw file contents
# @param ext [String] file extension including the dot ('.yml', '.yaml' or '.json')
# @return the result of parse_referer_data
# @raise [UnsupportedFormatError] when ext is not a supported extension
# @raise [CorruptReferersError] when parse_referer_data raises a StandardError
def deserialize_referer_data(data, ext)
  # Parse the loaded data with the correct parser
  deserialized_data =
    case ext
    when '.yml', '.yaml' then deserialize_yaml(data)
    when '.json' then deserialize_json(data)
    else
      # NOTE(review): @msg is not assigned anywhere in the visible code, so it
      # may interpolate as an empty string -- confirm whether ext was intended.
      raise UnsupportedFormatError, "Only yaml and json file formats are currently supported -- #{@msg}"
    end

  begin
    parse_referer_data deserialized_data
  rescue StandardError => e
    # Bind the exception explicitly: $ERROR_INFO is only defined after
    # require 'English', which this file does not load.
    raise CorruptReferersError.new("Unable to parse data file -- #{e.class} #{e}", e)
  end
end
168169
# Parse a YAML document with YAML.safe_load.
#
# @param data [String] YAML source text
# @return the deserialized document
# @raise [CorruptReferersError] when loading raises a StandardError; the
#   original exception is passed along as the second constructor argument
def deserialize_yaml(data)
  require 'yaml'
  YAML.safe_load(data)
rescue StandardError => e
  # StandardError rather than Exception, so signals and SystemExit propagate
  raise CorruptReferersError.new("Unable to YAML file -- #{e}", e)
end
175176
# Parse a JSON document, using Oj when that gem can be loaded and the
# standard library JSON parser otherwise.
#
# @param data [String] JSON source text
# @return the deserialized document
# @raise [CorruptReferersError] when the document cannot be parsed; the
#   original exception is passed along as the second constructor argument
def deserialize_json(data)
  # Always load the stdlib parser: it is the fallback, and the rescue clause
  # at the bottom names JSON::ParserError even when Oj does the parsing.
  require 'json'

  begin
    require 'oj'
  rescue LoadError
    # Oj is optional; parse failures here reach the method-level rescue.
    return JSON.parse(data)
  end

  begin
    Oj.load(data)
  rescue Oj::ParseError => e
    raise CorruptReferersError.new("Unable to JSON file -- #{e}", e)
  end
rescue JSON::ParserError => e
  raise CorruptReferersError.new("Unable to JSON file -- #{e}", e)
end
182188
183189 def read_referer_data ( uri )
@@ -187,7 +193,7 @@ def read_referer_data(uri)
187193 begin
188194 open ( uri ) . read
189195 rescue OpenURI ::HTTPError
190- raise InvalidUriError . new ( "Cannot load referer data from URI #{ uri } -- #{ $! . to_s } " , $! )
196+ raise InvalidUriError . new ( "Cannot load referer data from URI #{ uri } -- #{ $ERROR_INFO } " , $ERROR_INFO )
191197 end
192198 else
193199 File . read ( uri )
@@ -208,8 +214,8 @@ def parse_referer_data(data)
208214 end
209215
210216 optimize_index!
211- rescue
212- raise CorruptReferersError . new ( " Unable to parse referer data" , $! )
217+ rescue StandardError
218+ raise CorruptReferersError . new ( ' Unable to parse referer data' , $ERROR_INFO )
213219 end
214220 end
215221end
0 commit comments