Skip to content

Commit 9b7436e

Browse files
redtachyons authored and benjben committed
Ruby: fix style and add minor code optimisations (closes #180)
1 parent 26b5058 commit 9b7436e

File tree

1 file changed

+50
-44
lines changed

1 file changed

+50
-44
lines changed

ruby/lib/referer-parser/parser.rb

Lines changed: 50 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -13,15 +13,17 @@
1313
# Copyright:: Copyright (c) 2014 Inside Systems Inc
1414
# License:: Apache License Version 2.0
1515

16+
# frozen_string_literal: true
17+
1618
require 'uri'
1719
require 'cgi'
1820

1921
module RefererParser
2022
class Parser
21-
DefaultFile = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'data', 'referers.json'))
23+
DefaultFile = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'data', 'referers.json')).freeze
2224

2325
# Create a new parser from one or more filenames/uris, defaults to ../data/referers.json
24-
def initialize(uris=DefaultFile)
26+
def initialize(uris = DefaultFile)
2527
@domain_index ||= {}
2628
@name_hash ||= {}
2729

@@ -39,74 +41,73 @@ def update(uris)
3941

4042
# Clean out the database
4143
def clear!
42-
@domain_index, @name_hash = {}, {}
44+
@domain_index = {}
45+
@name_hash = {}
4346

4447
true
4548
end
4649

4750
# Add a referer to the database with medium, name, domain or array of domains, and a parameter or array of parameters
4851
# If called manually and a domain is added to an existing entry with a path, you may need to call optimize_index! afterwards.
49-
def add_referer(medium, name, domains, parameters=nil)
52+
def add_referer(medium, name, domains, parameters = nil)
5053
# The same name can be used with multiple mediums so we make a key here
5154
name_key = "#{name}-#{medium}"
5255

5356
# Update the name hash with the parameter and medium data
54-
@name_hash[name_key] = {:source => name, :medium => medium, :parameters => [parameters].flatten }
57+
@name_hash[name_key] = { source: name, medium: medium, parameters: [parameters].flatten }
5558

5659
# Update the domain to name index
5760
[domains].flatten.each do |domain_url|
5861
domain, *path = domain_url.split('/')
59-
if domain =~ /\Awww\.(.*)\z/i
60-
domain = $1
61-
end
62+
domain = Regexp.last_match(1) if domain =~ /\Awww\.(.*)\z/i
6263

6364
domain.downcase!
6465

6566
@domain_index[domain] ||= []
66-
if !path.empty?
67-
@domain_index[domain] << ['/' + path.join('/'), name_key]
68-
else
69-
@domain_index[domain] << ['/', name_key]
70-
end
67+
@domain_index[domain] << if !path.empty?
68+
['/' + path.join('/'), name_key]
69+
else
70+
['/', name_key]
71+
end
7172
end
7273
end
7374

7475
# Prune duplicate entries and sort with the most specific path first if there is more than one entry
7576
# In this case, sorting by the longest string works fine
7677
def optimize_index!
77-
@domain_index.each do |key, val|
78+
@domain_index.each do |key, _val|
7879
# Sort each path/name_key pair by the longest path
79-
@domain_index[key].sort! { |a, b|
80+
@domain_index[key].sort! do |a, b|
8081
b[0].size <=> a[0].size
81-
}.uniq!
82+
end.uniq!
8283
end
8384
end
8485

8586
# Given a string or URI, return a hash of data
8687
def parse(obj)
8788
url = obj.is_a?(URI) ? obj : URI.parse(obj.to_s)
8889

89-
if !['android-app', 'http', 'https'].include?(url.scheme)
90-
raise InvalidUriError.new("Only Android-App, HTTP, and HTTPS schemes are supported -- #{url.scheme}")
90+
unless ['android-app', 'http', 'https'].include?(url.scheme)
91+
raise InvalidUriError, "Only Android-App, HTTP, and HTTPS schemes are supported -- #{url.scheme}"
9192
end
9293

93-
data = { :known => false, :uri => url.to_s }
94+
data = { known: false, uri: url.to_s }
9495

9596
domain, name_key = domain_and_name_key_for(url)
96-
if domain and name_key
97+
if domain && name_key
9798
referer_data = @name_hash[name_key]
9899
data[:known] = true
99100
data[:source] = referer_data[:source]
100101
data[:medium] = referer_data[:medium]
101102
data[:domain] = domain
102103

103104
# Parse parameters if the referer uses them
104-
if url.query and referer_data[:parameters]
105+
if url.query && referer_data[:parameters]
105106
query_params = CGI.parse(url.query)
106107
referer_data[:parameters].each do |param|
107108
# If there is a matching parameter, get the first non-blank value
108-
if !(values = query_params[param]).empty?
109-
data[:term] = values.select { |v| v.strip != "" }.first
109+
unless (values = query_params[param]).empty?
110+
data[:term] = values.reject { |v| v.strip == '' }.first
110111
break if data[:term]
111112
end
112113
end
@@ -115,15 +116,15 @@ def parse(obj)
115116

116117
data
117118
rescue URI::InvalidURIError
118-
raise InvalidUriError.new("Unable to parse URI, not a URI? -- #{obj.inspect}", $!)
119+
raise InvalidUriError.new("Unable to parse URI, not a URI? -- #{obj.inspect}", $ERROR_INFO)
119120
end
120121

121122
protected
122123

123124
# Determine the correct name_key for this host and path
124125
def domain_and_name_key_for(uri)
125126
# Create a proc that will return immediately
126-
check = Proc.new do |domain|
127+
check = proc do |domain|
127128
domain.downcase!
128129
if paths = @domain_index[domain]
129130
paths.each do |path, name_key|
@@ -134,16 +135,16 @@ def domain_and_name_key_for(uri)
134135

135136
# First check hosts with and without the www prefix with the path
136137
if uri.host =~ /\Awww\.(.+)\z/i
137-
check.call $1
138+
check.call Regexp.last_match(1)
138139
else
139140
check.call uri.host
140141
end
141142

142143
# Remove subdomains until only two labels are left (probably good enough)
143-
host_arr = uri.host.split(".")
144-
while host_arr.size > 2 do
144+
host_arr = uri.host.split('.')
145+
while host_arr.size > 2
145146
host_arr.shift
146-
check.call host_arr.join(".")
147+
check.call host_arr.join('.')
147148
end
148149

149150
nil
@@ -152,32 +153,37 @@ def domain_and_name_key_for(uri)
152153
def deserialize_referer_data(data, ext)
153154
# Parse the loaded data with the correct parser
154155
deserialized_data = if ['.yml', '.yaml'].include?(ext)
155-
deserialize_yaml(data)
156-
elsif ext == '.json'
157-
deserialize_json(data)
158-
else
159-
raise UnsupportedFormatError.new("Only yaml and json file formats are currently supported -- #{@msg}")
156+
deserialize_yaml(data)
157+
elsif ext == '.json'
158+
deserialize_json(data)
159+
else
160+
raise UnsupportedFormatError, "Only yaml and json file formats are currently supported -- #{@msg}"
160161
end
161162

162163
begin
163164
parse_referer_data deserialized_data
164-
rescue
165-
raise CorruptReferersError.new("Unable to parse data file -- #{$!.class} #{$!.to_s}", $!)
165+
rescue StandardError
166+
raise CorruptReferersError.new("Unable to parse data file -- #{$ERROR_INFO.class} #{$ERROR_INFO}", $ERROR_INFO)
166167
end
167168
end
168169

169170
def deserialize_yaml(data)
170171
require 'yaml'
171-
YAML.load(data)
172+
YAML.safe_load(data)
172173
rescue Exception => e
173-
raise CorruptReferersError.new("Unable to YAML file -- #{e.to_s}", e)
174+
raise CorruptReferersError.new("Unable to YAML file -- #{e}", e)
174175
end
175176

176177
def deserialize_json(data)
177-
require 'json'
178-
JSON.parse(data)
178+
begin
179+
require 'oj'
180+
Oj.load(data)
181+
rescue LoadError => e
182+
require 'json'
183+
JSON.parse(data)
184+
end
179185
rescue JSON::ParserError
180-
raise CorruptReferersError.new("Unable to JSON file -- #{$!.to_s}", $!)
186+
raise CorruptReferersError.new("Unable to JSON file -- #{$ERROR_INFO}", $ERROR_INFO)
181187
end
182188

183189
def read_referer_data(uri)
@@ -187,7 +193,7 @@ def read_referer_data(uri)
187193
begin
188194
open(uri).read
189195
rescue OpenURI::HTTPError
190-
raise InvalidUriError.new("Cannot load referer data from URI #{uri} -- #{$!.to_s}", $!)
196+
raise InvalidUriError.new("Cannot load referer data from URI #{uri} -- #{$ERROR_INFO}", $ERROR_INFO)
191197
end
192198
else
193199
File.read(uri)
@@ -208,8 +214,8 @@ def parse_referer_data(data)
208214
end
209215

210216
optimize_index!
211-
rescue
212-
raise CorruptReferersError.new("Unable to parse referer data", $!)
217+
rescue StandardError
218+
raise CorruptReferersError.new('Unable to parse referer data', $ERROR_INFO)
213219
end
214220
end
215221
end

0 commit comments

Comments
 (0)