Skip to content

Commit d746f84

Browse files
author
Kelley Reynolds
committed
Return the normalized domain along with the referer, and handle crazy cases (e.g. mixed-case hostnames)
1 parent b310d3a commit d746f84

File tree

2 files changed

+15
-4
lines changed

2 files changed

+15
-4
lines changed

ruby/lib/referer-parser/parser.rb

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,13 @@ def parse(obj)
5151

5252
data = { :known => false, :uri => url.to_s }
5353

54-
if (name_key = name_key_for(url))
54+
domain, name_key = domain_and_name_key_for(url)
55+
if domain and name_key
5556
referer_data = @name_hash[name_key]
5657
data[:known] = true
5758
data[:source] = referer_data[:source]
5859
data[:medium] = referer_data[:medium]
60+
data[:domain] = domain
5961

6062
# Parse parameters if the referer uses them
6163
if url.query and referer_data[:parameters]
@@ -78,12 +80,13 @@ def parse(obj)
7880
protected
7981

8082
# Determine the matching domain and name_key for this host and path
81-
def name_key_for(uri)
83+
def domain_and_name_key_for(uri)
8284
# Create a proc whose `return` exits this method immediately on the first match
8385
check = Proc.new do |domain|
86+
domain.downcase!
8487
if paths = @domain_index[domain]
8588
paths.each do |path, name_key|
86-
return name_key if uri.path.include?(path)
89+
return [domain, name_key] if uri.path.include?(path)
8790
end
8891
end
8992
end
@@ -174,6 +177,9 @@ def parse_referer_data(data)
174177
if domain =~ /\Awww\.(.*)\z/i
175178
domain = $1
176179
end
180+
181+
domain.downcase!
182+
177183
@domain_index[domain] ||= []
178184
if !path.empty?
179185
@domain_index[domain] << ['/' + path.join('/'), name_key]

ruby/spec/parser_spec.rb

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,14 @@
109109
parsed[:term].should == "hello"
110110
end
111111

112-
it "Should return the better result when the referer contains same parameters" do
112+
it "should return the better result when the referer contains same parameters" do
113113
parsed = default_parser.parse("http://search.tiscali.it/?tiscalitype=web&collection=web&key=&key=hello")
114114
parsed[:term].should == "hello"
115115
end
116+
117+
it "should return the normalized domain" do
118+
parsed = default_parser.parse("http://it.images.search.YAHOO.COM/images/view;_ylt=A0PDodgQmGBQpn4AWQgdDQx.;_ylu=X3oDMTBlMTQ4cGxyBHNlYwNzcgRzbGsDaW1n?back=http%3A%2F%2Fit.images.search.yahoo.com%2Fsearch%2Fimages%3Fp%3DEarth%2BMagic%2BOracle%2BCards%26fr%3Dmcafee%26fr2%3Dpiv-web%26tab%3Dorganic%26ri%3D5&w=1064&h=1551&imgurl=mdm.pbzstatic.com%2Foracles%2Fearth-magic-oracle-cards%2Fcard-1.png&rurl=http%3A%2F%2Fwww.psychicbazaar.com%2Foracles%2F143-earth-magic-oracle-cards.html&size=2.8+KB&name=Earth+Magic+Oracle+Cards+-+Psychic+Bazaar&p=Earth+Magic+Oracle+Cards&oid=f0a5ad5c4211efe1c07515f56cf5a78e&fr2=piv-web&fr=mcafee&tt=Earth%2BMagic%2BOracle%2BCards%2B-%2BPsychic%2BBazaar&b=0&ni=90&no=5&ts=&tab=organic&sigr=126n355ib&sigb=13hbudmkc&sigi=11ta8f0gd&.crumb=IZBOU1c0UHU")
119+
parsed[:domain].should == "images.search.yahoo.com"
120+
end
116121
end
117122
end

0 commit comments

Comments
 (0)