Skip to content

Commit 62c9d61

Browse files
committed
strip google parameters
1 parent 00cea30 commit 62c9d61

File tree

1 file changed

+38
-5
lines changed

1 file changed

+38
-5
lines changed

lib/serp_parser/processors.rb

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,18 +51,51 @@ def self.remove_span_elements(element)
5151
duplicate_element
5252
end
5353

54-
# Extract the URL from a Google redirect URL
54+
# Extract the URL from a Google redirect URL and strip Google-specific parameters
5555
# Handles both "/url?q=" and "/url?sa=...&url=" formats.
56+
# Also removes Google tracking parameters like srsltid, ved, usg, etc.
5657
# @param url [String]
5758
# @return [String, nil]
5859
def self.clean_google_url(url)
5960
return if url.nil?
60-
return url unless url.start_with?("/url?")
61+
62+
# First, extract URL from Google redirect if needed
63+
extracted_url = if url.start_with?("/url?")
64+
begin
65+
query = URI.parse(url).query
66+
params = URI.decode_www_form(query.to_s).to_h
67+
params["q"] || params["url"] || url
68+
rescue
69+
url
70+
end
71+
else
72+
url
73+
end
74+
75+
# Strip Google-specific parameters from the URL
76+
strip_google_params(extracted_url)
77+
end
78+
79+
# Strip Google-specific tracking parameters from a URL
80+
# @param url [String]
81+
# @return [String]
82+
def self.strip_google_params(url)
83+
return url if url.nil? || url.empty?
6184

6285
begin
63-
query = URI.parse(url).query
64-
params = URI.decode_www_form(query.to_s).to_h
65-
params["q"] || params["url"] || url
86+
uri = URI.parse(url)
87+
return url unless uri.query
88+
89+
# List of Google-specific parameters to remove
90+
google_params = %w[srsltid ved usg opi sa source rct]
91+
92+
params = URI.decode_www_form(uri.query).reject do |key, _value|
93+
google_params.include?(key)
94+
end
95+
96+
# Rebuild URI without Google parameters
97+
uri.query = params.empty? ? nil : URI.encode_www_form(params)
98+
uri.to_s
6699
rescue
67100
url
68101
end

0 commit comments

Comments
 (0)