Skip to content

Commit 887aea8

Browse files
committed
Rename and extend recommended_search to related_search
Refactored the recommended_search component and model to related_search, updating all references and adding support for new question_link variants. Updated processors and element parsers to handle both filter pills and question links, and modified output keys in search and spec files to use related_searches.
1 parent 7e38fc8 commit 887aea8

File tree

7 files changed

+77
-15
lines changed

7 files changed

+77
-15
lines changed

lib/serp_parser/google/config.rb

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,18 @@ def self.config_block
6464
end
6565
end
6666

67-
component :recommended_search do
67+
component :related_search do
6868
variant "filter_pill", meta: { first_seen: "2025-12-23" } do
6969
match "div.T3FoJb[role=\"listitem\"] a"
70-
model SerpParser::Models::RecommendedSearch
70+
model SerpParser::Models::RelatedSearch
7171
url :query, attribute: "href", processors: [ :extract_query_from_search_url ]
7272
end
73+
74+
variant "question_link", meta: { first_seen: "2025-12-23" } do
75+
match "a.ocRFx.aXYP2e.DxAvsd.sG4dYe"
76+
model SerpParser::Models::RelatedSearch
77+
text :query, "span.dg6jd.JGD2rd", processors: [ :clean_text, :downcase ]
78+
end
7379
end
7480

7581
# --- TOP LEVEL ELEMENTS ---
@@ -118,10 +124,15 @@ def self.config_block
118124
end
119125
end
120126

121-
element :recommended_searches do
127+
element :related_searches do
122128
variant "filter_pills", meta: { first_seen: "2025-12-23" } do
123129
container "div.fBctee"
124-
has_many :recommended_searches, component: :recommended_search
130+
has_many :related_searches, component: :related_search
131+
end
132+
133+
variant "questions", meta: { first_seen: "2025-12-23" } do
134+
container "div.Wt5Tfe"
135+
has_many :related_searches, component: :related_search
125136
end
126137
end
127138
end

lib/serp_parser/google/parsers/element.rb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@ def apply_processor(processor_name, value, element = nil)
159159
extract_title_with_fallback(element)
160160
when :extract_query_from_search_url
161161
Processors.extract_query_from_search_url(value)
162+
when :downcase
163+
value.is_a?(String) ? value.downcase : value
162164
else
163165
value
164166
end
@@ -244,8 +246,8 @@ def extract_component_data(element, variant)
244246
# For sitelinks, need at least title or url
245247
elsif model_class == SerpParser::Models::OrganicResults::SiteLink
246248
return nil unless data[:title] || data[:url]
247-
# For recommended searches, need a query
248-
elsif model_class == SerpParser::Models::RecommendedSearch
249+
# For related searches, need a query
250+
elsif model_class == SerpParser::Models::RelatedSearch
249251
return nil unless data[:query] && !data[:query].to_s.strip.empty?
250252
end
251253

lib/serp_parser/google/search.rb

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,21 @@ def organic_results
1919
SerpParser::Collection.new(models)
2020
end
2121

22-
def recommended_searches
23-
results = Parsers::Element.find_all(@doc, :recommended_searches, @registry)
24-
# Extract recommended search models from results
22+
def related_searches
23+
results = Parsers::Element.find_all(@doc, :related_searches, @registry)
24+
# Extract related search models from results (combines both filter pills and questions)
2525
searches = []
26+
seen_queries = {}
2627
results.each do |data|
27-
if data[:recommended_searches].is_a?(Array)
28-
searches.concat(data[:recommended_searches])
28+
if data[:related_searches].is_a?(Array)
29+
data[:related_searches].each do |search|
30+
query = search.query
31+
# Only add if we haven't seen this query before
32+
unless seen_queries.key?(query)
33+
seen_queries[query] = true
34+
searches << search
35+
end
36+
end
2937
end
3038
end
3139
SerpParser::Collection.new(searches)
@@ -38,7 +46,7 @@ def search_information
3846
def to_h
3947
{
4048
organic_results: organic_results.map(&:to_h),
41-
recommended_searches: recommended_searches.map(&:to_h)
49+
related_searches: related_searches.map(&:to_h)
4250
}
4351
end
4452
end

lib/serp_parser/models/recommended_search.rb renamed to lib/serp_parser/models/related_search.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
module SerpParser
22
module Models
3-
class RecommendedSearch
3+
class RelatedSearch
44
attr_reader :query
55

66
def initialize(query:)

lib/serp_parser/processors.rb

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,5 +141,33 @@ def self.extract_query_from_search_url(url)
141141
nil
142142
end
143143
end
144+
145+
# Normalize raw text into a query string.
#
# The text is passed through clean_text (defined elsewhere in this module)
# and the cleaned result is lowercased. A nil input short-circuits to nil
# without calling clean_text.
#
# @param text [String, nil] Raw text content to normalize
# @return [String, nil] Cleaned, downcased query; nil when text is nil
def self.extract_query_from_text(text)
  text.nil? ? nil : clean_text(text).downcase
end
153+
154+
# Extract a query from a search URL, falling back to the element's text.
# Used when the href might be javascript:void(0) instead of a search URL.
#
# The URL is tried first via extract_query_from_search_url. When that yields
# nothing and an element is given, the text of its "span.dg6jd.JGD2rd"
# descendant is used; if that span is missing or blank, the element's own
# text is used instead. The chosen text is normalized with
# extract_query_from_text.
#
# @param url [String] The href value
# @param element [Nokogiri::XML::Element, nil] Element to read text from if the URL yields no query
# @return [String, nil] Extracted and normalized query, or nil when neither
#   the URL nor the element provides non-blank text
def self.extract_query_from_url_or_text(url, element = nil)
  # Try to extract from the URL first
  query = extract_query_from_search_url(url)
  return query if query
  return nil unless element

  span_text = element.at_css("span.dg6jd.JGD2rd")&.text
  # A present-but-blank span is truthy in Ruby, so `||` alone would let it
  # shadow the element's own text; check for blankness explicitly.
  text = span_text.to_s.strip.empty? ? element.text : span_text
  # Nothing usable: report "no query" rather than an empty string.
  return nil if text.to_s.strip.empty?

  extract_query_from_text(text)
end
144172
end
145173
end

spec/files/google/2025-12-23-mobile-matkasse.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@
199199
"site_links": []
200200
}
201201
],
202-
"recommended_searches": [
202+
"related_searches": [
203203
"matkasse ica",
204204
"citygross matkasse",
205205
"coop matkasse",

spec/files/google/2025-12-23-mobile-middagsfrid-rabattkod.json

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,5 +135,18 @@
135135
"site_links": []
136136
}
137137
],
138-
"recommended_searches": []
138+
"related_searches": [
139+
"middagsfrid recept",
140+
"middagsfrid recept kyckling",
141+
"middagsfrid matkasse",
142+
"middagsfrid recept sydsvenskan",
143+
"linas matkasse",
144+
"matkasse rabattkod",
145+
"hellofresh rabattkod",
146+
"matkomfort",
147+
"matkasse hemleverans",
148+
"mat och ro",
149+
"matkassar sverige",
150+
"ica matkasse"
151+
]
139152
}

0 commit comments

Comments
 (0)