Skip to content

Commit 00cea30

Browse files
committed
Enhance Google SERP parser with new variants and update site_links handling
Added two new variants, "featured_links" and "featured_with_sitelinks," to the Google config for improved parsing of search results. Updated the organic_result model to wrap site_links in a Collection for better position assignment. Adjusted the search_spec to include both 2025 and 2026 HTML files for testing.
1 parent 96cdb26 commit 00cea30

File tree

8 files changed

+204
-27
lines changed

8 files changed

+204
-27
lines changed

.rubocop.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,6 @@ AllCops:
33

44
# Omakase Ruby styling for Rails
55
inherit_gem: { rubocop-rails-omakase: rubocop.yml }
6+
7+
Layout/TrailingEmptyLines:
8+
Enabled: false

lib/serp_parser/google/config.rb

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,13 @@ def self.config_block
6262
text :title, ".", processors: [ :extract_title_with_fallback, :clean_text ]
6363
url :url, attribute: "href"
6464
end
65+
66+
variant "featured_links", meta: { first_seen: "2026-01-01" } do
67+
match "a.tNxQIb"
68+
model SerpParser::Models::OrganicResults::SiteLink
69+
text :title, ".lKeYrd span"
70+
url :url, attribute: "href"
71+
end
6572
end
6673

6774
component :related_search do
@@ -122,6 +129,16 @@ def self.config_block
122129
has_one :rating
123130
has_many :site_links, component: :sitelinks
124131
end
132+
133+
variant "featured_with_sitelinks", meta: { first_seen: "2026-01-01" } do
134+
container "div.Ww4FFb.vt6azd:not(.xxAJT):not(.eDSE7e)"
135+
required_children [ ".GkAmnd" ]
136+
text :title, ".GkAmnd"
137+
text :description, ".VwiC3b"
138+
url :url, "a.rTyHce", attribute: "href"
139+
has_one :rating
140+
has_many :site_links, component: :sitelinks
141+
end
125142
end
126143

127144
element :related_searches do

lib/serp_parser/models/organic_result.rb

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,15 @@ def initialize(**args)
1212
@description = args[:description]
1313
@url = args[:url]
1414
@rating = args[:rating]
15-
@site_links = args[:site_links]
15+
# Wrap site_links in a Collection to assign positions
16+
@site_links = args[:site_links].is_a?(Array) ? SerpParser::Collection.new(args[:site_links]) : args[:site_links]
1617
end
1718

1819
# Returns the host of +url+ with a leading "www." removed.
#
# @return [String, nil] the host without its leading "www.", or nil when
#   +url+ is nil/false or the parsed URI has no host (for example
#   "javascript:void(0)").
# @raise [URI::InvalidURIError] if URI.parse cannot parse +url+.
def domain
  return nil unless url

  # Strip "www." only at the start of the host; removing every occurrence
  # would mangle hosts such as "awww.example.com".
  URI.parse(url).host&.delete_prefix("www.")
end
2326

spec/files/google/2025-12-23-mobile-matkasse.json

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,37 +10,37 @@
1010
"rating": null,
1111
"site_links": [
1212
{
13-
"position": null,
13+
"position": 1,
1414
"title": "Matkasse en person",
1515
"url": "https://www.hellofresh.se/matkasse/en-person"
1616
},
1717
{
18-
"position": null,
18+
"position": 2,
1919
"title": "Matkasse för två personer",
2020
"url": "https://www.hellofresh.se/matkasse/for-tva"
2121
},
2222
{
23-
"position": null,
23+
"position": 3,
2424
"title": "Vegetarisk matkasse",
2525
"url": "https://www.hellofresh.se/matkasse/vegetarisk"
2626
},
2727
{
28-
"position": null,
28+
"position": 4,
2929
"title": "Flexitarian matkasse",
3030
"url": "https://www.hellofresh.se/matkasse/flexitarian"
3131
},
3232
{
33-
"position": null,
33+
"position": 5,
3434
"title": "Matkasse student",
3535
"url": "https://www.hellofresh.se/matkasse/student"
3636
},
3737
{
38-
"position": null,
38+
"position": 6,
3939
"title": "Veckans matkasse",
4040
"url": "https://www.hellofresh.se/matkasse/veckans"
4141
},
4242
{
43-
"position": null,
43+
"position": 7,
4444
"title": "Billig matkasse",
4545
"url": "https://www.hellofresh.se/matkasse/billig"
4646
}
@@ -56,37 +56,37 @@
5656
"rating": null,
5757
"site_links": [
5858
{
59-
"position": null,
59+
"position": 1,
6060
"title": "På menyn",
6161
"url": "https://www.linasmatkasse.se/pa-menyn"
6262
},
6363
{
64-
"position": null,
64+
"position": 2,
6565
"title": "Beställ matkasse",
6666
"url": "https://www.linasmatkasse.se/registrering"
6767
},
6868
{
69-
"position": null,
69+
"position": 3,
7070
"title": "Hållbarhet på Linas Matkasse",
7171
"url": "https://www.linasmatkasse.se/hallbarhet"
7272
},
7373
{
74-
"position": null,
74+
"position": 4,
7575
"title": "Logga in",
7676
"url": "https://www.linasmatkasse.se/logga-in"
7777
},
7878
{
79-
"position": null,
79+
"position": 5,
8080
"title": "Kundservice",
8181
"url": "https://www.linasmatkasse.se/kontaktuppgifter"
8282
},
8383
{
84-
"position": null,
84+
"position": 6,
8585
"title": "Snabbt & lättlagat",
8686
"url": "https://www.linasmatkasse.se/matkasse/snabbtochlattlagat"
8787
},
8888
{
89-
"position": null,
89+
"position": 7,
9090
"title": "Leverans",
9191
"url": "https://www.linasmatkasse.se/leverans"
9292
}
@@ -102,27 +102,27 @@
102102
"rating": null,
103103
"site_links": [
104104
{
105-
"position": null,
105+
"position": 1,
106106
"title": "Mer om icas matkasse",
107107
"url": "https://www.ica.se/icas-matkasse/om-icas-matkasse/"
108108
},
109109
{
110-
"position": null,
110+
"position": 2,
111111
"title": "Familjens Favoriter",
112112
"url": "https://www.ica.se/icas-matkasse/familjens-favoriter/"
113113
},
114114
{
115-
"position": null,
115+
"position": 3,
116116
"title": "Bra att ha hemma",
117117
"url": "https://www.ica.se/icas-matkasse/bra-att-ha-hemma/"
118118
},
119119
{
120-
"position": null,
120+
"position": 4,
121121
"title": "Familjens Favoriter laktosfri",
122122
"url": "https://www.ica.se/icas-matkasse/familjens-favoriter-laktosfri/"
123123
},
124124
{
125-
"position": null,
125+
"position": 5,
126126
"title": "Familjens Favoriter vego",
127127
"url": "https://www.ica.se/icas-matkasse/familjens-favoriter-vego/"
128128
}

spec/files/google/2025-12-23-mobile-middagsfrid-rabattkod.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,32 +23,32 @@
2323
"rating": null,
2424
"site_links": [
2525
{
26-
"position": null,
26+
"position": 1,
2727
"title": "Vårt betyg",
2828
"url": "javascript:void(0)"
2929
},
3030
{
31-
"position": null,
31+
"position": 2,
3232
"title": "Varför Middagsfrid?",
3333
"url": "javascript:void(0)"
3434
},
3535
{
36-
"position": null,
36+
"position": 3,
3737
"title": "Hur beställer jag från...",
3838
"url": "javascript:void(0)"
3939
},
4040
{
41-
"position": null,
41+
"position": 4,
4242
"title": "Matkassar från Middagsfrid",
4343
"url": "javascript:void(0)"
4444
},
4545
{
46-
"position": null,
46+
"position": 5,
4747
"title": "Hur mycket kostar det?",
4848
"url": "javascript:void(0)"
4949
},
5050
{
51-
"position": null,
51+
"position": 6,
5252
"title": "Leveransalternativ",
5353
"url": "javascript:void(0)"
5454
}

0 commit comments

Comments
 (0)