Skip to content

Commit bf111e1

Browse files
clucraft and claude committed
Fix Amazon scraper picking up coupon prices instead of product price
- Add detection for coupon/savings containers and skip prices within them
- Check parent elements for coupon-related IDs, classes, and text
- Add minimum price threshold of $2 (coupons are typically $1-5)
- Add fallback to parse Amazon's whole/fraction price format directly
- Increase findMostLikelyPrice threshold from $0.99 to $5

This fixes the issue where $1 coupon savings were being scraped instead of the actual $25.99 product price.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent ba9e52b commit bf111e1

File tree

2 files changed

+106
-23
lines changed

2 files changed

+106
-23
lines changed

backend/src/services/scraper.ts

Lines changed: 91 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -24,26 +24,101 @@ const siteScrapers: SiteScraper[] = [
2424
{
2525
match: (url) => /amazon\.(com|co\.uk|ca|de|fr|es|it|co\.jp|in|com\.au)/i.test(url),
2626
scrape: ($) => {
27-
// Price selectors in order of preference (sale price first)
28-
const priceSelectors = [
29-
'#corePrice_feature_div .a-price .a-offscreen',
30-
'#corePriceDisplay_desktop_feature_div .a-price .a-offscreen',
31-
'#priceblock_dealprice',
32-
'#priceblock_saleprice',
33-
'#priceblock_ourprice',
34-
'.a-price .a-offscreen',
35-
'#price_inside_buybox',
36-
'#newBuyBoxPrice',
37-
'span[data-a-color="price"] .a-offscreen',
27+
// Helper to check if element is inside a coupon/savings container
28+
const isInCouponContainer = (el: ReturnType<typeof $>) => {
29+
const parents = el.parents().toArray();
30+
for (const parent of parents) {
31+
const id = $(parent).attr('id') || '';
32+
const className = $(parent).attr('class') || '';
33+
const text = $(parent).text().toLowerCase();
34+
if (/coupon|savings|save\s*\$|clipcoupon|promoprice/i.test(id + className)) {
35+
return true;
36+
}
37+
// Check if the immediate container mentions "save" or "coupon"
38+
if (text.includes('save $') || text.includes('coupon') || text.includes('clip')) {
39+
// Only consider it a coupon if it's a small container
40+
if (text.length < 100) return true;
41+
}
42+
}
43+
return false;
44+
};
45+
46+
// Try to get the main displayed price from specific containers first
47+
// These are the primary price display areas on Amazon
48+
const primaryPriceContainers = [
49+
'#corePrice_feature_div',
50+
'#corePriceDisplay_desktop_feature_div',
51+
'#apex_desktop_newAccordionRow',
52+
'#apex_offerDisplay_desktop',
3853
];
3954

4055
let price: ParsedPrice | null = null;
41-
for (const selector of priceSelectors) {
42-
const el = $(selector).first();
43-
if (el.length) {
56+
57+
// First, try the primary price containers
58+
for (const containerId of primaryPriceContainers) {
59+
const container = $(containerId);
60+
if (!container.length) continue;
61+
62+
// Look for the main price display (not savings/coupons)
63+
const priceElements = container.find('.a-price .a-offscreen');
64+
65+
for (let i = 0; i < priceElements.length; i++) {
66+
const el = $(priceElements[i]);
67+
68+
// Skip if this is inside a coupon container
69+
if (isInCouponContainer(el)) continue;
70+
71+
// Skip if the parent has "savings" or similar class
72+
const parentClass = el.parent().attr('class') || '';
73+
if (/savings|coupon|save/i.test(parentClass)) continue;
74+
4475
const text = el.text().trim();
45-
price = parsePrice(text);
46-
if (price) break;
76+
const parsed = parsePrice(text);
77+
78+
// Validate the price is reasonable (not a $1 coupon)
79+
if (parsed && parsed.price >= 2) {
80+
price = parsed;
81+
break;
82+
}
83+
}
84+
85+
if (price) break;
86+
}
87+
88+
// Fallback: try other known price selectors
89+
if (!price) {
90+
const fallbackSelectors = [
91+
'#priceblock_dealprice',
92+
'#priceblock_saleprice',
93+
'#priceblock_ourprice',
94+
'#price_inside_buybox',
95+
'#newBuyBoxPrice',
96+
'span[data-a-color="price"] .a-offscreen',
97+
];
98+
99+
for (const selector of fallbackSelectors) {
100+
const el = $(selector).first();
101+
if (el.length && !isInCouponContainer(el)) {
102+
const text = el.text().trim();
103+
const parsed = parsePrice(text);
104+
if (parsed && parsed.price >= 2) {
105+
price = parsed;
106+
break;
107+
}
108+
}
109+
}
110+
}
111+
112+
// Last resort: look for the whole/fraction price format
113+
if (!price) {
114+
const whole = $('#corePrice_feature_div .a-price-whole').first().text().replace(',', '');
115+
const fraction = $('#corePrice_feature_div .a-price-fraction').first().text();
116+
if (whole) {
117+
const priceStr = `$${whole}${fraction ? '.' + fraction : ''}`;
118+
const parsed = parsePrice(priceStr);
119+
if (parsed && parsed.price >= 2) {
120+
price = parsed;
121+
}
47122
}
48123
}
49124

backend/src/utils/priceParser.ts

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -107,15 +107,23 @@ export function findMostLikelyPrice(prices: ParsedPrice[]): ParsedPrice | null {
107107
if (prices.length === 0) return null;
108108
if (prices.length === 1) return prices[0];
109109

110-
// Filter out very small prices (likely not product prices)
111-
const validPrices = prices.filter((p) => p.price >= 0.99);
112-
113-
if (validPrices.length === 0) return prices[0];
110+
// Filter out very small prices (likely coupons, savings amounts, not actual product prices)
111+
// Most real products cost at least $2-3, and coupon amounts are often $1-5
112+
const validPrices = prices.filter((p) => p.price >= 5);
113+
114+
// If no prices above $5, try with a lower threshold but above typical coupon amounts
115+
if (validPrices.length === 0) {
116+
const lowThresholdPrices = prices.filter((p) => p.price >= 2);
117+
if (lowThresholdPrices.length > 0) {
118+
lowThresholdPrices.sort((a, b) => a.price - b.price);
119+
return lowThresholdPrices[0];
120+
}
121+
// Fall back to original list if nothing matches
122+
return prices[0];
123+
}
114124

115-
// Sort by price and pick the middle one (often the actual price)
116-
// This helps avoid picking shipping costs or discounts
125+
// Sort by price - the lowest valid price is often the sale/current price
117126
validPrices.sort((a, b) => a.price - b.price);
118127

119-
// Return the first (lowest) valid price - often the current/sale price
120128
return validPrices[0];
121129
}

0 commit comments

Comments
 (0)