Skip to content

Commit 3a3291a

Browse files
clucraft and claude committed
Fix Amazon multi-price extraction and prevent AI override
Amazon products can have multiple prices:
- Main buy box price
- "Other Sellers" prices
- Subscribe & Save prices
- New & Used prices

The scraper now extracts ALL prices, not just the main one. This allows anchor price matching to find the specific price the user selected.

Also fixed AI verification overriding the user's anchor price selection. When the user has deliberately chosen a price (e.g., "other sellers" at $52.91), the AI must not "correct" it to the main buy box price ($79.99).

Changes:
- Amazon scraper now collects all price variants
- extractSiteSpecificCandidates handles the allPrices array
- Anchor matching now always returns (no fall-through to AI override)
- Increased anchor tolerance from 10% to 15% to allow for small sales
- Added debug logging showing all candidate prices

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 8131017 commit 3a3291a

File tree

1 file changed

+121
-47
lines changed

1 file changed

+121
-47
lines changed

backend/src/services/scraper.ts

Lines changed: 121 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -132,14 +132,33 @@ function extractSiteSpecificCandidates($: CheerioAPI, url: string): { candidates
132132

133133
const siteScraper = siteScrapers.find((s) => s.match(url));
134134
if (siteScraper) {
135-
const siteResult = siteScraper.scrape($, url);
136-
if (siteResult.price) {
135+
const siteResult = siteScraper.scrape($, url) as {
136+
price?: ParsedPrice | null;
137+
name?: string | null;
138+
imageUrl?: string | null;
139+
stockStatus?: StockStatus;
140+
allPrices?: ParsedPrice[]; // Some scrapers return multiple prices (e.g., Amazon)
141+
};
142+
143+
// If scraper returned multiple prices, add them all as candidates
144+
if (siteResult.allPrices && siteResult.allPrices.length > 0) {
145+
for (const p of siteResult.allPrices) {
146+
candidates.push({
147+
price: p.price,
148+
currency: p.currency,
149+
method: 'site-specific',
150+
context: `Site-specific extractor for ${new URL(url).hostname}`,
151+
confidence: 0.85,
152+
});
153+
}
154+
} else if (siteResult.price) {
155+
// Single price result
137156
candidates.push({
138157
price: siteResult.price.price,
139158
currency: siteResult.price.currency,
140159
method: 'site-specific',
141160
context: `Site-specific extractor for ${new URL(url).hostname}`,
142-
confidence: 0.85, // Site-specific scrapers are well-tested
161+
confidence: 0.85,
143162
});
144163
}
145164
name = siteResult.name || null;
@@ -336,85 +355,129 @@ const siteScrapers: SiteScraper[] = [
336355
return false;
337356
};
338357

339-
// Try to get the main displayed price from specific containers first
340-
// These are the primary price display areas on Amazon
358+
// Collect ALL prices found on the page (for variant/seller support)
359+
const allPrices: ParsedPrice[] = [];
360+
const seenPrices = new Set<number>();
361+
362+
const addPrice = (parsed: ParsedPrice | null) => {
363+
if (parsed && parsed.price >= 2 && !seenPrices.has(parsed.price)) {
364+
seenPrices.add(parsed.price);
365+
allPrices.push(parsed);
366+
}
367+
};
368+
369+
// 1. Main buy box price
341370
const primaryPriceContainers = [
342371
'#corePrice_feature_div',
343372
'#corePriceDisplay_desktop_feature_div',
344373
'#apex_desktop_newAccordionRow',
345374
'#apex_offerDisplay_desktop',
346375
];
347376

348-
let price: ParsedPrice | null = null;
377+
let mainPrice: ParsedPrice | null = null;
349378

350-
// First, try the primary price containers
351379
for (const containerId of primaryPriceContainers) {
352380
const container = $(containerId);
353381
if (!container.length) continue;
354382

355-
// Look for the main price display (not savings/coupons)
356383
const priceElements = container.find('.a-price .a-offscreen');
357384

358385
for (let i = 0; i < priceElements.length; i++) {
359386
const el = $(priceElements[i]);
360-
361-
// Skip if this is inside a coupon container
362387
if (isInCouponContainer(el)) continue;
363388

364-
// Skip if the parent has "savings" or similar class
365389
const parentClass = el.parent().attr('class') || '';
366390
if (/savings|coupon|save/i.test(parentClass)) continue;
367391

368392
const text = el.text().trim();
369393
const parsed = parsePrice(text);
370394

371-
// Validate the price is reasonable (not a $1 coupon)
372395
if (parsed && parsed.price >= 2) {
373-
price = parsed;
374-
break;
396+
if (!mainPrice) mainPrice = parsed;
397+
addPrice(parsed);
375398
}
376399
}
400+
}
377401

378-
if (price) break;
402+
// 2. "Other Sellers" / "New & Used" prices
403+
// Look for "Other Sellers on Amazon" section
404+
const otherSellersSelectors = [
405+
'#aod-offer-price .a-offscreen', // "All Offers" display
406+
'#olp-upd-new .a-color-price', // "New from $X"
407+
'#olp-upd-used .a-color-price', // "Used from $X"
408+
'#usedBuySection .a-color-price',
409+
'#newBuySection .a-color-price',
410+
'.olp-from-new-price',
411+
'.olp-from-used-price',
412+
'#buyNew_noncbb .a-color-price', // "Buy New" non-buy-box
413+
];
414+
415+
for (const selector of otherSellersSelectors) {
416+
$(selector).each((_, el) => {
417+
const text = $(el).text().trim();
418+
addPrice(parsePrice(text));
419+
});
379420
}
380421

381-
// Fallback: try other known price selectors
382-
if (!price) {
383-
const fallbackSelectors = [
384-
'#priceblock_dealprice',
385-
'#priceblock_saleprice',
386-
'#priceblock_ourprice',
387-
'#price_inside_buybox',
388-
'#newBuyBoxPrice',
389-
'span[data-a-color="price"] .a-offscreen',
390-
];
422+
// 3. "New & Used from $X" link text
423+
const newUsedLink = $('#usedAndNewBuySection, #newUsedBuyBox, [id*="olp"]').text();
424+
const newUsedMatch = newUsedLink.match(/\$[\d,]+\.?\d*/g);
425+
if (newUsedMatch) {
426+
for (const priceStr of newUsedMatch) {
427+
addPrice(parsePrice(priceStr));
428+
}
429+
}
391430

392-
for (const selector of fallbackSelectors) {
393-
const el = $(selector).first();
394-
if (el.length && !isInCouponContainer(el)) {
395-
const text = el.text().trim();
396-
const parsed = parsePrice(text);
397-
if (parsed && parsed.price >= 2) {
398-
price = parsed;
399-
break;
400-
}
431+
// 4. Subscribe & Save price
432+
const snsPrice = $('#subscribeAndSavePrice, #sns-price, .sns-price-block .a-offscreen').first().text();
433+
if (snsPrice) {
434+
addPrice(parsePrice(snsPrice));
435+
}
436+
437+
// 5. Fallback selectors
438+
const fallbackSelectors = [
439+
'#priceblock_dealprice',
440+
'#priceblock_saleprice',
441+
'#priceblock_ourprice',
442+
'#price_inside_buybox',
443+
'#newBuyBoxPrice',
444+
'span[data-a-color="price"] .a-offscreen',
445+
];
446+
447+
for (const selector of fallbackSelectors) {
448+
const el = $(selector).first();
449+
if (el.length && !isInCouponContainer(el)) {
450+
const text = el.text().trim();
451+
const parsed = parsePrice(text);
452+
if (parsed && parsed.price >= 2) {
453+
if (!mainPrice) mainPrice = parsed;
454+
addPrice(parsed);
401455
}
402456
}
403457
}
404458

405-
// Last resort: look for the whole/fraction price format
406-
if (!price) {
459+
// 6. Whole/fraction price format
460+
if (!mainPrice) {
407461
const whole = $('#corePrice_feature_div .a-price-whole').first().text().replace(',', '');
408462
const fraction = $('#corePrice_feature_div .a-price-fraction').first().text();
409463
if (whole) {
410464
const priceStr = `$${whole}${fraction ? '.' + fraction : ''}`;
411465
const parsed = parsePrice(priceStr);
412466
if (parsed && parsed.price >= 2) {
413-
price = parsed;
467+
mainPrice = parsed;
468+
addPrice(parsed);
414469
}
415470
}
416471
}
417472

473+
// Use main price as the primary, but we've collected all prices for candidate matching
474+
const price = mainPrice;
475+
476+
// Log what we found for debugging
477+
if (allPrices.length > 1) {
478+
console.log(`[Amazon] Found ${allPrices.length} prices: ${allPrices.map(p => p.price).join(', ')}`);
479+
}
480+
418481
// Product name
419482
const name = $('#productTitle').text().trim() ||
420483
$('h1.a-size-large').text().trim() ||
@@ -432,15 +495,13 @@ const siteScrapers: SiteScraper[] = [
432495
const outOfStockDiv = $('#outOfStock').length > 0;
433496
const unavailableText = $('body').text().toLowerCase();
434497

435-
// Check for out of stock indicators
436498
if (
437499
outOfStockDiv ||
438500
availabilityText.includes('currently unavailable') ||
439501
availabilityText.includes('out of stock') ||
440502
availabilityText.includes('not available') ||
441-
$('#add-to-cart-button').length === 0 && $('#buy-now-button').length === 0
503+
($('#add-to-cart-button').length === 0 && $('#buy-now-button').length === 0)
442504
) {
443-
// Verify it's truly out of stock by checking for unavailable messaging
444505
if (
445506
unavailableText.includes('currently unavailable') ||
446507
unavailableText.includes("we don't know when or if this item will be back in stock") ||
@@ -457,7 +518,7 @@ const siteScrapers: SiteScraper[] = [
457518
stockStatus = 'in_stock';
458519
}
459520

460-
return { name, price, imageUrl, stockStatus };
521+
return { name, price, imageUrl, stockStatus, allPrices };
461522
},
462523
},
463524

@@ -1429,10 +1490,13 @@ export async function scrapeProductWithVoting(
14291490
// Store all candidates
14301491
result.priceCandidates = allCandidates;
14311492

1493+
// Track if we used anchor price (to prevent AI from overriding user's choice)
1494+
let usedAnchorPrice = false;
1495+
14321496
// PRIORITY 1: If we have an anchor price, it takes precedence (user confirmed this price)
14331497
// This handles variant products where multiple prices exist on the page
14341498
if (anchorPrice && allCandidates.length > 0) {
1435-
console.log(`[Voting] Have anchor price ${anchorPrice}, searching ${allCandidates.length} candidates...`);
1499+
console.log(`[Voting] Have anchor price ${anchorPrice}, searching ${allCandidates.length} candidates: ${allCandidates.map(c => c.price).join(', ')}`);
14361500

14371501
// Find the candidate closest to the anchor price
14381502
const closestCandidate = allCandidates.reduce((closest, candidate) => {
@@ -1443,17 +1507,27 @@ export async function scrapeProductWithVoting(
14431507

14441508
const priceDiff = Math.abs(closestCandidate.price - anchorPrice) / anchorPrice;
14451509

1446-
// Use anchor matching if within 10% (tight tolerance for variants)
1510+
// Use anchor matching if within 15% (allows for small sales)
14471511
// or if it's an exact match
1448-
if (closestCandidate.price === anchorPrice || priceDiff < 0.10) {
1512+
if (closestCandidate.price === anchorPrice || priceDiff < 0.15) {
14491513
console.log(`[Voting] Found match for anchor price ${anchorPrice}: ${closestCandidate.price} via ${closestCandidate.method} (${(priceDiff * 100).toFixed(1)}% diff)`);
14501514
result.price = { price: closestCandidate.price, currency: closestCandidate.currency };
14511515
result.selectedMethod = closestCandidate.method;
1516+
usedAnchorPrice = true;
1517+
result.aiStatus = 'verified'; // Mark as verified to skip AI override
14521518
return result;
14531519
} else {
1454-
// No close match - price may have legitimately changed
1455-
console.log(`[Voting] No candidate close to anchor price ${anchorPrice} (closest: ${closestCandidate.price}, ${(priceDiff * 100).toFixed(1)}% diff)`);
1456-
// Fall through to preferred method or consensus
1520+
// No close match - still use the closest candidate
1521+
// This prevents AI from picking a completely different price (like main buy box vs other sellers)
1522+
console.log(`[Voting] No close match for anchor ${anchorPrice}, using closest: ${closestCandidate.price} (${(priceDiff * 100).toFixed(1)}% diff) - may be a price change`);
1523+
result.price = { price: closestCandidate.price, currency: closestCandidate.currency };
1524+
result.selectedMethod = closestCandidate.method;
1525+
usedAnchorPrice = true;
1526+
// IMPORTANT: Mark as verified to prevent AI from overriding user's deliberate choice
1527+
// The user selected a specific price (e.g., "other sellers" on Amazon), don't let AI
1528+
// "correct" it to the main buy box price
1529+
result.aiStatus = 'verified';
1530+
return result;
14571531
}
14581532
}
14591533

0 commit comments

Comments
 (0)