Skip to content

Commit 10cdc5a

Browse files
fix carmax scraper
1 parent 2d96b61 commit 10cdc5a

File tree

1 file changed

+76
-17
lines changed

1 file changed

+76
-17
lines changed

scrapers/carmax/scrape.js

Lines changed: 76 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -96,11 +96,28 @@ class CarMaxScraper extends BaseScraper {
9696
}
9797
});
9898

99-
await this.page.goto(searchUrl, {
99+
const response = await this.page.goto(searchUrl, {
100100
waitUntil: 'networkidle2',
101101
timeout: 30000
102102
});
103103

104+
// Check for HTTP errors (403 Forbidden, etc.)
105+
if (response && response.status() >= 400) {
106+
throw new Error(`HTTP ${response.status()} error - likely blocked by anti-bot protection`);
107+
}
108+
109+
// Check page content for bot detection (but ignore meta robots tag)
110+
const html = await this.page.content();
111+
const bodyText = html.toLowerCase();
112+
// Check for actual bot detection messages, but exclude meta robots tag
113+
const hasAccessDenied = bodyText.includes('access denied') || bodyText.includes('access blocked');
114+
const hasCaptcha = bodyText.includes('captcha');
115+
const hasRobotBlock = bodyText.includes('detected as a robot') || bodyText.includes('automated access');
116+
117+
if (hasAccessDenied || hasCaptcha || hasRobotBlock) {
118+
throw new Error('Bot detection triggered - access denied or CAPTCHA page');
119+
}
120+
104121
// Wait for page body and give time for dynamic content
105122
await this.page.waitForSelector('body', { timeout: 5000 });
106123
await new Promise(resolve => setTimeout(resolve, 2000));
@@ -118,29 +135,71 @@ class CarMaxScraper extends BaseScraper {
118135
// Get page HTML
119136
const html = await this.page.content();
120137

121-
// Extract VIN data from JavaScript array: const cars = [{"stockNumber":...,"vin":"..."}...]
122-
const vinMap = new Map();
123-
const carsMatch = html.match(/const cars = (\[.*?\]);/s);
124-
if (carsMatch) {
138+
// CarMax now uses const searchResponse = {...} with items array
139+
const searchResponseMatch = html.match(/const searchResponse = (\{[\s\S]*?\});/);
140+
let pageListings = [];
141+
142+
if (searchResponseMatch) {
125143
try {
126-
const carsData = JSON.parse(carsMatch[1]);
127-
for (const car of carsData) {
128-
if (car.stockNumber && car.vin) {
129-
vinMap.set(car.stockNumber.toString(), car.vin);
130-
}
144+
const searchResponse = JSON.parse(searchResponseMatch[1]);
145+
if (searchResponse.items && Array.isArray(searchResponse.items)) {
146+
const queryModelLower = query.model.toLowerCase();
147+
148+
pageListings = searchResponse.items
149+
.filter(item => {
150+
// Filter to only items matching our query model (allows "ID.4 EV" when searching for "ID.4")
151+
const itemModelLower = (item.model || '').toLowerCase();
152+
return itemModelLower.includes(queryModelLower) || queryModelLower.includes(itemModelLower);
153+
})
154+
.map(item => ({
155+
id: item.stockNumber.toString(),
156+
vin: item.vin,
157+
make: item.make,
158+
model: query.model, // Use query model for consistency with validation
159+
year: item.year,
160+
trim: item.trim || 'Base', // Use 'Base' if trim is null/empty
161+
price: item.basePrice,
162+
mileage: item.mileage,
163+
location: 'CarMax',
164+
url: `https://www.carmax.com/car/${item.stockNumber}`,
165+
listing_date: new Date().toISOString().split('T')[0]
166+
}));
131167
}
132168
} catch (e) {
133-
// Failed to parse, continue without VINs
169+
console.error(` ⚠ Error parsing searchResponse JSON:`, e.message);
170+
}
171+
} else {
172+
// Fallback to old HTML parsing method
173+
const vinMap = new Map();
174+
const carsMatch = html.match(/const cars = (\[.*?\]);/s);
175+
if (carsMatch) {
176+
try {
177+
const carsData = JSON.parse(carsMatch[1]);
178+
for (const car of carsData) {
179+
if (car.stockNumber && car.vin) {
180+
vinMap.set(car.stockNumber.toString(), car.vin);
181+
}
182+
}
183+
} catch (e) {
184+
// Failed to parse
185+
}
186+
}
187+
188+
// Merge API VIN data
189+
for (const [stockNumber, vin] of apiVinData) {
190+
vinMap.set(stockNumber, vin);
134191
}
135-
}
136192

137-
// Merge API VIN data (from pagination) with static VIN data
138-
for (const [stockNumber, vin] of apiVinData) {
139-
vinMap.set(stockNumber, vin);
193+
const $ = cheerio.load(html);
194+
pageListings = parseListings($, query.make, query.model, vinMap);
140195
}
141196

142-
const $ = cheerio.load(html);
143-
const pageListings = parseListings($, query.make, query.model, vinMap);
197+
// If page 1 has 0 results, stop immediately (model doesn't exist on CarMax)
198+
if (pageNum === 1 && pageListings.length === 0) {
199+
console.log(` ℹ No listings found for ${query.make} ${query.model} on CarMax`);
200+
hasMorePages = false;
201+
break;
202+
}
144203

145204
// Deduplicate - only add listings we haven't seen before
146205
for (const listing of pageListings) {

0 commit comments

Comments
 (0)