Skip to content

Commit 059712b

Browse files
Get VIN data in scrapes
1 parent 4830a78 commit 059712b

File tree

3 files changed

+77
-12
lines changed

3 files changed

+77
-12
lines changed

scrapers/carmax/scrape.js

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,27 @@ class CarMaxScraper extends BaseScraper {
9595
// Wait a bit for content to render
9696
await new Promise(resolve => setTimeout(resolve, 1000));
9797

98-
// Get page HTML and parse
98+
// Get page HTML
9999
const html = await this.page.content();
100+
101+
// Extract VIN data from JavaScript array: const cars = [{"stockNumber":...,"vin":"..."}...]
102+
const vinMap = new Map();
103+
const carsMatch = html.match(/const cars = (\[.*?\]);/s);
104+
if (carsMatch) {
105+
try {
106+
const carsData = JSON.parse(carsMatch[1]);
107+
for (const car of carsData) {
108+
if (car.stockNumber && car.vin) {
109+
vinMap.set(car.stockNumber.toString(), car.vin);
110+
}
111+
}
112+
} catch (e) {
113+
// Failed to parse, continue without VINs
114+
}
115+
}
116+
100117
const $ = cheerio.load(html);
101-
const pageListings = parseListings($, query.make, query.model);
118+
const pageListings = parseListings($, query.make, query.model, vinMap);
102119

103120
// Deduplicate - only add listings we haven't seen before
104121
for (const listing of pageListings) {
@@ -162,7 +179,7 @@ function buildSearchUrl(make, model) {
162179
return `https://www.carmax.com/cars/${makeSlug}/${modelSlug}`;
163180
}
164181

165-
function parseListings($, make, model) {
182+
function parseListings($, make, model, vinMap = new Map()) {
166183
const listings = [];
167184

168185
// CarMax car tiles
@@ -177,6 +194,9 @@ function parseListings($, make, model) {
177194
return; // Skip listings without valid IDs
178195
}
179196

197+
// Get VIN from map
198+
const vin = vinMap.get(stockId) || null;
199+
180200
// Extract price
181201
const priceText = $card.find('.scct--price-miles-info--price').text().trim();
182202
const price = parseInt(priceText.replace(/[$,*]/g, ''));
@@ -229,6 +249,7 @@ function parseListings($, make, model) {
229249

230250
listings.push({
231251
id: stockId,
252+
vin,
232253
make,
233254
model,
234255
year,

scrapers/carvana/scrape.js

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -97,13 +97,55 @@ class CarvanaScraper extends BaseScraper {
9797

9898
await new Promise(resolve => setTimeout(resolve, 1000));
9999

100-
// Get page HTML and parse
101-
const html = await this.page.content();
102-
const $ = cheerio.load(html);
103-
104-
const pageListings = parseListings($, query.make, query.model);
105-
106-
allListings.push(...pageListings);
100+
// Extract vehicle data from __NEXT_DATA__ script tag
101+
const pageListings = await this.page.evaluate((make, model) => {
102+
const script = document.getElementById('__NEXT_DATA__');
103+
if (!script) return [];
104+
105+
try {
106+
const data = JSON.parse(script.textContent);
107+
const vehiclesArray = data?.props?.pageProps?.forProviders?.forInventoryContext?.inventoryData?.inventory?.vehicles || [];
108+
109+
return vehiclesArray.map(v => ({
110+
vin: v.vin,
111+
year: v.year,
112+
make: v.make,
113+
model: v.model,
114+
trim: v.trim,
115+
price: v.price?.total,
116+
mileage: v.mileage,
117+
vehicleId: v.id
118+
}));
119+
} catch (e) {
120+
return [];
121+
}
122+
}, query.make, query.model);
123+
124+
// Filter to only matching make/model and format
125+
const formattedListings = pageListings
126+
.filter(v => {
127+
if (!v.vin) return false;
128+
const makeLower = v.make?.toLowerCase() || '';
129+
const modelLower = v.model?.toLowerCase() || '';
130+
const queryMakeLower = query.make.toLowerCase();
131+
const queryModelLower = query.model.toLowerCase();
132+
return makeLower.includes(queryMakeLower) && modelLower.includes(queryModelLower);
133+
})
134+
.map(v => ({
135+
id: `carvana-${v.vehicleId}`,
136+
vin: v.vin,
137+
make: query.make,
138+
model: query.model,
139+
year: v.year,
140+
trim: v.trim,
141+
price: v.price || null,
142+
mileage: v.mileage || null,
143+
location: 'Carvana',
144+
url: `https://www.carvana.com/vehicle/${v.vehicleId}`,
145+
listing_date: new Date().toISOString().split('T')[0]
146+
}));
147+
148+
allListings.push(...formattedListings);
107149

108150
// Stop if we've reached the target count
109151
if (allListings.length >= targetCount) {

scrapers/plattauto/scrape.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,9 +161,10 @@ function parseListings($, make, model) {
161161
const urlPath = linkElement.attr('href');
162162
const url = urlPath ? (urlPath.startsWith('http') ? urlPath : `https://www.plattauto.com${urlPath}`) : null;
163163

164-
// Extract stock number or VIN for ID - required for tracking
164+
// Extract stock number and VIN
165165
const stockText = $card.find('.dws-vehicle-field-stock-number').text().trim().replace(/\s+/g, ' ');
166-
const vinText = $card.find('.dws-vehicle-field-vin').text().trim().replace(/\s+/g, ' ');
166+
const vinText = $card.find('.dws-vehicle-field-vin').text().trim().replace(/\s+/g, ' ').replace(/^VIN\s+/i, '');
167+
const vin = vinText || null;
167168
const id = stockText || vinText;
168169
if (!id) {
169170
console.error(` ⚠ Warning: Could not extract stock number or VIN from listing`);
@@ -178,6 +179,7 @@ function parseListings($, make, model) {
178179
seenIds.add(id);
179180
listings.push({
180181
id,
182+
vin,
181183
make,
182184
model,
183185
year,

0 commit comments

Comments
 (0)