@@ -96,11 +96,28 @@ class CarMaxScraper extends BaseScraper {
9696 }
9797 } ) ;
9898
99- await this . page . goto ( searchUrl , {
99+ const response = await this . page . goto ( searchUrl , {
100100 waitUntil : 'networkidle2' ,
101101 timeout : 30000
102102 } ) ;
103103
104+ // Check for HTTP errors (403 Forbidden, etc.)
105+ if ( response && response . status ( ) >= 400 ) {
106+ throw new Error ( `HTTP ${ response . status ( ) } error - likely blocked by anti-bot protection` ) ;
107+ }
108+
109+ // Check page content for bot detection (but ignore meta robots tag)
110+ const html = await this . page . content ( ) ;
111+ const bodyText = html . toLowerCase ( ) ;
112+ // Check for actual bot detection messages, but exclude meta robots tag
113+ const hasAccessDenied = bodyText . includes ( 'access denied' ) || bodyText . includes ( 'access blocked' ) ;
114+ const hasCaptcha = bodyText . includes ( 'captcha' ) ;
115+ const hasRobotBlock = bodyText . includes ( 'detected as a robot' ) || bodyText . includes ( 'automated access' ) ;
116+
117+ if ( hasAccessDenied || hasCaptcha || hasRobotBlock ) {
118+ throw new Error ( 'Bot detection triggered - access denied or CAPTCHA page' ) ;
119+ }
120+
104121 // Wait for page body and give time for dynamic content
105122 await this . page . waitForSelector ( 'body' , { timeout : 5000 } ) ;
106123 await new Promise ( resolve => setTimeout ( resolve , 2000 ) ) ;
@@ -118,29 +135,71 @@ class CarMaxScraper extends BaseScraper {
118135 // Get page HTML
119136 const html = await this . page . content ( ) ;
120137
121- // Extract VIN data from JavaScript array: const cars = [{"stockNumber":...,"vin":"..."}...]
122- const vinMap = new Map ( ) ;
123- const carsMatch = html . match ( / c o n s t c a r s = ( \[ .* ?\] ) ; / s) ;
124- if ( carsMatch ) {
138+ // CarMax now uses const searchResponse = {...} with items array
139+ const searchResponseMatch = html . match ( / c o n s t s e a r c h R e s p o n s e = ( \{ [ \s \S ] * ?\} ) ; / ) ;
140+ let pageListings = [ ] ;
141+
142+ if ( searchResponseMatch ) {
125143 try {
126- const carsData = JSON . parse ( carsMatch [ 1 ] ) ;
127- for ( const car of carsData ) {
128- if ( car . stockNumber && car . vin ) {
129- vinMap . set ( car . stockNumber . toString ( ) , car . vin ) ;
130- }
144+ const searchResponse = JSON . parse ( searchResponseMatch [ 1 ] ) ;
145+ if ( searchResponse . items && Array . isArray ( searchResponse . items ) ) {
146+ const queryModelLower = query . model . toLowerCase ( ) ;
147+
148+ pageListings = searchResponse . items
149+ . filter ( item => {
150+ // Filter to only items matching our query model (allows "ID.4 EV" when searching for "ID.4")
151+ const itemModelLower = ( item . model || '' ) . toLowerCase ( ) ;
152+ return itemModelLower . includes ( queryModelLower ) || queryModelLower . includes ( itemModelLower ) ;
153+ } )
154+ . map ( item => ( {
155+ id : item . stockNumber . toString ( ) ,
156+ vin : item . vin ,
157+ make : item . make ,
158+ model : query . model , // Use query model for consistency with validation
159+ year : item . year ,
160+ trim : item . trim || 'Base' , // Use 'Base' if trim is null/empty
161+ price : item . basePrice ,
162+ mileage : item . mileage ,
163+ location : 'CarMax' ,
164+ url : `https://www.carmax.com/car/${ item . stockNumber } ` ,
165+ listing_date : new Date ( ) . toISOString ( ) . split ( 'T' ) [ 0 ]
166+ } ) ) ;
131167 }
132168 } catch ( e ) {
133- // Failed to parse, continue without VINs
169+ console . error ( ` ⚠ Error parsing searchResponse JSON:` , e . message ) ;
170+ }
171+ } else {
172+ // Fallback to old HTML parsing method
173+ const vinMap = new Map ( ) ;
174+ const carsMatch = html . match ( / c o n s t c a r s = ( \[ .* ?\] ) ; / s) ;
175+ if ( carsMatch ) {
176+ try {
177+ const carsData = JSON . parse ( carsMatch [ 1 ] ) ;
178+ for ( const car of carsData ) {
179+ if ( car . stockNumber && car . vin ) {
180+ vinMap . set ( car . stockNumber . toString ( ) , car . vin ) ;
181+ }
182+ }
183+ } catch ( e ) {
184+ // Failed to parse
185+ }
186+ }
187+
188+ // Merge API VIN data
189+ for ( const [ stockNumber , vin ] of apiVinData ) {
190+ vinMap . set ( stockNumber , vin ) ;
134191 }
135- }
136192
137- // Merge API VIN data (from pagination) with static VIN data
138- for ( const [ stockNumber , vin ] of apiVinData ) {
139- vinMap . set ( stockNumber , vin ) ;
193+ const $ = cheerio . load ( html ) ;
194+ pageListings = parseListings ( $ , query . make , query . model , vinMap ) ;
140195 }
141196
142- const $ = cheerio . load ( html ) ;
143- const pageListings = parseListings ( $ , query . make , query . model , vinMap ) ;
197+ // If page 1 has 0 results, stop immediately (model doesn't exist on CarMax)
198+ if ( pageNum === 1 && pageListings . length === 0 ) {
199+ console . log ( ` ℹ No listings found for ${ query . make } ${ query . model } on CarMax` ) ;
200+ hasMorePages = false ;
201+ break ;
202+ }
144203
145204 // Deduplicate - only add listings we haven't seen before
146205 for ( const listing of pageListings ) {
0 commit comments