@@ -48,8 +48,8 @@ def _parse_address(self) -> str:
4848
4949 def _parse_main_link (self ) -> str :
5050 """Extract main property link from property card."""
51- link_element = self .card .find ("a" , class_ = "property-card- link" , attrs = {"data-test" : "property-card- link" })
52- if not isinstance (link_element , Tag ):
51+ link_element = self .card .find ("a" , class_ = re . compile ( "property.+- link" ) , attrs = {"data-test" : re . compile ( "property.+- link" ) })
52+ if not isinstance (link_element , Tag ) or not link_element . get ( "href" ) :
5353 return ""
5454
5555 href = cast ("str" , link_element .get ("href" , "" )).strip ()
@@ -118,11 +118,11 @@ def _format_price_range(self, prices: list[str]) -> str | None:
118118
119119 def _get_units_count (self ) -> int :
120120 """Extract number of available units."""
121- badge_area = self .card .find ("div" , class_ = re . compile ( r"StyledPropertyCardBadgeArea" ) )
121+ badge_area = self .card .find ("div" , attrs = { "data-c11n-component" : "PropertyCard.BadgeArea" } )
122122 if not badge_area or isinstance (badge_area , NavigableString ):
123123 return 1
124124
125- badges = badge_area .find_all ("span" , class_ = re . compile ( r"StyledPropertyCardBadge" ) )
125+ badges = badge_area .find_all ("span" , attrs = { "data-c11n-component" : "PropertyCard.Badge" } )
126126 for badge in badges :
127127 badge_text = badge .get_text (strip = True ).lower ()
128128 unit_match = self ._PATTERN_UNIT_COUNT .search (badge_text )
@@ -151,7 +151,23 @@ def _get_main_price_listings(self) -> list[PropertyListing]:
151151 if not main_price_element :
152152 return []
153153
154- price_text = self ._clean_price_text (main_price_element .get_text (strip = True ))
154+ # The price container can contain multiple spans like "Fees may apply".
155+ # get_text() on the outer span concatenates them without spaces, corrupting
156+ # the price text (e.g. "$1,608+ 2 bdsFees may apply"). Instead, grab only
157+ # the first nested span which contains the actual price string.
158+ inner_span = main_price_element .find ("span" )
159+ if inner_span and not isinstance (inner_span , Tag ):
160+ msg = f"inner_span type is incorrect: { type (inner_span )} "
161+ raise TypeError (msg )
162+
163+ price_span = inner_span .find ("span" ) if inner_span else None
164+ if price_span and not isinstance (price_span , Tag ):
165+ msg = f"price_span type is incorrect: { type (price_span )} "
166+ raise TypeError (msg )
167+
168+ raw_text = (price_span or inner_span or main_price_element ).get_text (strip = True )
169+
170+ price_text = self ._clean_price_text (raw_text )
155171 if not price_text :
156172 return []
157173
@@ -170,22 +186,32 @@ def _get_inventory_listings(self) -> list[PropertyListing]:
170186 if not inventory_section or isinstance (inventory_section , NavigableString ):
171187 return []
172188
173- # Extract price and bedroom data
174- price_elements = inventory_section .find_all ("span" , class_ = re .compile (r"PriceText" ))
175- bed_elements = inventory_section .find_all ("span" , class_ = re .compile (r"BedText" ))
189+ price_bed_pairs : list [tuple [str , str , str ]] = [] # (price, bed_info, link)
190+ for anchor in inventory_section .find_all ("a" ):
191+ box = anchor .find ("div" , attrs = {"data-testid" : "PropertyCardInventoryBox" })
192+ if not box or isinstance (box , NavigableString ):
193+ continue
194+
195+ spans = box .find_all ("span" )
196+ if not spans :
197+ continue
198+
199+ price_text = self ._clean_price_text (spans [0 ].get_text (strip = True ))
200+ if not price_text :
201+ continue
202+
203+ bed_info = spans [1 ].get_text (strip = True ) if len (spans ) > 1 else ""
204+
205+ href = cast ("str" , anchor .get ("href" , "" )).strip ()
206+ link = href if href .startswith ("http" ) else f"https://www.zillow.com{ href } "
176207
177- price_bed_pairs = []
178- for i , price_elem in enumerate (price_elements ):
179- price_text = self ._clean_price_text (price_elem .get_text (strip = True ))
180- if price_text :
181- bed_info = bed_elements [i ].get_text (strip = True ) if i < len (bed_elements ) else ""
182- price_bed_pairs .append ((price_text , bed_info ))
208+ price_bed_pairs .append ((price_text , bed_info , link ))
183209
184210 units_count = self ._get_units_count ()
185211
186212 # Handle multiple units with price range
187213 if units_count > 1 and len (price_bed_pairs ) > 1 :
188- prices = [price for price , _ in price_bed_pairs ]
214+ prices = [price for price , _ , __ in price_bed_pairs ]
189215 price_range = cast ("str" , self ._format_price_range (prices ))
190216
191217 # Calculate median of the range
@@ -199,15 +225,14 @@ def _get_inventory_listings(self) -> list[PropertyListing]:
199225
200226 # Create individual listings
201227 listings = []
202- for price , bed_info in price_bed_pairs :
228+ for price , bed_info , link in price_bed_pairs :
203229 address = self .address + (f" ({ bed_info } )" if bed_info else "" )
204- specific_link = self ._create_specific_link (bed_info )
205230
206231 # For individual listings, median is same as price
207232 numeric_price = self ._extract_numeric_price (price )
208233 median_price = str (numeric_price ) if numeric_price else price
209234
210- listings .append (PropertyListing (address , price , median_price , specific_link ))
235+ listings .append (PropertyListing (address , price , median_price , link ))
211236
212237 return listings
213238
0 commit comments