@@ -82,6 +82,9 @@ def parse_product_page(response: ScrapeApiResponse) -> Dict:
8282 """parse hidden product data from product pages"""
8383 selector = response .selector
8484 script = selector .xpath ("//script[contains(text(),'offers')]/text()" ).get ()
85+ if not script :
86+ log .warning (f"Could not find product data script on { response .context ['url' ]} " )
87+ return {}
8588 data = json .loads (script )
8689 return data
8790
@@ -90,9 +93,15 @@ def parse_shop_page(response: ScrapeApiResponse) -> Dict:
9093 """parse hidden shop data from shop pages"""
9194 selector = response .selector
9295 script = selector .xpath ("//script[contains(text(),'itemListElement')]/text()" ).get ()
93- data = json .loads (script )
94- return data
95-
96+ if not script :
97+ log .warning (f"Could not find shop data script on { response .context ['url' ]} " )
98+ return {}
99+ try :
100+ data = json .loads (script )
101+ return data
102+ except json .JSONDecodeError as e :
103+ log .error (f"Failed to parse JSON from shop page { response .context ['url' ]} : { e } " )
104+ return {}
96105
97106async def scrape_search (url : str , max_pages : int = None ) -> List [Dict ]:
98107 """scrape product listing data from Etsy search pages"""
@@ -159,7 +168,7 @@ async def scrape_shop(urls: List[str]) -> List[Dict]:
159168 # scrape all the shop pages concurrently
160169 async for response in SCRAPFLY .concurrent_scrape (to_scrape ):
161170 data = parse_shop_page (response )
162- data [' url' ] = response .context [' url' ]
171+ data [" url" ] = response .context [" url" ]
163172 shops .append (data )
164173 log .success (f"scraped { len (shops )} shops from shop pages" )
165174 return shops
0 commit comments