Skip to content

Commit 78b145f

Browse files
Copilot and lurenss committed
Address code review feedback
- Add version constraint to scrapegraph-py dependency
- Improve error handling with specific exceptions in close() method
- Add response validation in scrape_product() and scrape_search_results()
- Use safe dictionary access with isinstance checks
- Add logging for exception types in error handlers

Co-authored-by: lurenss <[email protected]>
1 parent 695ccda commit 78b145f

File tree

2 files changed

+46
-29
lines changed

2 files changed

+46
-29
lines changed

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
# ScrapeGraphAI SDK (API-based)
2-
scrapegraph-py
2+
scrapegraph-py>=1.0.0
33

44
# Elasticsearch
55
elasticsearch>=8.0.0

src/scrapegraph_demo/scraper.py

Lines changed: 45 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -95,10 +95,15 @@ def scrape_product(self, url: str, marketplace: str) -> Optional[Product]:
9595
output_schema=output_schema
9696
)
9797

98-
# Parse SDK response
99-
if response and 'result' in response:
100-
product = self._parse_scraped_data(response['result'], url, marketplace)
101-
return product
98+
# Parse SDK response with validation
99+
if response and isinstance(response, dict) and 'result' in response:
100+
result_data = response['result']
101+
if isinstance(result_data, dict):
102+
product = self._parse_scraped_data(result_data, url, marketplace)
103+
return product
104+
else:
105+
print(f"Warning: Invalid result structure in SDK response for {url}")
106+
return self._mock_scrape_product(url, marketplace)
102107
else:
103108
print(f"Warning: No valid response from SDK for {url}")
104109
return self._mock_scrape_product(url, marketplace)
@@ -173,30 +178,37 @@ def scrape_search_results(
173178
output_schema=output_schema
174179
)
175180

176-
# Parse response
181+
# Parse response with validation
177182
products = []
178-
if response and 'result' in response and 'products' in response['result']:
179-
for i, product_data in enumerate(response['result']['products'][:max_results]):
180-
product_url = product_data.get('product_url', f"https://{marketplace.lower()}.com/product/{i}")
181-
182-
# Create Product object from search result data
183-
product = Product(
184-
product_id=self._extract_product_id(product_url),
185-
name=product_data.get('name', f"{search_query} Product {i+1}"),
186-
price=self._extract_price(str(product_data.get('price', 0))),
187-
currency=product_data.get('currency', 'USD'),
188-
url=product_url,
189-
marketplace=marketplace,
190-
rating=product_data.get('rating'),
191-
review_count=product_data.get('review_count'),
192-
scraped_at=datetime.utcnow()
193-
)
194-
products.append(product)
195-
196-
return products
197-
else:
198-
print(f"Warning: No valid search results from SDK, using mock data")
199-
return self._get_mock_search_results(search_query, marketplace, max_results)
183+
if response and isinstance(response, dict) and 'result' in response:
184+
result_data = response.get('result', {})
185+
if isinstance(result_data, dict) and 'products' in result_data:
186+
products_list = result_data.get('products', [])
187+
if isinstance(products_list, list):
188+
for i, product_data in enumerate(products_list[:max_results]):
189+
if not isinstance(product_data, dict):
190+
continue
191+
product_url = product_data.get('product_url', f"https://{marketplace.lower()}.com/product/{i}")
192+
193+
# Create Product object from search result data
194+
product = Product(
195+
product_id=self._extract_product_id(product_url),
196+
name=product_data.get('name', f"{search_query} Product {i+1}"),
197+
price=self._extract_price(str(product_data.get('price', 0))),
198+
currency=product_data.get('currency', 'USD'),
199+
url=product_url,
200+
marketplace=marketplace,
201+
rating=product_data.get('rating'),
202+
review_count=product_data.get('review_count'),
203+
scraped_at=datetime.utcnow()
204+
)
205+
products.append(product)
206+
207+
return products
208+
209+
# If we get here, no valid results were found
210+
print(f"Warning: No valid search results from SDK, using mock data")
211+
return self._get_mock_search_results(search_query, marketplace, max_results)
200212

201213
except Exception as e:
202214
print(f"Error scraping search results: {str(e)}")
@@ -342,5 +354,10 @@ def close(self):
342354
if self.client is not None:
343355
try:
344356
self.client.close()
357+
except (AttributeError, RuntimeError) as e:
358+
# AttributeError: if close method doesn't exist
359+
# RuntimeError: if connection already closed or other runtime errors
360+
print(f"Warning: Error closing SDK client ({type(e).__name__}): {e}")
345361
except Exception as e:
346-
print(f"Warning: Error closing SDK client: {e}")
362+
# Catch any other unexpected exceptions but log the type
363+
print(f"Warning: Unexpected error closing SDK client ({type(e).__name__}): {e}")

0 commit comments

Comments
 (0)