Skip to content
This repository was archived by the owner on Apr 2, 2026. It is now read-only.

Commit a859b0e

Browse files
committed
don't catch bare except
1 parent 41a2196 commit a859b0e

File tree

1 file changed

+5
-19
lines changed

1 file changed

+5
-19
lines changed

src/main.py

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from urllib.parse import urljoin
1515

1616
import cloudscraper
17+
import requests
1718
from bs4 import BeautifulSoup, ResultSet
1819
from tqdm import tqdm
1920

@@ -28,20 +29,11 @@
2829
RESTAURANT_PATH_MARKER = "/restaurants/"
2930

3031
# Create a cloudscraper instance that handles Cloudflare
31-
scraper = cloudscraper.create_scraper(
32-
browser={
33-
'browser': 'firefox',
34-
'platform': 'windows',
35-
'mobile': False
36-
}
37-
)
32+
scraper = cloudscraper.create_scraper(browser={"browser": "firefox", "platform": "windows", "mobile": False})
3833

3934

4035
def fetch_url(url: str, error_context: str) -> bytes | None:
41-
"""
42-
Fetch URL content with unified error handling.
43-
"""
44-
36+
"""Fetch URL content with unified error handling."""
4537
try:
4638
response = scraper.get(url, timeout=15)
4739

@@ -51,7 +43,7 @@ def fetch_url(url: str, error_context: str) -> bytes | None:
5143

5244
return response.content
5345

54-
except Exception as e:
46+
except requests.exceptions.RequestException as e:
5547
logger.error("%s error for %s (%s): %s", type(e).__name__, error_context, url, e)
5648
return None
5749

@@ -63,7 +55,6 @@ def find_restaurant_schema(schema_scripts: ResultSet) -> dict | None:
6355
Searches for @type: "Restaurant" in the schema data, handling both
6456
single objects and arrays of objects.
6557
"""
66-
6758
for script in schema_scripts:
6859
try:
6960
data = json.loads(script.string)
@@ -86,7 +77,6 @@ def find_restaurant_schema(schema_scripts: ResultSet) -> dict | None:
8677

8778
def extract_schema_data(url: str) -> dict | None:
8879
"""Extract Restaurant schema.org data from a restaurant page."""
89-
9080
data = fetch_url(url, "restaurant page")
9181
if not data:
9282
return None
@@ -99,7 +89,6 @@ def extract_schema_data(url: str) -> dict | None:
9989

10090
def get_restaurant_links(list_url: str) -> list[str]:
10191
"""Get all unique restaurant links from the list view page."""
102-
10392
data = fetch_url(list_url, "list page")
10493
if not data:
10594
return []
@@ -128,7 +117,6 @@ def flatten_schema_data(schema: dict) -> dict[str, str]:
128117
129118
Extracts common fields, address components, and geo coordinates.
130119
"""
131-
132120
flat = {}
133121

134122
# Map schema.org field names to CSV column names
@@ -170,7 +158,6 @@ def flatten_schema_data(schema: dict) -> dict[str, str]:
170158

171159
def save_to_csv(restaurants: list[dict], output_path: Path) -> None:
172160
"""Save restaurant data to CSV file."""
173-
174161
# Collect all unique field names
175162
fieldnames = sorted(set().union(*(r.keys() for r in restaurants)))
176163

@@ -185,7 +172,6 @@ def save_to_csv(restaurants: list[dict], output_path: Path) -> None:
185172

186173
def main() -> None:
187174
"""Hey, I just met you, and this is crazy, but I'm the main function, so call me maybe."""
188-
189175
# Fetch restaurant URLs
190176
logger.info("Fetching restaurant links...")
191177
restaurant_urls = get_restaurant_links(BASE_URL)
@@ -216,4 +202,4 @@ def main() -> None:
216202

217203

218204
if __name__ == "__main__":
219-
main()
205+
main()

0 commit comments

Comments (0)