Skip to content

Commit 8ed09ca

Browse files
committed
check encoding error
1 parent 8e64707 commit 8ed09ca

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

scrapers/locations.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -130,7 +130,7 @@ def run():
130130

131131
try:
132132
rows = get_raw_data()
133-
except (URLError, socket.timeout) as e:
133+
except (URLError, socket.timeout, UnicodeDecodeError) as e:
134134
print(f"Unable to scrape locations data: {e}")
135135
if not os.path.exists(fname):
136136
with open(fname, "w", encoding="utf-8") as location_file:

scrapers/utils.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -252,7 +252,7 @@ def is_url(path_string: str) -> bool:
252252
return False
253253

254254

255-
def read_csv(path: str, types_dict: type) -> list:
255+
def read_csv(path: str, types_dict: type, encoding: str = "utf-8") -> list:
256256
"""
257257
Parses data from file according to a specific format from a CSV
258258
@@ -274,18 +274,18 @@ def read_csv(path: str, types_dict: type) -> list:
274274
if path_is_url:
275275
with_open = urlopen(path, timeout=15)
276276
else:
277-
with_open = open(path, mode="r", newline="", encoding="utf-8")
277+
with_open = open(path, mode="r", newline="", encoding=encoding)
278278

279279
with with_open as csvfile:
280280
reader = csv.DictReader(
281281
csvfile
282282
if not path_is_url
283-
else csvfile.read().decode("utf-8")[1:].splitlines() # type: ignore
283+
else csvfile.read().decode(encoding)[1:].splitlines() # type: ignore
284284
)
285285
for row in reader:
286286
assert all(
287287
col in row for col in cols
288-
), f"Missing columns in CSV file: {path}"
288+
), f"Missing columns in CSV file: {[col for col in cols if col not in row]}"
289289
data.append({col: row[col] for col in cols}) # type: ignore
290290

291291
return data

0 commit comments

Comments (0)