Skip to content

Commit 1666a00

Browse files
committed
Add timeouts to requests
Also use more specific Exception
1 parent feca592 commit 1666a00

File tree

1 file changed

+11
-11
lines changed

1 file changed

+11
-11
lines changed

pythainlp/corpus/core.py

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -26,10 +26,10 @@ def get_corpus_db(url: str):
2626

2727
corpus_db = None
2828
try:
29-
corpus_db = requests.get(url)
29+
corpus_db = requests.get(url, timeout=10)
3030
except requests.exceptions.HTTPError as http_err:
3131
print(f"HTTP error occurred: {http_err}")
32-
except Exception as err:
32+
except requests.exceptions.RequestException as err:
3333
print(f"Non-HTTP error occurred: {err}")
3434

3535
return corpus_db
@@ -252,11 +252,11 @@ def get_corpus_path(
252252
"""
253253
from typing import Dict
254254

255-
_CUSTOMIZE: Dict[str, str] = {
255+
CUSTOMIZE: Dict[str, str] = {
256256
# "the corpus name":"path"
257257
}
258-
if name in list(_CUSTOMIZE):
259-
return _CUSTOMIZE[name]
258+
if name in list(CUSTOMIZE):
259+
return CUSTOMIZE[name]
260260

261261
default_path = get_corpus_default_db(name=name, version=version)
262262
if default_path is not None:
@@ -291,14 +291,14 @@ def _download(url: str, dst: str) -> int:
291291
@param: URL for downloading file
292292
@param: dst place to put the file into
293293
"""
294-
_CHUNK_SIZE = 64 * 1024 # 64 KiB
294+
CHUNK_SIZE = 64 * 1024 # 64 KiB
295295

296296
from urllib.request import urlopen
297297

298298
import requests
299299

300300
file_size = int(urlopen(url).info().get("Content-Length", -1))
301-
r = requests.get(url, stream=True)
301+
r = requests.get(url, stream=True, timeout=10)
302302
with open(get_full_data_path(dst), "wb") as f:
303303
pbar = None
304304
try:
@@ -308,7 +308,7 @@ def _download(url: str, dst: str) -> int:
308308
except ImportError:
309309
pbar = None
310310

311-
for chunk in r.iter_content(chunk_size=_CHUNK_SIZE):
311+
for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
312312
if chunk:
313313
f.write(chunk)
314314
if pbar:
@@ -335,7 +335,7 @@ def _check_hash(dst: str, md5: str) -> None:
335335
file_md5 = hashlib.md5(content).hexdigest()
336336

337337
if md5 != file_md5:
338-
raise Exception("Hash does not match expected.")
338+
raise ValueError("Hash does not match expected.")
339339

340340

341341
def _version2int(v: str) -> int:
@@ -511,8 +511,8 @@ def download(
511511
os.mkdir(get_full_data_path(foldername))
512512
with zipfile.ZipFile(
513513
get_full_data_path(file_name), "r"
514-
) as zip:
515-
zip.extractall(path=get_full_data_path(foldername))
514+
) as zip_file:
515+
zip_file.extractall(path=get_full_data_path(foldername))
516516

517517
if found:
518518
local_db["_default"][found]["version"] = version

0 commit comments

Comments
 (0)