Skip to content

Commit c177d39

Browse files
LennartPurucker, pre-commit-ci[bot], PGijsbers
authored
Better Error for Checksum Mismatch (#1225)
* add better error handling for checksum when downloading a file
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)
* update usage of __is_checksum_equal
* Update openml/_api_calls.py (Co-authored-by: Pieter Gijsbers <[email protected]>)

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Pieter Gijsbers <[email protected]>
1 parent ce82fd5 commit c177d39

File tree

1 file changed

+17
-4
lines changed

1 file changed

+17
-4
lines changed

openml/_api_calls.py

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -297,11 +297,11 @@ def __read_url(url, request_method, data=None, md5_checksum=None):
297297
)
298298

299299

300-
def __is_checksum_equal(downloaded_file, md5_checksum=None):
300+
def __is_checksum_equal(downloaded_file_binary: bytes, md5_checksum: Optional[str] = None) -> bool:
301301
if md5_checksum is None:
302302
return True
303303
md5 = hashlib.md5()
304-
md5.update(downloaded_file.encode("utf-8"))
304+
md5.update(downloaded_file_binary)
305305
md5_checksum_download = md5.hexdigest()
306306
return md5_checksum == md5_checksum_download
307307

@@ -323,7 +323,21 @@ def _send_request(request_method, url, data, files=None, md5_checksum=None):
323323
else:
324324
raise NotImplementedError()
325325
__check_response(response=response, url=url, file_elements=files)
326-
if request_method == "get" and not __is_checksum_equal(response.text, md5_checksum):
326+
if request_method == "get" and not __is_checksum_equal(
327+
response.text.encode("utf-8"), md5_checksum
328+
):
329+
330+
# -- Check if encoding is not UTF-8 perhaps
331+
if __is_checksum_equal(response.content, md5_checksum):
332+
raise OpenMLHashException(
333+
"Checksum of downloaded file is unequal to the expected checksum {}"
334+
"because the text encoding is not UTF-8 when downloading {}. "
335+
"There might be a sever-sided issue with the file, "
336+
"see: https://github.com/openml/openml-python/issues/1180.".format(
337+
md5_checksum, url
338+
)
339+
)
340+
327341
raise OpenMLHashException(
328342
"Checksum of downloaded file is unequal to the expected checksum {} "
329343
"when downloading {}.".format(md5_checksum, url)
@@ -384,7 +398,6 @@ def __parse_server_exception(
384398
url: str,
385399
file_elements: Dict,
386400
) -> OpenMLServerError:
387-
388401
if response.status_code == 414:
389402
raise OpenMLServerError("URI too long! ({})".format(url))
390403
try:

0 commit comments

Comments
 (0)