Skip to content

Commit 17ac716

Browse files
authored
Update info_utils.py
1 parent 35ecbc9 commit 17ac716

File tree

1 file changed

+7
-10
lines changed

1 file changed

+7
-10
lines changed

src/datasets/utils/info_utils.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,12 @@ def verify_checksums(expected_checksums: Optional[dict], recorded_checksums: dic
     if expected_checksums is None:
         logger.info("Unable to verify checksums.")
         return
-    if len(set(expected_checksums) - set(recorded_checksums)) > 0:
-        raise ExpectedMoreDownloadedFilesError(str(set(expected_checksums) - set(recorded_checksums)))
-    if len(set(recorded_checksums) - set(expected_checksums)) > 0:
-        raise UnexpectedDownloadedFileError(str(set(recorded_checksums) - set(expected_checksums)))
-    bad_urls = [url for url in expected_checksums if expected_checksums[url] != recorded_checksums[url]]
+    bad_urls = [
+        url
+        for url in (set(recorded_checksums) & set(expected_checksums))
+        if expected_checksums[url] != recorded_checksums[url]
+    ]
+
     for_verification_name = " for " + verification_name if verification_name is not None else ""
     if len(bad_urls) > 0:
         raise NonMatchingChecksumError(
@@ -64,13 +65,9 @@ def verify_splits(expected_splits: Optional[dict], recorded_splits: dict):
     if expected_splits is None:
         logger.info("Unable to verify splits sizes.")
         return
-    if len(set(expected_splits) - set(recorded_splits)) > 0:
-        raise ExpectedMoreSplitsError(str(set(expected_splits) - set(recorded_splits)))
-    if len(set(recorded_splits) - set(expected_splits)) > 0:
-        raise UnexpectedSplitsError(str(set(recorded_splits) - set(expected_splits)))
     bad_splits = [
         {"expected": expected_splits[name], "recorded": recorded_splits[name]}
-        for name in expected_splits
+        for name in (set(recorded_splits) & set(expected_splits))
         if expected_splits[name].num_examples != recorded_splits[name].num_examples
     ]
     if len(bad_splits) > 0:

0 commit comments

Comments
 (0)