@@ -45,11 +45,12 @@ def verify_checksums(expected_checksums: Optional[dict], recorded_checksums: dic
4545 if expected_checksums is None :
4646 logger .info ("Unable to verify checksums." )
4747 return
48- if len (set (expected_checksums ) - set (recorded_checksums )) > 0 :
49- raise ExpectedMoreDownloadedFilesError (str (set (expected_checksums ) - set (recorded_checksums )))
50- if len (set (recorded_checksums ) - set (expected_checksums )) > 0 :
51- raise UnexpectedDownloadedFileError (str (set (recorded_checksums ) - set (expected_checksums )))
52- bad_urls = [url for url in expected_checksums if expected_checksums [url ] != recorded_checksums [url ]]
48+ bad_urls = [
49+ url
50+ for url in (set (recorded_checksums ) & set (expected_checksums ))
51+ if expected_checksums [url ] != recorded_checksums [url ]
52+ ]
53+
5354 for_verification_name = " for " + verification_name if verification_name is not None else ""
5455 if len (bad_urls ) > 0 :
5556 raise NonMatchingChecksumError (
@@ -64,13 +65,9 @@ def verify_splits(expected_splits: Optional[dict], recorded_splits: dict):
6465 if expected_splits is None :
6566 logger .info ("Unable to verify splits sizes." )
6667 return
67- if len (set (expected_splits ) - set (recorded_splits )) > 0 :
68- raise ExpectedMoreSplitsError (str (set (expected_splits ) - set (recorded_splits )))
69- if len (set (recorded_splits ) - set (expected_splits )) > 0 :
70- raise UnexpectedSplitsError (str (set (recorded_splits ) - set (expected_splits )))
7168 bad_splits = [
7269 {"expected" : expected_splits [name ], "recorded" : recorded_splits [name ]}
73- for name in expected_splits
70+ for name in ( set ( recorded_splits ) & set ( expected_splits ))
7471 if expected_splits [name ].num_examples != recorded_splits [name ].num_examples
7572 ]
7673 if len (bad_splits ) > 0 :
0 commit comments