diff --git a/src/datasets/load.py b/src/datasets/load.py index 1218262a856..29fdd6fc913 100644 --- a/src/datasets/load.py +++ b/src/datasets/load.py @@ -977,10 +977,6 @@ def dataset_module_factory( elif e.response.status_code == 403: message += f" Visit the dataset page at https://huggingface.co/datasets/{path} to ask for access." raise DatasetNotFoundError(message) from e - except RevisionNotFoundError as e: - raise DatasetNotFoundError( - f"Revision '{revision}' doesn't exist for dataset '{path}' on the Hub." - ) from e except RepositoryNotFoundError as e: raise DatasetNotFoundError(f"Dataset '{path}' doesn't exist on the Hub or cannot be accessed.") from e try: @@ -1014,10 +1010,8 @@ def dataset_module_factory( elif e.response.status_code == 403: message += f" Visit the dataset page at https://huggingface.co/datasets/{path} to ask for access." raise DatasetNotFoundError(message) from e - except RevisionNotFoundError as e: - raise DatasetNotFoundError( - f"Revision '{revision}' doesn't exist for dataset '{path}' on the Hub." - ) from e + except RevisionNotFoundError as e: + raise DatasetNotFoundError(f"Revision '{revision}' doesn't exist for dataset '{path}' on the Hub.") from e except Exception as e1: # All the attempts failed, before raising the error we should check if the module is already cached try: diff --git a/tests/test_load.py b/tests/test_load.py index 422e6cd3180..06d4f4d2b8d 100644 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -763,6 +763,18 @@ def test_load_dataset_from_hub(self): str(context.exception), ) + @pytest.mark.integration + def test_load_dataset_invalid_revision_with_cache(self): + repo_id = SAMPLE_DATASET_IDENTIFIER2 + builder = load_dataset_builder(repo_id, cache_dir=self.cache_dir) + builder.download_and_prepare() + with self.assertRaises(DatasetNotFoundError) as context: + datasets.load_dataset(repo_id, revision="invalid_revision", cache_dir=self.cache_dir) + self.assertIn( + "Revision 'invalid_revision' doesn't exist for dataset", + str(context.exception), + ) + def test_load_dataset_namespace(self): with self.assertRaises(DatasetNotFoundError) as context: datasets.load_dataset("hf-internal-testing/_dummy")