Skip to content

Commit 812e8a9

Browse files
tomvdw authored and The TensorFlow Datasets Authors committed
Skip download and prepare if the dataset is already prepared and download_mode is REUSE_DATASET_IF_EXISTS.
PiperOrigin-RevId: 642267366
1 parent 3ce5439 commit 812e8a9

File tree

2 files changed: +15 −3 lines changed

2 files changed

+15
-3
lines changed

tensorflow_datasets/core/load.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@
4343
from tensorflow_datasets.core import utils
4444
from tensorflow_datasets.core import visibility
4545
from tensorflow_datasets.core.dataset_builders import huggingface_dataset_builder # pylint:disable=unused-import
46+
from tensorflow_datasets.core.download import util
4647
from tensorflow_datasets.core.utils import error_utils
4748
from tensorflow_datasets.core.utils import gcs_utils
4849
from tensorflow_datasets.core.utils import py_utils
@@ -501,6 +502,16 @@ def _download_and_prepare_builder(
501502
download: bool,
502503
download_and_prepare_kwargs: Optional[Dict[str, Any]],
503504
) -> None:
505+
"""Downloads and prepares the dataset builder if necessary."""
506+
if dbuilder.is_prepared():
507+
if not download_and_prepare_kwargs:
508+
return
509+
if download_config := download_and_prepare_kwargs.get('download_config'):
510+
if (
511+
download_config.download_mode
512+
== util.GenerateMode.REUSE_DATASET_IF_EXISTS
513+
):
514+
return
504515
if download:
505516
download_and_prepare_kwargs = download_and_prepare_kwargs or {}
506517
dbuilder.download_and_prepare(**download_and_prepare_kwargs)
@@ -663,7 +674,7 @@ def load(
663674

664675

665676
def _set_file_format_for_data_source(
666-
builder_kwargs: Optional[Dict[str, Any]]
677+
builder_kwargs: Optional[Dict[str, Any]],
667678
) -> Dict[str, Any]:
668679
"""Normalizes file format in builder_kwargs for `tfds.data_source`."""
669680
if builder_kwargs is None:

tensorflow_datasets/core/registered_test.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,8 @@
1717

1818
import abc
1919
import re
20-
2120
from unittest import mock
2221
import pytest
23-
2422
from tensorflow_datasets import testing
2523
from tensorflow_datasets.core import constants
2624
from tensorflow_datasets.core import load
@@ -49,6 +47,9 @@ def as_dataset(self, **kwargs):
4947
self.as_dataset_kwargs = kwargs
5048
return self
5149

50+
def is_prepared(self) -> bool:
51+
return False
52+
5253
VERSION = utils.Version("1.0.0")
5354
BUILDER_CONFIGS = []
5455
builder_configs = {}

0 commit comments

Comments (0)