Merge pull request #5367 from albanie:patch-1

The TensorFlow Datasets Authors · The TensorFlow Datasets Authors · commit 93f323b8946a · 2024-04-23T01:51:52.000-07:00
PiperOrigin-RevId: 627306052
diff --git a/tensorflow_datasets/core/dataset_builder.py b/tensorflow_datasets/core/dataset_builder.py
@@ -259,7 +259,7 @@ def __init__(
     Args:
       data_dir: directory to read/write data. Defaults to the value of the
         environment variable TFDS_DATA_DIR, if set, otherwise falls back to
-        datasets are stored.
+        "~/tensorflow_datasets".
       config: `tfds.core.BuilderConfig` or `str` name, optional configuration
         for the dataset that affects the data generated on disk. Different
         `builder_config`s will have their own subdirectories and versions.
@@ -268,7 +268,7 @@ def __init__(
         special value "experimental_latest" will use the highest version, even
         if not default. This is not recommended unless you know what you are
         doing, as the version could be broken.
-    """
+    """  # fmt: skip
     if data_dir:
       data_dir = os.fspath(data_dir)  # Pathlib -> str
     # For pickling:
diff --git a/tensorflow_datasets/core/load.py b/tensorflow_datasets/core/load.py
@@ -583,7 +583,7 @@ def load(
       all splits in a `Dict[Split, tf.data.Dataset]`
     data_dir: directory to read/write data. Defaults to the value of the
       environment variable TFDS_DATA_DIR, if set, otherwise falls back to
-      datasets are stored.
+      '~/tensorflow_datasets'.
     batch_size: `int`, if set, add a batch dimension to examples. Note that
       variable length features will be 0-padded. If `batch_size=-1`, will return
       the full dataset as `tf.Tensor`s.
@@ -593,7 +593,7 @@ def load(
       `tfds.core.DatasetBuilder.download_and_prepare` before calling
       `tfds.core.DatasetBuilder.as_dataset`. If `False`, data is expected to be
       in `data_dir`. If `True` and the data is already in `data_dir`,
-      when data_dir is a Placer path.
+      `download_and_prepare` is a no-op.
     as_supervised: `bool`, if `True`, the returned `tf.data.Dataset` will have a
       2-tuple structure `(input, label)` according to
       `builder.info.supervised_keys`. If `False`, the default, the returned
@@ -637,7 +637,7 @@ def load(
       (version, features, splits, num_examples,...). Note that the `ds_info`
       object documents the entire dataset, regardless of the `split` requested.
       Split-specific information is available in `ds_info.splits`.
-  """
+  """  # fmt: skip
   dbuilder = _fetch_builder(
       name,
       data_dir,
@@ -752,12 +752,12 @@ def data_source(
       all splits in a `Dict[Split, Sequence]`
     data_dir: directory to read/write data. Defaults to the value of the
       environment variable TFDS_DATA_DIR, if set, otherwise falls back to
-      datasets are stored.
+      '~/tensorflow_datasets'.
     download: `bool` (optional), whether to call
       `tfds.core.DatasetBuilder.download_and_prepare` before calling
       `tfds.core.DatasetBuilder.as_data_source`. If `False`, data is expected to
       be in `data_dir`. If `True` and the data is already in `data_dir`,
-      when data_dir is a Placer path.
+      `download_and_prepare` is a no-op.
     decoders: Nested dict of `Decoder` objects which allow to customize the
       decoding. The structure should match the feature structure, but only
       customized feature keys need to be present. See [the
@@ -784,7 +784,7 @@ def data_source(
   Returns:
     `Sequence` if `split`,
     `dict<key: tfds.Split, value: Sequence>` otherwise.
-  """
+  """  # fmt: skip
   builder_kwargs = _set_file_format_for_data_source(builder_kwargs)
   dbuilder = _fetch_builder(
       name,
diff --git a/tensorflow_datasets/core/naming.py b/tensorflow_datasets/core/naming.py
@@ -202,12 +202,13 @@ class DatasetReference:
       provided.
     data_dir: Optional data dir where this dataset is located. If None, defaults
       to the value of the environment variable TFDS_DATA_DIR, if set, otherwise
+      falls back to '~/tensorflow_datasets'.
     split_mapping: mapping between split names. If the `DatasetCollection` wants
       to use different split names than the source datasets, then this mapping
       can be used. For example, if the collection uses the split `valid`, but
       this dataset uses the split `validation`, then the `split_mapping` should
       be `{'validation': 'valid'}`.
-  """
+  """  # fmt: skip
 
   dataset_name: str
   namespace: None | str = None