@@ -26,7 +26,7 @@
 import json
 import os
 import sys
-from typing import Any, ClassVar, Dict, List, Optional, Tuple, Type, Union
+from typing import Any, ClassVar, Type

 from absl import logging
 from etils import epy
@@ -69,7 +69,7 @@
 ListOrTreeOrElem = type_utils.ListOrTreeOrElem
 Tree = type_utils.Tree
 TreeDict = type_utils.TreeDict
-VersionOrStr = Union[utils.Version, str]
+VersionOrStr = utils.Version | str

 FORCE_REDOWNLOAD = download.GenerateMode.FORCE_REDOWNLOAD
 REUSE_CACHE_IF_EXISTS = download.GenerateMode.REUSE_CACHE_IF_EXISTS
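
The import change above replaces `typing` aliases with built-in generics (PEP 585) and `|` unions (PEP 604). A minimal sketch of the equivalence, assuming Python 3.10+ because runtime-evaluated aliases such as `VersionOrStr` need native `|` support:

from typing import Dict, Optional, Union

# Old and new spellings describe exactly the same types.
release_notes_old: Optional[Dict[str, str]] = None
release_notes_new: dict[str, str] | None = None

# An alias assignment like `VersionOrStr = utils.Version | str` is evaluated
# at runtime, so it requires Python 3.10+; `from __future__ import annotations`
# only defers evaluation inside annotations, not in plain assignments.
VersionOrStrOld = Union[str, int]  # `str`/`int` stand in for the real types
VersionOrStrNew = str | int
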
@@ -108,7 +108,7 @@ class BuilderConfig:

   name: str
   version: VersionOrStr | None = None
-  release_notes: Dict[str, str] | None = None
+  release_notes: dict[str, str] | None = None
   supported_versions: list[VersionOrStr] = dataclasses.field(
       default_factory=list
   )
@@ -192,12 +192,12 @@ class DatasetBuilder(registered.RegisteredDataset):
   """

   # Semantic version of the dataset (ex: tfds.core.Version('1.2.0'))
-  VERSION: Optional[utils.Version] = None
+  VERSION: utils.Version | None = None

   # Release notes
   # Metadata only used for documentation. Should be a dict[version,description]
   # Multi-lines are automatically dedent
-  RELEASE_NOTES: ClassVar[Dict[str, str]] = {}
+  RELEASE_NOTES: ClassVar[dict[str, str]] = {}

   # List dataset versions which can be loaded using current code.
   # Data can only be prepared with canonical VERSION or above.
@@ -209,7 +209,7 @@ class DatasetBuilder(registered.RegisteredDataset):
   # Name of the builder config that should be used in case the user doesn't
   # specify a config when loading a dataset. If None, then the first config in
   # `BUILDER_CONFIGS` is used.
-  DEFAULT_BUILDER_CONFIG_NAME: Optional[str] = None
+  DEFAULT_BUILDER_CONFIG_NAME: str | None = None

   # Must be set for datasets that use 'manual_dir' functionality - the ones
   # that require users to do additional steps to download the data
@@ -222,15 +222,15 @@ class DatasetBuilder(registered.RegisteredDataset):

   # Optional max number of simultaneous downloads. Setting this value will
   # override download config settings if necessary.
-  MAX_SIMULTANEOUS_DOWNLOADS: Optional[int] = None
+  MAX_SIMULTANEOUS_DOWNLOADS: int | None = None

   # If not set, pkg_dir_path is inferred. However, if user of class knows better
   # then this can be set directly before init, to avoid heuristic inferences.
   # Example: `imported_builder_cls` function in `registered.py` module sets it.
-  pkg_dir_path: Optional[epath.Path] = None
+  pkg_dir_path: epath.Path | None = None

   # Holds information on versions and configs that should not be used.
-  BLOCKED_VERSIONS: ClassVar[Optional[utils.BlockedVersions]] = None
+  BLOCKED_VERSIONS: ClassVar[utils.BlockedVersions | None] = None

   @classmethod
   def _get_pkg_dir_path(cls) -> epath.Path:
@@ -309,7 +309,7 @@ def __init__(
   @utils.classproperty
   @classmethod
   @utils.memoize()
-  def code_path(cls) -> Optional[epath.Path]:
+  def code_path(cls) -> epath.Path | None:
     """Returns the path to the file where the Dataset class is located.

     Note: As the code can be run inside zip file. The returned value is
@@ -373,7 +373,7 @@ def supported_versions(self):
     return self.SUPPORTED_VERSIONS

   @functools.cached_property
-  def versions(self) -> List[utils.Version]:
+  def versions(self) -> list[utils.Version]:
     """Versions (canonical + availables), in preference order."""
     return [
         utils.Version(v) if isinstance(v, str) else v
@@ -407,7 +407,7 @@ def version(self) -> utils.Version:
     return self._version

   @property
-  def release_notes(self) -> Dict[str, str]:
+  def release_notes(self) -> dict[str, str]:
     if self.builder_config and self.builder_config.release_notes:
       return self.builder_config.release_notes
     else:
@@ -452,7 +452,7 @@ def data_path(self) -> epath.Path:

   @utils.classproperty
   @classmethod
-  def _checksums_path(cls) -> Optional[epath.Path]:
+  def _checksums_path(cls) -> epath.Path | None:
     """Returns the checksums path."""
     # Used:
     # * To load the checksums (in url_infos)
@@ -476,7 +476,7 @@ def _checksums_path(cls) -> Optional[epath.Path]:
   @utils.classproperty
   @classmethod
   @functools.lru_cache(maxsize=None)
-  def url_infos(cls) -> Optional[Dict[str, download.checksums.UrlInfo]]:
+  def url_infos(cls) -> dict[str, download.checksums.UrlInfo] | None:
     """Load `UrlInfo` from the given path."""
     # Note: If the dataset is downloaded with `record_checksums=True`, urls
     # might be updated but `url_infos` won't as it is memoized.
@@ -516,13 +516,13 @@ def info(self) -> dataset_info.DatasetInfo:

   @utils.classproperty
   @classmethod
-  def default_builder_config(cls) -> Optional[BuilderConfig]:
+  def default_builder_config(cls) -> BuilderConfig | None:
     return _get_default_config(
         builder_configs=cls.BUILDER_CONFIGS,
         default_config_name=cls.DEFAULT_BUILDER_CONFIG_NAME,
     )

-  def get_default_builder_config(self) -> Optional[BuilderConfig]:
+  def get_default_builder_config(self) -> BuilderConfig | None:
     """Returns the default builder config if there is one.

     Note that for dataset builders that cannot use the `cls.BUILDER_CONFIGS`, we
539
539
540
540
def get_reference (
541
541
self ,
542
- namespace : Optional [ str ] = None ,
542
+ namespace : str | None = None ,
543
543
) -> naming .DatasetReference :
544
544
"""Returns a reference to the dataset produced by this dataset builder.
545
545
@@ -807,9 +807,9 @@ def _update_dataset_info(self) -> None:
   @tfds_logging.as_data_source()
   def as_data_source(
       self,
-      split: Optional[Tree[splits_lib.SplitArg]] = None,
+      split: Tree[splits_lib.SplitArg] | None = None,
       *,
-      decoders: Optional[TreeDict[decode.partial_decode.DecoderArg]] = None,
+      decoders: TreeDict[decode.partial_decode.DecoderArg] | None = None,
       deserialize_method: decode.DeserializeMethod = decode.DeserializeMethod.DESERIALIZE_AND_DECODE,
   ) -> ListOrTreeOrElem[Sequence[Any]]:
     """Constructs an `ArrayRecordDataSource`.
@@ -818,7 +818,7 @@ def as_data_source(
       split: Which split of the data to load (e.g. `'train'`, `'test'`,
         `['train', 'test']`, `'train[80%:]'`,...). See our [split API
         guide](https://www.tensorflow.org/datasets/splits). If `None`, will
-        return all splits in a `Dict[Split, Sequence]`.
+        return all splits in a `dict[Split, Sequence]`.
       decoders: Nested dict of `Decoder` objects which allow to customize the
         decoding. The structure should match the feature structure, but only
         customized feature keys need to be present. See [the
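
As a hedged illustration of the `split` semantics documented above (the dataset name and file format are assumptions; `as_data_source` needs data prepared in a random-access format such as `array_record`):

import tensorflow_datasets as tfds

# Hypothetical example: prepare a dataset in a random-access file format.
builder = tfds.builder("mnist", file_format="array_record")
builder.download_and_prepare()

train = builder.as_data_source(split="train")  # a single Sequence
everything = builder.as_data_source()          # split=None -> dict[Split, Sequence]
first = train[0]                               # indexed access, no tf.data pipeline
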
@@ -913,12 +913,12 @@ def build_single_data_source(split: str) -> Sequence[Any]:
   @tfds_logging.as_dataset()
   def as_dataset(
       self,
-      split: Optional[Tree[splits_lib.SplitArg]] = None,
+      split: Tree[splits_lib.SplitArg] | None = None,
       *,
-      batch_size: Optional[int] = None,
+      batch_size: int | None = None,
       shuffle_files: bool = False,
-      decoders: Optional[TreeDict[decode.partial_decode.DecoderArg]] = None,
-      read_config: Optional[read_config_lib.ReadConfig] = None,
+      decoders: TreeDict[decode.partial_decode.DecoderArg] | None = None,
+      read_config: read_config_lib.ReadConfig | None = None,
       as_supervised: bool = False,
   ):
     # pylint: disable=line-too-long
@@ -1029,9 +1029,9 @@ def as_dataset(
   def _build_single_dataset(
       self,
       split: splits_lib.Split,
-      batch_size: Optional[int],
+      batch_size: int | None,
       shuffle_files: bool,
-      decoders: Optional[TreeDict[decode.partial_decode.DecoderArg]],
+      decoders: TreeDict[decode.partial_decode.DecoderArg] | None,
       read_config: read_config_lib.ReadConfig,
       as_supervised: bool,
   ) -> tf.data.Dataset:
@@ -1064,7 +1064,7 @@ def _build_single_dataset(
             "structure."
         )

-      def lookup_nest(features: Dict[str, Any]) -> Tuple[Any, ...]:
+      def lookup_nest(features: dict[str, Any]) -> tuple[Any, ...]:
         """Converts `features` to the structure described by `supervised_keys`.

         Note that there is currently no way to access features in nested
@@ -1208,7 +1208,7 @@ def _info(self) -> dataset_info.DatasetInfo:
   def _download_and_prepare(
       self,
       dl_manager: download.DownloadManager,
-      download_config: Optional[download.DownloadConfig] = None,
+      download_config: download.DownloadConfig | None = None,
   ) -> None:
     """Downloads and prepares dataset for reading.

@@ -1228,8 +1228,8 @@ def _download_and_prepare(
   def _as_dataset(
       self,
       split: splits_lib.Split,
-      decoders: Optional[TreeDict[decode.partial_decode.DecoderArg]] = None,
-      read_config: Optional[read_config_lib.ReadConfig] = None,
+      decoders: TreeDict[decode.partial_decode.DecoderArg] | None = None,
+      read_config: read_config_lib.ReadConfig | None = None,
       shuffle_files: bool = False,
   ) -> tf.data.Dataset:
     """Constructs a `tf.data.Dataset`.
@@ -1313,7 +1313,7 @@ def _make_download_manager(
   @utils.docs.do_not_doc_in_subclasses
   @utils.classproperty
   @classmethod
-  def builder_config_cls(cls) -> Optional[type[BuilderConfig]]:
+  def builder_config_cls(cls) -> type[BuilderConfig] | None:
     """Returns the builder config class."""
     if not cls.BUILDER_CONFIGS:
       return None
@@ -1328,7 +1328,7 @@ def builder_config_cls(cls) -> Optional[type[BuilderConfig]]:
     return builder_cls

   @property
-  def builder_config(self) -> Optional[Any]:
+  def builder_config(self) -> Any | None:
     """`tfds.core.BuilderConfig` for this builder."""
     return self._builder_config

@@ -1410,6 +1410,19 @@ def builder_configs(cls) -> dict[str, BuilderConfig]:
     )
     return config_dict

+  @classmethod
+  def get_builder_config(
+      cls, name: str, version: str | utils.Version | None = None
+  ) -> BuilderConfig | None:
+    """Returns the builder config with the given name and version."""
+    if version is not None:
+      name_with_version = f"{name}:{version}"
+      if builder_config := cls.builder_configs.get(name_with_version):
+        return builder_config
+    if builder_config := cls.builder_configs.get(name):
+      return builder_config
+    return None
+
   def _get_filename_template(
       self, split_name: str
   ) -> naming.ShardedFileTemplate:
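
A short usage sketch for the classmethod added above; `MyBuilder` is a stand-in for any `DatasetBuilder` subclass that defines `BUILDER_CONFIGS`:

# Tries the versioned key "plain_text:1.0.0" first, then falls back to "plain_text".
config = MyBuilder.get_builder_config("plain_text", version="1.0.0")

# Unlike `cls.builder_configs[name]`, a miss returns None rather than raising KeyError.
assert MyBuilder.get_builder_config("no_such_config") is None
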
@@ -1437,7 +1450,7 @@ class FileReaderBuilder(DatasetBuilder):
   def __init__(
       self,
       *,
-      file_format: Union[None, str, file_adapters.FileFormat] = None,
+      file_format: str | file_adapters.FileFormat | None = None,
       **kwargs: Any,
   ):
     """Initializes an instance of FileReaderBuilder.
@@ -1460,7 +1473,7 @@ def _example_specs(self):
   def _as_dataset(  # pytype: disable=signature-mismatch  # overriding-parameter-type-checks
       self,
       split: splits_lib.Split,
-      decoders: Optional[TreeDict[decode.partial_decode.DecoderArg]],
+      decoders: TreeDict[decode.partial_decode.DecoderArg] | None,
       read_config: read_config_lib.ReadConfig,
       shuffle_files: bool,
   ) -> tf.data.Dataset:
@@ -1508,7 +1521,7 @@ class GeneratorBasedBuilder(FileReaderBuilder):
   def _split_generators(
       self,
       dl_manager: download.DownloadManager,
-  ) -> Dict[splits_lib.Split, split_builder_lib.SplitGenerator]:
+  ) -> dict[splits_lib.Split, split_builder_lib.SplitGenerator]:
     """Downloads the data and returns dataset splits with associated examples.

     Example:
@@ -1743,7 +1756,7 @@ def _download_and_prepare(  # pytype: disable=signature-mismatch  # overriding-p
     self.info.set_splits(split_dict)

   def read_text_file(
-      self, filename: epath.PathLike, encoding: Optional[str] = None
+      self, filename: epath.PathLike, encoding: str | None = None
   ) -> str:
     """Returns the text in the given file and records the lineage."""
     filename = epath.Path(filename)
@@ -1775,7 +1788,7 @@ def read_tfrecord_as_dataset(

   def read_tfrecord_as_examples(
       self,
-      filenames: Union[str, Sequence[str]],
+      filenames: str | Sequence[str],
       compression_type: str | None = None,
       num_parallel_reads: int | None = None,
   ) -> Iterator[tf.train.Example]:
@@ -1932,9 +1945,9 @@ def _check_split_names(split_names: Iterable[str]) -> None:


 def _get_default_config(
-    builder_configs: List[BuilderConfig],
-    default_config_name: Optional[str],
-) -> Optional[BuilderConfig]:
+    builder_configs: list[BuilderConfig],
+    default_config_name: str | None,
+) -> BuilderConfig | None:
   """Returns the default config from the given builder configs.

   Arguments:
@@ -1995,8 +2008,8 @@ def load_default_config_name(builder_dir: epath.Path) -> str | None:


 def canonical_version_for_config(
-    instance_or_cls: Union[DatasetBuilder, Type[DatasetBuilder]],
-    config: Optional[BuilderConfig] = None,
+    instance_or_cls: DatasetBuilder | Type[DatasetBuilder],
+    config: BuilderConfig | None = None,
 ) -> utils.Version:
   """Get the canonical version for the given config.
