40
40
import posixpath
41
41
import tempfile
42
42
import time
43
- from typing import Any , Optional
43
+ from typing import Any
44
44
45
45
from absl import logging
46
46
from etils import epath
66
66
# pylint: enable=g-import-not-at-top
67
67
68
68
69
- # TODO(b/109648354): Remove the "pytype: disable" comment.
70
- Nest = tuple ["Nest" , ...] | dict [str , "Nest" ] | str # pytype: disable=not-supported-yet
69
+ Nest = tuple ["Nest" , ...] | dict [str , "Nest" ] | str
71
70
SupervisedKeysType = tuple [Nest , Nest ] | tuple [Nest , Nest , Nest ]
72
71
73
72
@@ -104,7 +103,7 @@ def load_metadata(self, data_dir):
104
103
raise NotImplementedError ()
105
104
106
105
107
- @dataclasses .dataclass ()
106
+ @dataclasses .dataclass
108
107
class DatasetIdentity :
109
108
"""Identity of a dataset that completely identifies a dataset."""
110
109
@@ -167,7 +166,7 @@ def from_proto(
167
166
)
168
167
169
168
170
- class DatasetInfo ( object ) :
169
+ class DatasetInfo :
171
170
"""Information about a dataset.
172
171
173
172
`DatasetInfo` documents datasets, including its name, version, and features.
@@ -185,15 +184,15 @@ def __init__(
185
184
* ,
186
185
builder : DatasetIdentity | Any ,
187
186
description : str | None = None ,
188
- features : Optional [ feature_lib .FeatureConnector ] = None ,
189
- supervised_keys : Optional [ SupervisedKeysType ] = None ,
187
+ features : feature_lib .FeatureConnector | None = None ,
188
+ supervised_keys : SupervisedKeysType | None = None ,
190
189
disable_shuffling : bool = False ,
191
190
homepage : str | None = None ,
192
191
citation : str | None = None ,
193
192
metadata : Metadata | None = None ,
194
193
license : str | None = None , # pylint: disable=redefined-builtin
195
- redistribution_info : Optional [ dict [str , str ]] = None ,
196
- split_dict : Optional [ splits_lib .SplitDict ] = None ,
194
+ redistribution_info : dict [str , str ] | None = None ,
195
+ split_dict : splits_lib .SplitDict | None = None ,
197
196
alternative_file_formats : (
198
197
Sequence [str | file_adapters .FileFormat ] | None
199
198
) = None ,
@@ -403,7 +402,7 @@ def disable_shuffling(self) -> bool:
403
402
return self .as_proto .disable_shuffling
404
403
405
404
@property
406
- def homepage (self ):
405
+ def homepage (self ) -> str :
407
406
urls = self .as_proto .location .urls
408
407
tfds_homepage = f"https://www.tensorflow.org/datasets/catalog/{ self .name } "
409
408
return urls and urls [0 ] or tfds_homepage
@@ -413,7 +412,7 @@ def citation(self) -> str:
413
412
return self .as_proto .citation
414
413
415
414
@property
416
- def data_dir (self ):
415
+ def data_dir (self ) -> str :
417
416
return self ._identity .data_dir
418
417
419
418
@property
@@ -431,15 +430,15 @@ def download_size(self) -> utils.Size:
431
430
)
432
431
433
432
@download_size .setter
434
- def download_size (self , size ):
433
+ def download_size (self , size : int ):
435
434
self .as_proto .download_size = size
436
435
437
436
@property
438
437
def features (self ):
439
438
return self ._features
440
439
441
440
@property
442
- def alternative_file_formats (self ) -> Sequence [file_adapters .FileFormat ]:
441
+ def alternative_file_formats (self ) -> list [file_adapters .FileFormat ]:
443
442
return self ._alternative_file_formats
444
443
445
444
@property
@@ -454,7 +453,7 @@ def set_is_blocked(self, is_blocked: str) -> None:
454
453
self ._is_blocked = is_blocked
455
454
456
455
@property
457
- def supervised_keys (self ) -> Optional [ SupervisedKeysType ] :
456
+ def supervised_keys (self ) -> SupervisedKeysType | None :
458
457
if not self .as_proto .HasField ("supervised_keys" ):
459
458
return None
460
459
supervised_keys = self .as_proto .supervised_keys
@@ -576,8 +575,8 @@ def set_splits(self, split_dict: splits_lib.SplitDict) -> None:
576
575
# into the new split_dict. Also add the filename template if it's not set.
577
576
new_split_infos = []
578
577
incomplete_filename_template = naming .ShardedFileTemplate (
578
+ data_dir = epath .Path (self .data_dir ),
579
579
dataset_name = self .name ,
580
- data_dir = self .data_dir ,
581
580
filetype_suffix = (
582
581
self .as_proto .file_format or file_adapters .DEFAULT_FILE_FORMAT .value
583
582
),
@@ -728,22 +727,20 @@ def read_from_directory(self, dataset_info_dir: epath.PathLike) -> None:
728
727
729
728
# Restore the feature metadata (vocabulary, labels names,...)
730
729
if self .features :
731
- self .features .load_metadata (dataset_info_dir ) # pytype: disable=missing-parameter # always-use-property-annotation
730
+ self .features .load_metadata (dataset_info_dir , feature_name = None )
732
731
# For `ReadOnlyBuilder`, reconstruct the features from the config.
733
732
elif feature_lib .make_config_path (dataset_info_dir ).exists ():
734
- self ._features = feature_lib . FeatureConnector .from_config (
733
+ self ._features = top_level_feature . TopLevelFeature .from_config (
735
734
dataset_info_dir
736
735
)
736
+
737
+ # If the dataset was loaded from file, self.metadata will be `None`, so
738
+ # we create a MetadataDict first.
739
+ if not self ._metadata :
740
+ self ._metadata = MetadataDict ()
737
741
# Restore the MetadataDict from metadata.json if there is any
738
- if (
739
- self .metadata is not None
740
- or _metadata_filepath (dataset_info_dir ).exists ()
741
- ):
742
- # If the dataset was loaded from file, self.metadata will be `None`, so
743
- # we create a MetadataDict first.
744
- if self .metadata is None :
745
- self ._metadata = MetadataDict ()
746
- self .metadata .load_metadata (dataset_info_dir ) # pytype: disable=attribute-error # always-use-property-annotation
742
+ if _metadata_filepath (dataset_info_dir ).exists ():
743
+ self ._metadata .load_metadata (dataset_info_dir )
747
744
748
745
# Update fields which are not defined in the code. This means that
749
746
# the code will overwrite fields which are present in
@@ -1215,7 +1212,7 @@ def pack_as_supervised_ds(
1215
1212
and isinstance (ds .element_spec , tuple )
1216
1213
and len (ds .element_spec ) == 2
1217
1214
):
1218
- x_key , y_key = ds_info .supervised_keys # pytype: disable=bad-unpacking # always-use-property-annotation
1215
+ x_key , y_key = ds_info .supervised_keys # pytype: disable=bad-unpacking
1219
1216
ds = ds .map (lambda x , y : {x_key : x , y_key : y })
1220
1217
return ds
1221
1218
else : # If dataset isn't a supervised tuple (input, label), return as-is
0 commit comments