@@ -937,7 +937,6 @@ def as_dataset(
937
937
decoders : TreeDict [decode .partial_decode .DecoderArg ] | None = None ,
938
938
read_config : read_config_lib .ReadConfig | None = None ,
939
939
as_supervised : bool = False ,
940
- file_format : str | file_adapters .FileFormat | None = None ,
941
940
):
942
941
# pylint: disable=line-too-long
943
942
"""Constructs a `tf.data.Dataset`.
@@ -1007,9 +1006,6 @@ def as_dataset(
1007
1006
a 2-tuple structure `(input, label)` according to
1008
1007
`builder.info.supervised_keys`. If `False`, the default, the returned
1009
1008
`tf.data.Dataset` will have a dictionary with all the features.
1010
- file_format: if the dataset is stored in multiple file formats, then this
1011
- argument can be used to specify the file format to load. If not
1012
- specified, the default file format is used.
1013
1009
1014
1010
Returns:
1015
1011
`tf.data.Dataset`, or if `split=None`, `dict<key: tfds.Split, value:
@@ -1043,7 +1039,6 @@ def as_dataset(
1043
1039
decoders = decoders ,
1044
1040
read_config = read_config ,
1045
1041
as_supervised = as_supervised ,
1046
- file_format = file_format ,
1047
1042
)
1048
1043
all_ds = tree .map_structure (build_single_dataset , split )
1049
1044
return all_ds
@@ -1056,28 +1051,19 @@ def _build_single_dataset(
1056
1051
decoders : TreeDict [decode .partial_decode .DecoderArg ] | None ,
1057
1052
read_config : read_config_lib .ReadConfig ,
1058
1053
as_supervised : bool ,
1059
- file_format : str | file_adapters .FileFormat | None = None ,
1060
1054
) -> tf .data .Dataset :
1061
1055
"""as_dataset for a single split."""
1062
1056
wants_full_dataset = batch_size == - 1
1063
1057
if wants_full_dataset :
1064
1058
batch_size = self .info .splits .total_num_examples or sys .maxsize
1065
1059
1066
- if file_format is not None :
1067
- file_format = file_adapters .FileFormat .from_value (file_format )
1068
-
1069
1060
# Build base dataset
1070
- as_dataset_kwargs = {
1071
- "split" : split ,
1072
- "shuffle_files" : shuffle_files ,
1073
- "decoders" : decoders ,
1074
- "read_config" : read_config ,
1075
- }
1076
- # Not all dataset builder classes support file_format, so only pass it if
1077
- # it's supported.
1078
- if "file_format" in inspect .signature (self ._as_dataset ).parameters :
1079
- as_dataset_kwargs ["file_format" ] = file_format
1080
- ds = self ._as_dataset (** as_dataset_kwargs )
1061
+ ds = self ._as_dataset (
1062
+ split = split ,
1063
+ shuffle_files = shuffle_files ,
1064
+ decoders = decoders ,
1065
+ read_config = read_config ,
1066
+ )
1081
1067
1082
1068
# Auto-cache small datasets which are small enough to fit in memory.
1083
1069
if self ._should_cache_ds (
@@ -1263,7 +1249,6 @@ def _as_dataset(
1263
1249
decoders : TreeDict [decode .partial_decode .DecoderArg ] | None = None ,
1264
1250
read_config : read_config_lib .ReadConfig | None = None ,
1265
1251
shuffle_files : bool = False ,
1266
- file_format : str | file_adapters .FileFormat | None = None ,
1267
1252
) -> tf .data .Dataset :
1268
1253
"""Constructs a `tf.data.Dataset`.
1269
1254
@@ -1279,9 +1264,6 @@ def _as_dataset(
1279
1264
read_config: `tfds.ReadConfig`
1280
1265
shuffle_files: `bool`, whether to shuffle the input files. Optional,
1281
1266
defaults to `False`.
1282
- file_format: if the dataset is stored in multiple file formats, then this
1283
- argument can be used to specify the file format to load. If not
1284
- specified, the default file format is used.
1285
1267
1286
1268
Returns:
1287
1269
`tf.data.Dataset`
@@ -1525,14 +1507,16 @@ def _example_specs(self):
1525
1507
)
1526
1508
return self .info .features .get_serialized_info ()
1527
1509
1528
- def _as_dataset ( # pytype: disable=signature-mismatch # overriding-parameter-type-checks
1510
+ def _as_dataset (
1529
1511
self ,
1530
1512
split : splits_lib .Split ,
1531
- decoders : TreeDict [decode .partial_decode .DecoderArg ] | None ,
1532
- read_config : read_config_lib .ReadConfig ,
1533
- shuffle_files : bool ,
1534
- file_format : file_adapters .FileFormat | None = None ,
1513
+ decoders : TreeDict [decode .partial_decode .DecoderArg ] | None = None ,
1514
+ read_config : read_config_lib .ReadConfig | None = None ,
1515
+ shuffle_files : bool = False ,
1535
1516
) -> tf .data .Dataset :
1517
+ if read_config is None :
1518
+ read_config = read_config_lib .ReadConfig ()
1519
+
1536
1520
# Partial decoding
1537
1521
# TODO(epot): Should be moved inside `features.decode_example`
1538
1522
if isinstance (decoders , decode .PartialDecoding ):
@@ -1550,10 +1534,18 @@ def _as_dataset( # pytype: disable=signature-mismatch # overriding-parameter-t
1550
1534
f"Features are not set for dataset { self .name } in { self .data_dir } !"
1551
1535
)
1552
1536
1537
+ file_format = (
1538
+ read_config .file_format
1539
+ or self .info .file_format
1540
+ or file_adapters .DEFAULT_FILE_FORMAT
1541
+ )
1542
+ if file_format is not None :
1543
+ file_format = file_adapters .FileFormat .from_value (file_format )
1544
+
1553
1545
reader = reader_lib .Reader (
1554
1546
self .data_dir ,
1555
1547
example_specs = example_specs ,
1556
- file_format = file_format or self . info . file_format ,
1548
+ file_format = file_format ,
1557
1549
)
1558
1550
decode_fn = functools .partial (features .decode_example , decoders = decoders )
1559
1551
return reader .read (
0 commit comments