17
17
18
18
from __future__ import annotations
19
19
20
+ from collections .abc import Sequence
20
21
import functools
21
22
import os
22
23
import typing
23
- from typing import Any , List , Optional , Type
24
+ from typing import Any , Type
24
25
25
26
from etils import epy
26
27
from tensorflow_datasets .core .utils .lazy_imports_utils import tensorflow as tf
@@ -54,7 +55,7 @@ def __init__(
54
55
self ,
55
56
builder_dir : epath .PathLike ,
56
57
* ,
57
- info_proto : Optional [ dataset_info_pb2 .DatasetInfo ] = None ,
58
+ info_proto : dataset_info_pb2 .DatasetInfo | None = None ,
58
59
):
59
60
"""Constructor.
60
61
@@ -82,9 +83,7 @@ def __init__(
82
83
# original source code.
83
84
self .__module__ = info_proto .module_name
84
85
85
- builder_config = dataset_builder .BuilderConfig .from_dataset_info (
86
- info_proto ,
87
- )
86
+ builder_config = dataset_builder .BuilderConfig .from_dataset_info (info_proto )
88
87
# __init__ will call _build_data_dir, _create_builder_config,
89
88
# _pick_version to set the data_dir, config, and version
90
89
super ().__init__ (
@@ -158,9 +157,9 @@ def builder_from_directory(
158
157
159
158
160
159
def builder_from_directories (
161
- builder_dirs : List [epath .PathLike ],
160
+ builder_dirs : Sequence [epath .PathLike ],
162
161
* ,
163
- filetype_suffix : Optional [ str ] = None , # DEPRECATED
162
+ filetype_suffix : str | None = None , # DEPRECATED
164
163
) -> dataset_builder .DatasetBuilder :
165
164
"""Loads a `tfds.core.DatasetBuilder` from the given generated dataset path.
166
165
@@ -244,10 +243,8 @@ def builder_from_metadata(
244
243
Returns:
245
244
builder: `tfds.core.DatasetBuilder`, builder for dataset at the given path.
246
245
"""
247
- return ReadOnlyBuilder (
248
- builder_dir = builder_dir ,
249
- info_proto = info_proto ,
250
- )
246
+ builder = ReadOnlyBuilder (builder_dir = builder_dir , info_proto = info_proto )
247
+ return builder
251
248
252
249
253
250
@error_utils .reraise_with_context (registered .DatasetNotFoundError )
@@ -292,7 +289,7 @@ def builder_from_files(
292
289
return builder_from_directory (builder_dir )
293
290
294
291
295
- def _find_builder_dir (name : str , ** builder_kwargs : Any ) -> Optional [ str ] :
292
+ def _find_builder_dir (name : str , ** builder_kwargs : Any ) -> str | None :
296
293
"""Search whether the given dataset is present on disk and return its path.
297
294
298
295
Note:
@@ -344,17 +341,17 @@ def _find_builder_dir(name: str, **builder_kwargs: Any) -> Optional[str]:
344
341
return None
345
342
346
343
# Search the dataset across all registered data_dirs
347
- all_builder_dirs = []
348
- all_data_dirs = file_utils .list_data_dirs (given_data_dir = data_dir )
344
+ all_builder_dirs = set ()
345
+ all_data_dirs = set (file_utils .list_data_dirs (given_data_dir = data_dir ))
346
+ find_builder_fn = functools .partial (
347
+ _find_builder_dir_single_dir ,
348
+ builder_name = name .name ,
349
+ version_str = str (version ) if version else None ,
350
+ config_name = config ,
351
+ )
349
352
for current_data_dir in all_data_dirs :
350
- builder_dir = _find_builder_dir_single_dir (
351
- name .name ,
352
- data_dir = current_data_dir ,
353
- version_str = str (version ) if version else None ,
354
- config_name = config ,
355
- )
356
- if builder_dir :
357
- all_builder_dirs .append (builder_dir )
353
+ if builder_dir := find_builder_fn (data_dir = current_data_dir ):
354
+ all_builder_dirs .add (builder_dir )
358
355
359
356
if not all_builder_dirs :
360
357
all_dirs_str = '\n \t - ' .join (['' ] + [str (dir ) for dir in all_data_dirs ])
@@ -378,14 +375,14 @@ def _find_builder_dir(name: str, **builder_kwargs: Any) -> Optional[str]:
378
375
'Please resolve the ambiguity by explicitly setting `data_dir=`.'
379
376
)
380
377
381
- return all_builder_dirs [ 0 ]
378
+ return all_builder_dirs . pop ()
382
379
383
380
384
381
def _get_dataset_dir (
385
382
builder_dir : epath .Path ,
386
383
* ,
387
384
version_str : str ,
388
- config_name : Optional [ str ] = None ,
385
+ config_name : str | None = None ,
389
386
) -> epath .Path :
390
387
"""Returns the path for the given dataset, config and version."""
391
388
dataset_dir = builder_dir
@@ -402,12 +399,11 @@ def _contains_dataset(dataset_dir: epath.PathLike) -> bool:
402
399
403
400
404
401
def _find_builder_dir_single_dir (
405
- builder_name : str ,
406
- * ,
407
402
data_dir : epath .PathLike ,
408
- config_name : Optional [str ] = None ,
409
- version_str : Optional [str ] = None ,
410
- ) -> Optional [str ]:
403
+ builder_name : str ,
404
+ config_name : str | None = None ,
405
+ version_str : str | None = None ,
406
+ ) -> str | None :
411
407
"""Same as `find_builder_dir` but requires explicit dir."""
412
408
413
409
builder_dir = epath .Path (data_dir ) / builder_name
@@ -462,7 +458,7 @@ def _find_builder_dir_single_dir(
462
458
def _get_default_config_name (
463
459
builder_dir : epath .Path ,
464
460
name : str ,
465
- ) -> Optional [ str ] :
461
+ ) -> str | None :
466
462
"""Returns the default config of the given dataset, None if not found."""
467
463
# Search for the DatasetBuilder generation code
468
464
try :
@@ -488,9 +484,9 @@ def _get_default_config_name(
488
484
def _get_version_str (
489
485
builder_dir : epath .Path ,
490
486
* ,
491
- config_name : Optional [ str ] = None ,
492
- requested_version : Optional [ str ] = None ,
493
- ) -> Optional [ str ] :
487
+ config_name : str | None = None ,
488
+ requested_version : str | None = None ,
489
+ ) -> str | None :
494
490
"""Returns the version name found in the directory.
495
491
496
492
Args:
0 commit comments