@@ -169,8 +169,8 @@ def builder_from_directory(
169
169
above.
170
170
171
171
Args:
172
- builder_dir: `str`, path of the directory containing the dataset to read (
173
- e.g. `~/tensorflow_datasets/mnist/3.0.0/`).
172
+ builder_dir: Path of the directory containing the dataset to read (e.g.
173
+ `~/tensorflow_datasets/mnist/3.0.0/`).
174
174
175
175
Returns:
176
176
builder: `tfds.core.DatasetBuilder`, builder for dataset at the given path.
@@ -311,7 +311,7 @@ def builder_from_files(
311
311
return builder_from_directory (builder_dir )
312
312
313
313
314
- def _find_builder_dir (name : str , ** builder_kwargs : Any ) -> str | None :
314
+ def _find_builder_dir (name : str , ** builder_kwargs : Any ) -> epath . Path | None :
315
315
"""Search whether the given dataset is present on disk and return its path.
316
316
317
317
Note:
@@ -363,13 +363,13 @@ def _find_builder_dir(name: str, **builder_kwargs: Any) -> str | None:
363
363
return None
364
364
365
365
# Search the dataset across all registered data_dirs
366
- all_builder_dirs = set ()
366
+ all_builder_dirs : set [ epath . Path ] = set ()
367
367
all_data_dirs = set (file_utils .list_data_dirs (given_data_dir = data_dir ))
368
368
find_builder_fn = functools .partial (
369
369
_find_builder_dir_single_dir ,
370
370
builder_name = name .name ,
371
- version_str = str (version ) if version else None ,
372
371
config_name = config ,
372
+ version = version ,
373
373
)
374
374
if len (all_data_dirs ) <= 1 :
375
375
for current_data_dir in all_data_dirs :
@@ -398,14 +398,14 @@ def wrapped_find_builder_fn(data_dir):
398
398
399
399
# If the dataset root_dir exists, a common error is that the config name
400
400
# was not specified. So we list the possible configs and display them.
401
- possible_configs = _list_possible_configs (name , all_data_dirs )
401
+ possible_configs = _list_possible_configs (name . name , all_data_dirs )
402
402
if possible_configs :
403
- configs = '\n \t - ' .join (['' ] + list ( possible_configs ) )
403
+ configs_str = '\n \t - ' .join (['' ] + possible_configs )
404
404
error_msg = (
405
405
f'However, a folder for "{ name .name } " does exist. Is it possible that'
406
406
' you specified the wrong config? You can add a config by replacing'
407
407
f' `tfds.load({ name .name } )` by `tfds.load("{ name .name } /my_config")`.'
408
- f' Possible configs are:{ configs } \n '
408
+ f' Possible configs are:{ configs_str } \n '
409
409
)
410
410
411
411
error_utils .add_context (error_msg )
@@ -431,32 +431,19 @@ def wrapped_find_builder_fn(data_dir):
431
431
432
432
433
433
def _list_possible_configs (
434
- name : naming . DatasetName , all_data_dirs : set [epath .PathLike ]
435
- ) -> Sequence [str ]:
434
+ builder_name : str , all_data_dirs : set [epath .PathLike ]
435
+ ) -> list [str ]:
436
436
configs = []
437
437
for data_dir in all_data_dirs :
438
- root_dir = epath .Path (data_dir ) / name . name
439
- if root_dir .exists ():
440
- for path in root_dir .iterdir ():
438
+ builder_dir = epath .Path (data_dir ) / builder_name
439
+ if builder_dir .exists ():
440
+ for path in builder_dir .iterdir ():
441
441
if path .is_dir ():
442
442
configs .append (path .name )
443
443
return configs
444
444
445
445
446
- def _get_dataset_dir (
447
- builder_dir : epath .Path ,
448
- * ,
449
- version_str : str ,
450
- config_name : str | None = None ,
451
- ) -> epath .Path :
452
- """Returns the path for the given dataset, config and version."""
453
- dataset_dir = builder_dir
454
- if config_name :
455
- dataset_dir = dataset_dir / config_name
456
- return dataset_dir / version_str
457
-
458
-
459
- def _contains_dataset (dataset_dir : epath .PathLike ) -> bool :
446
+ def _contains_dataset (dataset_dir : epath .Path ) -> bool :
460
447
try :
461
448
return feature_lib .make_config_path (dataset_dir ).exists ()
462
449
except (OSError , tf .errors .PermissionDeniedError ):
@@ -467,49 +454,53 @@ def _find_builder_dir_single_dir(
467
454
data_dir : epath .PathLike ,
468
455
builder_name : str ,
469
456
config_name : str | None = None ,
470
- version_str : str | None = None ,
471
- ) -> str | None :
457
+ version : version_lib . Version | str | None = None ,
458
+ ) -> epath . Path | None :
472
459
"""Same as `find_builder_dir` but requires explicit dir."""
473
460
474
- builder_dir = epath .Path (data_dir ) / builder_name
475
-
476
461
# If the version is specified, check if the dataset dir exists and return.
477
- if version_str and version_lib .Version .is_valid (version_str ):
478
- dataset_dir = _get_dataset_dir (
479
- builder_dir = builder_dir ,
480
- version_str = version_str ,
462
+ if version_lib .Version .is_valid (version ):
463
+ dataset_dir = file_utils . get_dataset_dir (
464
+ data_dir = data_dir ,
465
+ builder_name = builder_name ,
481
466
config_name = config_name ,
467
+ version = version ,
482
468
)
483
469
if _contains_dataset (dataset_dir ):
484
- return os . fspath ( dataset_dir )
470
+ return dataset_dir
485
471
486
472
# If no config_name or an empty string was given, we try to find the default
487
473
# config and load the dataset for that.
488
474
if not config_name :
489
- default_config_name = _get_default_config_name (
490
- builder_dir = builder_dir , name = builder_name
475
+ config_name = _get_default_config_name (
476
+ data_dir = data_dir , builder_name = builder_name
491
477
)
492
- if default_config_name :
493
- return _find_builder_dir_single_dir (
494
- builder_name = builder_name ,
478
+ if version_lib .Version .is_valid (version ):
479
+ dataset_dir = file_utils .get_dataset_dir (
495
480
data_dir = data_dir ,
496
- config_name = default_config_name ,
497
- version_str = version_str ,
481
+ builder_name = builder_name ,
482
+ config_name = config_name ,
483
+ version = version ,
498
484
)
485
+ if _contains_dataset (dataset_dir ):
486
+ return dataset_dir
499
487
500
488
# Dataset wasn't found, try to find a suitable available version.
501
- found_version_str = _get_version_str (
502
- builder_dir , config_name = config_name , requested_version = version_str
489
+ found_version = _get_version (
490
+ data_dir = data_dir ,
491
+ builder_name = builder_name ,
492
+ config_name = config_name ,
493
+ requested_version = version ,
503
494
)
504
- if found_version_str and (
505
- version_str is None or found_version_str != version_str
506
- ):
507
- return _find_builder_dir_single_dir (
508
- builder_name = builder_name ,
495
+ if found_version and str (found_version ) != version :
496
+ dataset_dir = file_utils .get_dataset_dir (
509
497
data_dir = data_dir ,
498
+ builder_name = builder_name ,
510
499
config_name = config_name ,
511
- version_str = found_version_str ,
500
+ version = found_version ,
512
501
)
502
+ if _contains_dataset (dataset_dir ):
503
+ return dataset_dir
513
504
514
505
# If no builder found, we populate the error_context with useful information
515
506
# and return None.
@@ -521,16 +512,19 @@ def _find_builder_dir_single_dir(
521
512
522
513
523
514
def _get_default_config_name (
524
- builder_dir : epath .Path ,
525
- name : str ,
515
+ data_dir : epath .Path ,
516
+ builder_name : str ,
526
517
) -> str | None :
527
518
"""Returns the default config of the given dataset, None if not found."""
519
+ builder_dir = file_utils .get_dataset_dir (
520
+ data_dir = data_dir , builder_name = builder_name
521
+ )
528
522
# Search for the DatasetBuilder generation code
529
523
try :
530
524
# Warning: The registered dataset may not match the files (e.g. if
531
525
# the imported dataset has the same name as the generated files while
532
526
# being 2 different datasets)
533
- cls = registered .imported_builder_cls (name )
527
+ cls = registered .imported_builder_cls (builder_name )
534
528
cls = typing .cast (Type [dataset_builder .DatasetBuilder ], cls )
535
529
except registered .DatasetNotFoundError :
536
530
pass
@@ -543,45 +537,43 @@ def _get_default_config_name(
543
537
return cls .default_builder_config .name
544
538
545
539
# Otherwise, try to load default config from common metadata
546
- return dataset_builder .load_default_config_name (epath . Path ( builder_dir ) )
540
+ return dataset_builder .load_default_config_name (builder_dir )
547
541
548
542
549
- def _get_version_str (
550
- builder_dir : epath .Path ,
551
- * ,
543
+ def _get_version (
544
+ data_dir : epath .Path ,
545
+ builder_name : str ,
552
546
config_name : str | None = None ,
553
- requested_version : str | None = None ,
554
- ) -> str | None :
555
- """Returns the version name found in the directory.
547
+ requested_version : version_lib . Version | str | None = None ,
548
+ ) -> version_lib . Version | None :
549
+ """Returns the version found in the builder directory.
556
550
557
551
Args:
558
- builder_dir : Directory containing the versions (`builder_dir/1.0.0/`,...)
559
- config_name: Optional name of the config that should be used. Will be
560
- ignored if it is an empty string .
552
+ data_dir : Directory containing the builder.
553
+ builder_name: Name of the builder.
554
+ config_name: Name of the config .
561
555
requested_version: Optional version to search (e.g. `1.0.0`, `2.*.*`,...)
562
-
563
- Returns:
564
- version_str: The version directory name found in `builder_dir`.
565
556
"""
566
- if config_name :
567
- builder_dir = builder_dir / config_name
568
- all_versions = version_lib .list_all_versions (os .fspath (builder_dir ))
557
+ config_dir = file_utils .get_dataset_dir (
558
+ data_dir = data_dir , builder_name = builder_name , config_name = config_name
559
+ )
560
+ all_versions = version_lib .list_all_versions (config_dir )
569
561
# Version not given, using the latest one.
570
562
if not requested_version and all_versions :
571
- return str ( all_versions [- 1 ])
563
+ return all_versions [- 1 ]
572
564
# Version given, return the highest version matching `requested_version`.
573
565
for v in reversed (all_versions ):
574
566
if v .match (requested_version ):
575
- return str ( v )
567
+ return v
576
568
# Directory doesn't have version, or requested_version doesn't match
577
569
if requested_version :
578
570
error_msg = (
579
571
f'No version matching the requested { requested_version } was '
580
- f'found in the builder directory: { builder_dir } .'
572
+ f'found in the builder directory: { config_dir } .'
581
573
)
582
574
else :
583
575
error_msg = (
584
- f"The builder directory { builder_dir } doesn't contain any versions."
576
+ f"The builder directory { config_dir } doesn't contain any versions."
585
577
)
586
578
error_utils .add_context (error_msg )
587
579
return None
0 commit comments