@@ -368,6 +368,61 @@ def list_dataset_versions(
368
368
return sorted (found_versions )
369
369
370
370
371
def is_valid_variant_dir(
    variant_dir: Path,
    matched_files: set[str] | None = None,
    include_old_tfds_version: bool = False,
) -> bool:
  """Checks whether a directory looks like a usable dataset variant.

  The checks run in the order below; the first one that fails logs a warning
  and ends the validation:
    1. The directory name must be a valid version string.
    2. The dataset info file (`constants.DATASET_INFO_FILENAME`) must be
       present.
    3. Unless `include_old_tfds_version` is set, the features file
       (`constants.FEATURES_FILENAME`) must be present as well.

  Args:
    variant_dir: Directory to inspect; its name is interpreted as the version.
    matched_files: Names of the files known to be in `variant_dir`. When None,
      the names of the `*.json` files directly inside the directory are used.
    include_old_tfds_version: If True, the features file is not required, so
      datasets generated with TFDS before 4.0.0 are accepted.

  Returns:
    True if every applicable check passes, False otherwise.
  """
  version_name = variant_dir.name
  if not version_lib.Version.is_valid(version_name):
    logging.warning(
        'Variant folder %s has invalid version %s', variant_dir, version_name
    )
    return False

  # Only list the directory when the caller did not already supply its files.
  if matched_files is None:
    matched_files = {path.name for path in variant_dir.glob('*.json')}

  # The dataset info file is always mandatory; the features file is mandatory
  # only when old-style (pre-4.0.0) datasets are not being accepted.
  required_files = [constants.DATASET_INFO_FILENAME]
  if not include_old_tfds_version:
    required_files.append(constants.FEATURES_FILENAME)

  for required_file in required_files:
    if required_file not in matched_files:
      logging.warning(
          'Variant folder %s has no %s', variant_dir, required_file
      )
      return False

  return True
424
+
425
+
371
426
def list_dataset_variants (
372
427
dataset_dir : Path ,
373
428
namespace : str | None = None ,
@@ -401,36 +456,17 @@ def list_dataset_variants(
401
456
matched_files_by_variant_dir [file .parent ].add (file .name )
402
457
403
458
for variant_dir , matched_files in matched_files_by_variant_dir .items ():
404
- if constants .DATASET_INFO_FILENAME not in matched_files :
405
- logging .warning (
406
- 'Ignoring variant folder %s, which has no %s' ,
407
- variant_dir ,
408
- constants .DATASET_INFO_FILENAME ,
409
- )
410
- continue
411
-
412
- if (
413
- not include_old_tfds_version
414
- and constants .FEATURES_FILENAME not in matched_files
459
+ if not is_valid_variant_dir (
460
+ variant_dir = variant_dir ,
461
+ matched_files = matched_files ,
462
+ include_old_tfds_version = include_old_tfds_version ,
415
463
):
416
- logging .info (
417
- 'Ignoring variant folder %s, which has no %s' ,
418
- variant_dir ,
419
- constants .FEATURES_FILENAME ,
420
- )
421
- continue
422
-
423
- version = variant_dir .name
424
- if not version_lib .Version .is_valid (version ):
425
- logging .warning (
426
- 'Ignoring variant folder %s, which has invalid version %s' ,
427
- variant_dir ,
428
- version ,
429
- )
464
+ logging .warning ('Skipping invalid variant directory: %s' , variant_dir )
430
465
continue
431
466
432
467
config_dir = variant_dir .parent
433
468
config = config_dir .name if config_dir != dataset_dir else None
469
+ version = variant_dir .name
434
470
435
471
yield naming .DatasetReference (
436
472
namespace = namespace ,
0 commit comments