@@ -747,12 +747,16 @@ def read_from_directory(self, dataset_info_dir: epath.PathLike) -> None:
747
747
dataset_info_dir
748
748
)
749
749
750
- # Restore the MetaDataDict from metadata.json if there is any
751
- if _metadata_filepath (dataset_info_dir ).exists ():
752
- # If the dataset was loaded from file, self.metadata will be `None`, so
753
- # we create a MetadataDict first.
754
- if self ._metadata is None :
755
- self ._metadata = MetadataDict ()
750
+ # If the dataset was loaded from file, self.metadata will be `None`, so
751
+ # we create a MetadataDict first.
752
+ if self ._metadata is None :
753
+ self ._metadata = LazyMetadataDict (dataset_info_dir )
754
+ elif isinstance (self ._metadata , MetadataDict ):
755
+ lazy_metadata = LazyMetadataDict (dataset_info_dir )
756
+ lazy_metadata .update (self ._metadata )
757
+ self ._metadata = lazy_metadata
758
+ elif _metadata_filepath (dataset_info_dir ).exists ():
759
+ # Restore the MetaDataDict from metadata.json if there is any
756
760
self ._metadata .load_metadata (dataset_info_dir )
757
761
758
762
# Update fields which are not defined in the code. This means that
@@ -1375,6 +1379,12 @@ def add_tfds_data_source_access(
1375
1379
)
1376
1380
1377
1381
1382
def _load_metadata_from_file(data_dir: epath.PathLike) -> dict[str, Any]:
  """Reads and parses the metadata JSON file located for `data_dir`.

  Args:
    data_dir: Directory passed to `_metadata_filepath` to locate the file.

  Returns:
    The decoded JSON content of the metadata file.
  """
  metadata_path = _metadata_filepath(data_dir)
  with metadata_path.open(mode="r") as metadata_file:
    content = json.load(metadata_file)
  return content
1386
+
1387
+
1378
1388
class MetadataDict (Metadata , dict ):
1379
1389
"""A `tfds.core.Metadata` object that acts as a `dict`.
1380
1390
@@ -1389,8 +1399,41 @@ def save_metadata(self, data_dir):
1389
1399
def load_metadata(self, data_dir):
  """Restore the metadata.

  Replaces the current content of this dict with the content of the metadata
  file found for `data_dir`.

  Args:
    data_dir: Directory from which the metadata file is read.
  """
  # Read the file *before* clearing, so that a missing or malformed file
  # raises without wiping the entries currently held by this dict.
  restored = _load_metadata_from_file(data_dir)
  self.clear()
  self.update(restored)
1403
+
1404
+
1405
class LazyMetadataDict(MetadataDict):
  """A `tfds.core.Metadata` object that acts as a `dict`.

  Content is lazily loaded from the given data directory: the metadata file is
  only read the first time the content of the dict is read (lookup, membership
  test, iteration, length, comparison, ...).

  Note that loading goes through `load_metadata`, which clears the dict before
  filling it. So if the metadata file exists, its content replaces any entry
  that was written to this object before the first read.
  """

  def __init__(self, data_dir: epath.PathLike) -> None:
    """Initializes an empty dict bound to `data_dir`; performs no file read."""
    self._data_dir = epath.Path(data_dir)
    self._data_is_loaded = False
    super().__init__()

  def _load_metadata(self) -> None:
    """Loads the metadata file on first call; later calls are no-ops."""
    if self._data_is_loaded:
      return
    if _metadata_filepath(self._data_dir).exists():
      self.load_metadata(self._data_dir)
    # Mark as loaded even when there is no file, so that the existence check
    # is not repeated on every access.
    self._data_is_loaded = True

  # Every read accessor below triggers the lazy load first. Without this,
  # `key in d`, `len(d)`, `bool(d)`, `iter(d)`, `d.get(key)`, `d.values()` or
  # `d != other` would observe the still-empty dict and disagree with
  # `d[key]`, `d.keys()` and `d.items()`.

  def __getitem__(self, key, /):
    self._load_metadata()
    return super().__getitem__(key)

  def __contains__(self, key, /):
    self._load_metadata()
    return super().__contains__(key)

  def __iter__(self):
    self._load_metadata()
    return super().__iter__()

  def __len__(self):
    self._load_metadata()
    return super().__len__()

  def __eq__(self, value, /):
    self._load_metadata()
    return super().__eq__(value)

  def __ne__(self, value, /):
    self._load_metadata()
    return super().__ne__(value)

  def get(self, key, default=None, /):
    self._load_metadata()
    return super().get(key, default)

  def keys(self):
    self._load_metadata()
    return super().keys()

  def values(self):
    self._load_metadata()
    return super().values()

  def items(self):
    self._load_metadata()
    return super().items()
1394
1437
1395
1438
1396
1439
class BeamMetadataDict (MetadataDict ):
0 commit comments