Skip to content

Commit 3ec275c

Browse files
tomvdw and The TensorFlow Datasets Authors
authored and committed
Lazy load namespace config
PiperOrigin-RevId: 627964344
1 parent c229c24 commit 3ec275c

File tree

6 files changed

+22
-14
lines changed

6 files changed

+22
-14
lines changed

tensorflow_datasets/core/community/config.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717

1818
from collections.abc import Mapping, Sequence
1919
import dataclasses
20+
import functools
2021
import threading
2122
from typing import Any
2223

@@ -114,6 +115,9 @@ def add_namespace(self, namespace: str, config: NamespaceConfig) -> None:
114115
self._config_per_namespace[namespace] = config
115116

116117

117-
community_config = NamespaceRegistry(
118-
config_path=utils.tfds_path('community-datasets.toml')
119-
)
118+
COMMUNITY_CONFIG_PATH = 'community-datasets.toml'
119+
120+
121+
@functools.lru_cache(maxsize=1)
122+
def get_community_config() -> NamespaceRegistry:
123+
return NamespaceRegistry(config_path=utils.tfds_path(COMMUNITY_CONFIG_PATH))

tensorflow_datasets/core/community/config_test.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717

1818

1919
def test_community_config():
20-
assert "huggingface" in config_lib.community_config.config_per_namespace
21-
assert "kubric" in config_lib.community_config.config_per_namespace
22-
assert "robotics" in config_lib.community_config.config_per_namespace
20+
community_config = config_lib.get_community_config()
21+
assert "huggingface" in community_config.config_per_namespace
22+
assert "kubric" in community_config.config_per_namespace
23+
assert "robotics" in community_config.config_per_namespace

tensorflow_datasets/core/community/registry.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from collections.abc import Iterable, Mapping, Sequence
1919
import concurrent.futures
2020
import difflib
21+
import functools
2122
import multiprocessing
2223
import os
2324
from typing import Any, Type
@@ -308,4 +309,6 @@ def get_builder_root_dirs(
308309
return result
309310

310311

311-
community_register = DatasetRegistry(config_lib.community_config)
312+
@functools.lru_cache(maxsize=1)
313+
def community_register() -> DatasetRegistry:
314+
return DatasetRegistry(config_lib.get_community_config())

tensorflow_datasets/core/community/registry_test.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -153,7 +153,7 @@ def test_load_register_for_path_mixed():
153153

154154

155155
def test_community_register():
156-
assert 'huggingface' in registry_lib.community_register.list_namespaces()
156+
assert 'huggingface' in registry_lib.community_register().list_namespaces()
157157

158158

159159
def _write_dummy_config(content: str, tmp_path: str) -> epath.Path:

tensorflow_datasets/core/load.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ def list_builders(
7878
datasets = registered.list_imported_builders()
7979
if with_community_datasets:
8080
if visibility.DatasetType.COMMUNITY_PUBLIC.is_available():
81-
datasets += community.community_register.list_builders()
81+
datasets += community.community_register().list_builders()
8282
return datasets
8383

8484

@@ -112,7 +112,7 @@ def builder_cls(name: str) -> Type[dataset_builder.DatasetBuilder]:
112112
if ds_name.namespace:
113113
# `namespace:dataset` are loaded from the community register
114114
if visibility.DatasetType.COMMUNITY_PUBLIC.is_available():
115-
return community.community_register.builder_cls(ds_name)
115+
return community.community_register().builder_cls(ds_name)
116116
else:
117117
raise ValueError(
118118
f'Cannot load {ds_name} when community datasets are disabled'
@@ -191,9 +191,9 @@ def get_dataset_repr() -> str:
191191
name=name.name, **builder_kwargs)
192192
if (
193193
visibility.DatasetType.COMMUNITY_PUBLIC.is_available()
194-
and community.community_register.has_namespace(name.namespace)
194+
and community.community_register().has_namespace(name.namespace)
195195
):
196-
return community.community_register.builder(name=name, **builder_kwargs)
196+
return community.community_register().builder(name=name, **builder_kwargs)
197197

198198
# First check whether we can find the corresponding dataset builder code
199199
try:

tensorflow_datasets/core/load_test.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,10 +45,10 @@ def test_load_hf_dataset():
4545
])
4646
def test_community_public_load():
4747
with mock.patch(
48-
'tensorflow_datasets.core.community.community_register.list_builders',
48+
'tensorflow_datasets.core.community.registry.DatasetRegistry.list_builders',
4949
return_value=['ns:ds'],
5050
), mock.patch(
51-
'tensorflow_datasets.core.community.community_register.builder_cls',
51+
'tensorflow_datasets.core.community.registry.DatasetRegistry.builder_cls',
5252
return_value=testing.DummyDataset,
5353
), mock.patch(
5454
'tensorflow_datasets.core.registered.list_imported_builders',

0 commit comments

Comments
 (0)