Skip to content

Commit 7247953

Browse files
author
The TensorFlow Datasets Authors
committed
Speed up ReadOnlyBuilder.all_builder_dirs by using concurrent futures.
PiperOrigin-RevId: 648615587
1 parent fb880d8 commit 7247953

File tree

1 file changed

+10
-3
lines changed

1 file changed

+10
-3
lines changed

tensorflow_datasets/core/read_only_builder.py

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from __future__ import annotations
1919

2020
from collections.abc import Sequence
21+
import concurrent.futures
2122
import functools
2223
import os
2324
import typing
@@ -349,9 +350,15 @@ def _find_builder_dir(name: str, **builder_kwargs: Any) -> str | None:
349350
version_str=str(version) if version else None,
350351
config_name=config,
351352
)
352-
for current_data_dir in all_data_dirs:
353-
if builder_dir := find_builder_fn(data_dir=current_data_dir):
354-
all_builder_dirs.add(builder_dir)
353+
if len(all_data_dirs) <= 1:
354+
for current_data_dir in all_data_dirs:
355+
if builder_dir := find_builder_fn(data_dir=current_data_dir):
356+
all_builder_dirs.add(builder_dir)
357+
else:
358+
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
359+
for builder_dir in executor.map(find_builder_fn, all_data_dirs):
360+
if builder_dir:
361+
all_builder_dirs.add(builder_dir)
355362

356363
if not all_builder_dirs:
357364
all_dirs_str = '\n\t- '.join([''] + [str(dir) for dir in all_data_dirs])

0 commit comments

Comments
 (0)