Skip to content

Commit 465d709

Browse files
tomvdw and The TensorFlow Datasets Authors
authored and committed
Add more lazy imports throughout the code
PiperOrigin-RevId: 638912770
1 parent c74827d commit 465d709

File tree

12 files changed

+104
-62
lines changed

12 files changed

+104
-62
lines changed

tensorflow_datasets/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
# pylint: enable=line-too-long
3838
# pylint: disable=g-import-not-at-top,g-bad-import-order,wrong-import-position,unused-import
3939

40+
from __future__ import annotations
41+
4042
from absl import logging
4143
from etils import epy as _epy
4244

tensorflow_datasets/core/dataset_info.py

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,18 @@
3333
from __future__ import annotations
3434

3535
import abc
36+
from collections.abc import Iterable
3637
import dataclasses
3738
import json
3839
import os
3940
import posixpath
4041
import tempfile
4142
import time
42-
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
43+
from typing import Any, Optional
4344

4445
from absl import logging
4546
from etils import epath
47+
from etils import epy
4648
from tensorflow_datasets.core import constants
4749
from tensorflow_datasets.core import file_adapters
4850
from tensorflow_datasets.core import lazy_imports_lib
@@ -52,16 +54,21 @@
5254
from tensorflow_datasets.core.features import feature as feature_lib
5355
from tensorflow_datasets.core.features import top_level_feature
5456
from tensorflow_datasets.core.proto import dataset_info_pb2
55-
from tensorflow_datasets.core.utils import file_utils
56-
from tensorflow_datasets.core.utils import gcs_utils
5757
from tensorflow_datasets.core.utils.lazy_imports_utils import apache_beam as beam
5858
from tensorflow_datasets.core.utils.lazy_imports_utils import tensorflow as tf
5959

60-
from google.protobuf import json_format
60+
with epy.lazy_imports():
61+
# pylint: disable=g-import-not-at-top
62+
from tensorflow_datasets.core.utils import file_utils
63+
from tensorflow_datasets.core.utils import gcs_utils
64+
65+
from google.protobuf import json_format
66+
# pylint: enable=g-import-not-at-top
67+
6168

6269
# TODO(b/109648354): Remove the "pytype: disable" comment.
63-
Nest = Union[Tuple["Nest", ...], Dict[str, "Nest"], str] # pytype: disable=not-supported-yet
64-
SupervisedKeysType = Union[Tuple[Nest, Nest], Tuple[Nest, Nest, Nest]]
70+
Nest = tuple["Nest", ...] | dict[str, "Nest"] | str # pytype: disable=not-supported-yet
71+
SupervisedKeysType = tuple[Nest, Nest] | tuple[Nest, Nest, Nest]
6572

6673

6774
def dataset_info_path(dataset_info_dir: epath.PathLike) -> epath.Path:
@@ -108,7 +115,7 @@ class DatasetIdentity:
108115
config_name: str | None = None
109116
config_description: str | None = None
110117
config_tags: list[str] | None = None
111-
release_notes: Dict[str, str] | None = None
118+
release_notes: dict[str, str] | None = None
112119

113120
@classmethod
114121
def from_builder(cls, builder) -> "DatasetIdentity":
@@ -176,16 +183,16 @@ def __init__(
176183
# LINT.IfChange(dataset_info_args)
177184
self,
178185
*,
179-
builder: Union[DatasetIdentity, Any],
180-
description: Optional[str] = None,
186+
builder: DatasetIdentity | Any,
187+
description: str | None = None,
181188
features: Optional[feature_lib.FeatureConnector] = None,
182189
supervised_keys: Optional[SupervisedKeysType] = None,
183190
disable_shuffling: bool = False,
184-
homepage: Optional[str] = None,
185-
citation: Optional[str] = None,
186-
metadata: Optional[Metadata] = None,
187-
license: Optional[str] = None, # pylint: disable=redefined-builtin
188-
redistribution_info: Optional[Dict[str, str]] = None,
191+
homepage: str | None = None,
192+
citation: str | None = None,
193+
metadata: Metadata | None = None,
194+
license: str | None = None, # pylint: disable=redefined-builtin
195+
redistribution_info: Optional[dict[str, str]] = None,
189196
split_dict: Optional[splits_lib.SplitDict] = None,
190197
# LINT.ThenChange(:setstate)
191198
):
@@ -347,7 +354,7 @@ def config_description(self) -> str | None:
347354
return self._identity.config_description
348355

349356
@property
350-
def config_tags(self) -> List[str] | None:
357+
def config_tags(self) -> list[str] | None:
351358
return self._identity.config_tags
352359

353360
@property
@@ -368,7 +375,7 @@ def version(self):
368375
return self._identity.version
369376

370377
@property
371-
def release_notes(self) -> Optional[Dict[str, str]]:
378+
def release_notes(self) -> dict[str, str] | None:
372379
return self._identity.release_notes
373380

374381
@property
@@ -412,7 +419,7 @@ def features(self):
412419
return self._features
413420

414421
@property
415-
def metadata(self) -> Optional[Metadata]:
422+
def metadata(self) -> Metadata | None:
416423
return self._metadata
417424

418425
@property
@@ -431,14 +438,14 @@ def module_name(self) -> str:
431438
return self._identity.module_name
432439

433440
@property
434-
def file_format(self) -> Optional[file_adapters.FileFormat]:
441+
def file_format(self) -> file_adapters.FileFormat | None:
435442
if not self.as_proto.file_format:
436443
return None
437444
return file_adapters.FileFormat(self.as_proto.file_format)
438445

439446
def set_file_format(
440447
self,
441-
file_format: Union[None, str, file_adapters.FileFormat],
448+
file_format: None | str | file_adapters.FileFormat,
442449
override: bool = False,
443450
) -> None:
444451
"""Internal function to define the file format.
@@ -716,8 +723,8 @@ def read_from_directory(self, dataset_info_dir: epath.PathLike) -> None:
716723

717724
def add_file_data_source_access(
718725
self,
719-
path: Union[epath.PathLike, Iterable[epath.PathLike]],
720-
url: Optional[str] = None,
726+
path: epath.PathLike | Iterable[epath.PathLike],
727+
url: str | None = None,
721728
) -> None:
722729
"""Records that the given query was used to generate this dataset.
723730
@@ -743,7 +750,7 @@ def add_file_data_source_access(
743750
def add_url_access(
744751
self,
745752
url: str,
746-
checksum: Optional[str] = None,
753+
checksum: str | None = None,
747754
) -> None:
748755
"""Records the URL used to generate this dataset."""
749756
self._info_proto.data_source_accesses.append(
@@ -768,7 +775,7 @@ def add_sql_data_source_access(
768775
def add_tfds_data_source_access(
769776
self,
770777
dataset_reference: naming.DatasetReference,
771-
url: Optional[str] = None,
778+
url: str | None = None,
772779
) -> None:
773780
"""Records that the given query was used to generate this dataset.
774781

tensorflow_datasets/core/dataset_metadata.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,20 @@
1515

1616
"""Logic related to reading datasets metadata from config files."""
1717

18+
from __future__ import annotations
19+
1820
import dataclasses
1921
import functools
2022

2123
from etils import epath
22-
from etils import etree
24+
from etils import epy
2325
from tensorflow_datasets.core import constants
24-
from tensorflow_datasets.core.utils import resource_utils
26+
27+
with epy.lazy_imports():
28+
# pylint: disable=g-import-not-at-top
29+
from etils import etree
30+
from tensorflow_datasets.core.utils import resource_utils
31+
# pylint: enable=g-import-not-at-top
2532

2633

2734
CITATIONS_FILENAME = "CITATIONS.bib"

tensorflow_datasets/core/download/download_manager.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,20 +29,21 @@
2929
from absl import logging
3030
from etils import epath
3131
from etils import epy
32-
import promise
33-
from tensorflow_datasets.core import utils
34-
from tensorflow_datasets.core.download import checksums
35-
from tensorflow_datasets.core.download import extractor
36-
from tensorflow_datasets.core.download import kaggle
37-
from tensorflow_datasets.core.download import resource as resource_lib
38-
from tensorflow_datasets.core.download import util
39-
from tensorflow_datasets.core.utils import shard_utils
40-
from tensorflow_datasets.core.utils import type_utils
4132
from tensorflow_datasets.core.utils.lazy_imports_utils import tree
4233

4334
with epy.lazy_imports():
4435
# pylint: disable=g-import-not-at-top
36+
import promise
37+
38+
from tensorflow_datasets.core import utils
39+
from tensorflow_datasets.core.download import checksums
4540
from tensorflow_datasets.core.download import downloader
41+
from tensorflow_datasets.core.download import extractor
42+
from tensorflow_datasets.core.download import kaggle
43+
from tensorflow_datasets.core.download import resource as resource_lib
44+
from tensorflow_datasets.core.download import util
45+
from tensorflow_datasets.core.utils import shard_utils
46+
from tensorflow_datasets.core.utils import type_utils
4647
# pylint: enable=g-import-not-at-top
4748

4849
# pylint: disable=logging-fstring-interpolation

tensorflow_datasets/core/logging/base_logger.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,24 @@
1515

1616
"""This module defines the methods a logger implementation should define."""
1717

18+
from __future__ import annotations
19+
1820
from typing import Any, Dict, Optional, Union
1921

20-
from etils import epath
21-
from tensorflow_datasets.core import decode
22-
from tensorflow_datasets.core import download as download_lib
23-
from tensorflow_datasets.core import file_adapters
24-
from tensorflow_datasets.core import splits as splits_lib
25-
from tensorflow_datasets.core.logging import call_metadata
26-
from tensorflow_datasets.core.utils import read_config as read_config_lib
27-
from tensorflow_datasets.core.utils import type_utils
22+
from etils import epy
23+
24+
with epy.lazy_imports():
25+
# pylint: disable=g-import-not-at-top
26+
from etils import epath
27+
from tensorflow_datasets.core import decode
28+
from tensorflow_datasets.core import download as download_lib
29+
from tensorflow_datasets.core import file_adapters
30+
from tensorflow_datasets.core import splits as splits_lib
31+
from tensorflow_datasets.core.logging import call_metadata
32+
from tensorflow_datasets.core.utils import read_config as read_config_lib
33+
from tensorflow_datasets.core.utils import type_utils
34+
# pylint: enable=g-import-not-at-top
35+
2836

2937
TreeDict = type_utils.TreeDict
3038

tensorflow_datasets/core/logging/call_metadata.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,15 @@
1515

1616
"""To associate metadata with TFDS calls."""
1717

18+
from __future__ import annotations
19+
1820
import enum
1921
import threading
2022
import time
21-
from typing import Dict, Optional, Tuple
23+
2224

2325
# Maps thread_id to "Session ID", if any.
24-
_THREAD_TO_SESSIONID: Dict[int, int] = {}
26+
_THREAD_TO_SESSIONID: dict[int, int] = {}
2527

2628
_NEXT_SESSION_ID = 1
2729
_NEXT_SESSION_ID_LOCK = threading.Lock()
@@ -33,7 +35,7 @@ class Status(enum.Enum):
3335
ERROR = 2
3436

3537

36-
def _get_session_id(thread_id: int) -> Tuple[int, bool]:
38+
def _get_session_id(thread_id: int) -> tuple[int, bool]:
3739
"""Returns (session_id, direct_call) tuple."""
3840
session_id = _THREAD_TO_SESSIONID.get(thread_id, None)
3941
if session_id:
@@ -55,8 +57,8 @@ class CallMetadata:
5557
"""
5658

5759
# The start and end times of the event (microseconds since Epoch).
58-
start_time_micros: Optional[int]
59-
end_time_micros: Optional[int]
60+
start_time_micros: int | None
61+
end_time_micros: int | None
6062

6163
# The status (success or error) of the call.
6264
status: Status

tensorflow_datasets/core/read_only_builder.py

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,27 +15,34 @@
1515

1616
"""Load Datasets without reading dataset generation code."""
1717

18+
from __future__ import annotations
19+
1820
import functools
1921
import os
2022
import typing
2123
from typing import Any, List, Optional, Type
2224

23-
from etils import epath
24-
from etils import etree
25-
from tensorflow_datasets.core import dataset_builder
26-
from tensorflow_datasets.core import dataset_info
27-
from tensorflow_datasets.core import logging as tfds_logging
28-
from tensorflow_datasets.core import naming
29-
from tensorflow_datasets.core import registered
30-
from tensorflow_datasets.core import splits as splits_lib
31-
from tensorflow_datasets.core import utils
32-
from tensorflow_datasets.core.features import feature as feature_lib
33-
from tensorflow_datasets.core.proto import dataset_info_pb2
34-
from tensorflow_datasets.core.utils import error_utils
35-
from tensorflow_datasets.core.utils import file_utils
36-
from tensorflow_datasets.core.utils import version as version_lib
25+
from etils import epy
3726
from tensorflow_datasets.core.utils.lazy_imports_utils import tensorflow as tf
3827

28+
with epy.lazy_imports():
29+
# pylint: disable=g-import-not-at-top
30+
from etils import epath
31+
from etils import etree
32+
from tensorflow_datasets.core import dataset_builder
33+
from tensorflow_datasets.core import dataset_info
34+
from tensorflow_datasets.core import logging as tfds_logging
35+
from tensorflow_datasets.core import naming
36+
from tensorflow_datasets.core import registered
37+
from tensorflow_datasets.core import splits as splits_lib
38+
from tensorflow_datasets.core import utils
39+
from tensorflow_datasets.core.features import feature as feature_lib
40+
from tensorflow_datasets.core.proto import dataset_info_pb2
41+
from tensorflow_datasets.core.utils import error_utils
42+
from tensorflow_datasets.core.utils import file_utils
43+
from tensorflow_datasets.core.utils import version as version_lib
44+
# pylint: enable=g-import-not-at-top
45+
3946

4047
class ReadOnlyBuilder(
4148
dataset_builder.FileReaderBuilder, skip_registration=True

tensorflow_datasets/core/utils/file_utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515

1616
"""Library of helper functions to handle dealing with files."""
1717

18+
from __future__ import annotations
19+
1820
import collections
1921
from collections.abc import Iterator, Sequence
2022
import functools

tensorflow_datasets/core/utils/py_utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515

1616
"""Some python utils function and classes."""
1717

18+
from __future__ import annotations
19+
1820
import base64
1921
from collections.abc import Iterator, Sequence
2022
import contextlib

tensorflow_datasets/core/utils/tqdm_utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515

1616
"""Wrapper around tqdm."""
1717

18+
from __future__ import annotations
19+
1820
import contextlib
1921
import os
2022

0 commit comments

Comments (0)