Skip to content

Commit 436ae85

Browse files
tomvdw and The TensorFlow Datasets Authors
authored and committed
Clean up dependencies a bit
PiperOrigin-RevId: 690631591
1 parent a811cac commit 436ae85

File tree

10 files changed

+37
-46
lines changed

10 files changed

+37
-46
lines changed

tensorflow_datasets/core/community/config.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
# Make sure that github paths are registered. This import makes sure that epath
2727
# understands paths that start with github://.
2828
from tensorflow_datasets.core import github_api # pylint: disable=unused-import
29-
from tensorflow_datasets.core import utils
29+
from tensorflow_datasets.core.utils import resource_utils
3030
import toml
3131

3232

@@ -120,4 +120,6 @@ def add_namespace(self, namespace: str, config: NamespaceConfig) -> None:
120120

121121
@functools.lru_cache(maxsize=1)
122122
def get_community_config() -> NamespaceRegistry:
123-
return NamespaceRegistry(config_path=utils.tfds_path(COMMUNITY_CONFIG_PATH))
123+
return NamespaceRegistry(
124+
config_path=resource_utils.tfds_path(COMMUNITY_CONFIG_PATH)
125+
)

tensorflow_datasets/core/download/downloader.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def _get_filename(response: Response) -> str:
125125
if filename:
126126
return filename
127127
# Otherwise, fallback on extracting the name from the url.
128-
return utils.basename_from_url(response.url)
128+
return _basename_from_url(response.url)
129129

130130

131131
class _Downloader:
@@ -351,3 +351,13 @@ def _assert_status(response: requests.Response) -> None:
351351
response.url, response.status_code
352352
)
353353
)
354+
355+
356+
def _basename_from_url(url: str) -> str:
357+
"""Returns file name of file at given url."""
358+
filename = urllib.parse.urlparse(url).path
359+
filename = os.path.basename(filename)
360+
# Replace `%2F` (html code for `/`) by `_`.
361+
# This is consistent with how Chrome rename downloaded files.
362+
filename = filename.replace('%2F', '_')
363+
return filename or 'unknown_name'

tensorflow_datasets/core/download/downloader_test.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,3 +240,17 @@ def test_filename_from_headers(
240240
headers = None
241241
resp = _FakeResponse('http://foo.bar/baz.zip', b'content', headers=headers)
242242
assert downloader._get_filename(resp), filename
243+
244+
245+
@pytest.mark.parametrize(
246+
['url', 'filename'],
247+
[
248+
(
249+
'http://test.com/appspot.com/tsvsWithoutLabels%2FAX.tsv?' # pylint: disable=implicit-str-concat
250+
'Id=firebase&Expires=2498860800',
251+
'tsvsWithoutLabels_AX.tsv', # `%2F` -> `_`
252+
),
253+
],
254+
)
255+
def test_basename_from_url(url: str, filename: str):
256+
assert downloader._basename_from_url(url) == filename

tensorflow_datasets/core/utils/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
from tensorflow_datasets.core.utils.image_utils import THUMBNAIL_SIZE
3131
from tensorflow_datasets.core.utils.py_utils import add_sys_path
3232
from tensorflow_datasets.core.utils.py_utils import atomic_write
33-
from tensorflow_datasets.core.utils.py_utils import basename_from_url
3433
from tensorflow_datasets.core.utils.py_utils import build_synchronize_decorator
3534
from tensorflow_datasets.core.utils.py_utils import classproperty
3635
from tensorflow_datasets.core.utils.py_utils import dedent

tensorflow_datasets/core/utils/file_utils.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
from tensorflow_datasets.core import naming
3636
from tensorflow_datasets.core.utils import docs
3737
from tensorflow_datasets.core.utils import py_utils
38-
from tensorflow_datasets.core.utils import read_config
3938
from tensorflow_datasets.core.utils import type_utils
4039
from tensorflow_datasets.core.utils import version as version_lib
4140

@@ -343,7 +342,7 @@ def _find_references_with_glob(
343342
namespace: Optional namespace to which the found datasets belong to.
344343
include_old_tfds_version: include datasets that have been generated with
345344
TFDS before 4.0.0.
346-
glob_suffixes: list of file suffixes to use to create the the glob for
345+
glob_suffixes: list of file suffixes to use to create the glob for
347346
interesting TFDS files. Defaults to json files.
348347
349348
Yields:
@@ -450,7 +449,7 @@ def list_dataset_variants(
450449
include_versions: whether to list what versions are available.
451450
include_old_tfds_version: include datasets that have been generated with
452451
TFDS before 4.0.0.
453-
glob_suffixes: list of file suffixes to use to create the the glob for
452+
glob_suffixes: list of file suffixes to use to create the glob for
454453
interesting TFDS files. Defaults to json files.
455454
456455
Yields:

tensorflow_datasets/core/utils/py_utils.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -36,15 +36,9 @@
3636

3737
from absl import logging as absl_logging
3838
from etils import epath
39-
from etils import epy
4039
from tensorflow_datasets.core import constants
4140
from tensorflow_datasets.core.utils import type_utils
4241

43-
with epy.lazy_imports():
44-
# pylint: disable=g-import-not-at-top
45-
from six.moves import urllib
46-
# pylint: enable=g-import-not-at-top
47-
4842

4943
Tree = type_utils.Tree
5044

@@ -500,16 +494,6 @@ def lock_decorated(*args, **kwargs):
500494
return lock_decorator
501495

502496

503-
def basename_from_url(url: str) -> str:
504-
"""Returns file name of file at given url."""
505-
filename = urllib.parse.urlparse(url).path
506-
filename = os.path.basename(filename)
507-
# Replace `%2F` (html code for `/`) by `_`.
508-
# This is consistent with how Chrome rename downloaded files.
509-
filename = filename.replace('%2F', '_')
510-
return filename or 'unknown_name'
511-
512-
513497
def list_info_files(dir_path: epath.PathLike) -> Sequence[str]:
514498
"""Returns name of info files within dir_path."""
515499
path = epath.Path(dir_path)

tensorflow_datasets/core/utils/py_utils_test.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import tensorflow as tf
2222
from tensorflow_datasets import testing
2323
from tensorflow_datasets.core import constants
24-
from tensorflow_datasets.core import utils
2524
from tensorflow_datasets.core.utils import py_utils
2625

2726

@@ -336,20 +335,6 @@ def test_flatten_with_path():
336335
)
337336

338337

339-
@pytest.mark.parametrize(
340-
['url', 'filename'],
341-
[
342-
(
343-
'http://test.com/appspot.com/tsvsWithoutLabels%2FAX.tsv?' # pylint: disable=implicit-str-concat
344-
'Id=firebase&Expires=2498860800',
345-
'tsvsWithoutLabels_AX.tsv', # `%2F` -> `_`
346-
),
347-
],
348-
)
349-
def test_basename_from_url(url: str, filename: str):
350-
assert utils.basename_from_url(url) == filename
351-
352-
353338
def test_incomplete_file(tmp_path: pathlib.Path):
354339
tmp_path = epath.Path(tmp_path)
355340
filepath = tmp_path / 'test.txt'

tensorflow_datasets/core/utils/read_config.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
from __future__ import annotations
1919

2020
import dataclasses
21-
import enum
2221
from typing import Callable, Optional, Sequence, Union, cast
2322

2423
from tensorflow_datasets.core.utils import shard_utils

tensorflow_datasets/core/utils/shard_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
import dataclasses
2727
import math
2828
import os
29-
from typing import Any, List
29+
from typing import Any
3030

3131
DEFAULT_MIN_SHARD_SIZE: int = 64 << 20 # 64 MiB
3232
DEFAULT_MAX_SHARD_SIZE: int = 1024 << 20 # 1 GiB
@@ -179,7 +179,7 @@ def replace(self, **kwargs: Any) -> FileInstruction:
179179
def split_file_instruction(
180180
file_instruction: FileInstruction,
181181
num_splits: int,
182-
) -> List[FileInstruction]:
182+
) -> list[FileInstruction]:
183183
"""Instructions for reading the given file instruction in several splits.
184184
185185
Note that this function may return fewer splits than `num_splits` in case the
@@ -215,7 +215,7 @@ def get_file_instructions(
215215
to: int,
216216
filenames: Sequence[str],
217217
shard_lengths: Sequence[int],
218-
) -> List[FileInstruction]:
218+
) -> list[FileInstruction]:
219219
"""Returns a list of files (+skip/take) to read [from_:to] items from shards.
220220
221221
Args:

tensorflow_datasets/core/utils/version.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import dataclasses
2121
import enum
2222
import re
23-
from typing import List, Tuple, Union
2423

2524
from etils import epath
2625

@@ -132,7 +131,7 @@ class Version:
132131

133132
def __init__(
134133
self,
135-
version: Union[Version, str],
134+
version: Version | str,
136135
experiments=None,
137136
tfds_version_to_prepare=None,
138137
):
@@ -242,7 +241,7 @@ def is_valid(cls, version: Version | str | None) -> bool:
242241

243242
def _str_to_version(
244243
version_str: str, allow_wildcard=False
245-
) -> Tuple[Union[int, str], Union[int, str], Union[int, str]]:
244+
) -> tuple[int | str, int | str, int | str]:
246245
"""Return the tuple (major, minor, patch) version extracted from the str."""
247246
if not isinstance(version_str, str):
248247
raise TypeError(
@@ -264,7 +263,7 @@ def _str_to_version(
264263
)
265264

266265

267-
def list_all_versions(root_dir: epath.PathLike) -> List[Version]:
266+
def list_all_versions(root_dir: epath.PathLike) -> list[Version]:
268267
"""Lists all dataset versions present on disk, sorted."""
269268
root_dir = epath.Path(root_dir)
270269
versions = []

0 commit comments

Comments (0)