Skip to content

Commit 436ae85

Browse files
tomvdw and The TensorFlow Datasets Authors
authored and committed
Clean up dependencies a bit
PiperOrigin-RevId: 690631591
1 parent a811cac commit 436ae85

File tree

10 files changed

+37
-46
lines changed

10 files changed

+37
-46
lines changed

tensorflow_datasets/core/community/config.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
# Make sure that github paths are registered. This import makes sure that epath
2727
# understands paths that start with github://.
2828
from tensorflow_datasets.core import github_api # pylint: disable=unused-import
29-
from tensorflow_datasets.core import utils
29+
from tensorflow_datasets.core.utils import resource_utils
3030
import toml
3131

3232

@@ -120,4 +120,6 @@ def add_namespace(self, namespace: str, config: NamespaceConfig) -> None:
120120

121121
@functools.lru_cache(maxsize=1)
122122
def get_community_config() -> NamespaceRegistry:
123-
return NamespaceRegistry(config_path=utils.tfds_path(COMMUNITY_CONFIG_PATH))
123+
return NamespaceRegistry(
124+
config_path=resource_utils.tfds_path(COMMUNITY_CONFIG_PATH)
125+
)

tensorflow_datasets/core/download/downloader.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def _get_filename(response: Response) -> str:
125125
if filename:
126126
return filename
127127
# Otherwise, fallback on extracting the name from the url.
128-
return utils.basename_from_url(response.url)
128+
return _basename_from_url(response.url)
129129

130130

131131
class _Downloader:
@@ -351,3 +351,13 @@ def _assert_status(response: requests.Response) -> None:
351351
response.url, response.status_code
352352
)
353353
)
354+
355+
356+
def _basename_from_url(url: str) -> str:
357+
"""Returns file name of file at given url."""
358+
filename = urllib.parse.urlparse(url).path
359+
filename = os.path.basename(filename)
360+
# Replace `%2F` (html code for `/`) by `_`.
361+
# This is consistent with how Chrome rename downloaded files.
362+
filename = filename.replace('%2F', '_')
363+
return filename or 'unknown_name'

tensorflow_datasets/core/download/downloader_test.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,3 +240,17 @@ def test_filename_from_headers(
240240
headers = None
241241
resp = _FakeResponse('http://foo.bar/baz.zip', b'content', headers=headers)
242242
assert downloader._get_filename(resp), filename
243+
244+
245+
@pytest.mark.parametrize(
246+
['url', 'filename'],
247+
[
248+
(
249+
'http://test.com/appspot.com/tsvsWithoutLabels%2FAX.tsv?' # pylint: disable=implicit-str-concat
250+
'Id=firebase&Expires=2498860800',
251+
'tsvsWithoutLabels_AX.tsv', # `%2F` -> `_`
252+
),
253+
],
254+
)
255+
def test_basename_from_url(url: str, filename: str):
256+
assert downloader._basename_from_url(url) == filename

tensorflow_datasets/core/utils/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
from tensorflow_datasets.core.utils.image_utils import THUMBNAIL_SIZE
3131
from tensorflow_datasets.core.utils.py_utils import add_sys_path
3232
from tensorflow_datasets.core.utils.py_utils import atomic_write
33-
from tensorflow_datasets.core.utils.py_utils import basename_from_url
3433
from tensorflow_datasets.core.utils.py_utils import build_synchronize_decorator
3534
from tensorflow_datasets.core.utils.py_utils import classproperty
3635
from tensorflow_datasets.core.utils.py_utils import dedent

tensorflow_datasets/core/utils/file_utils.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
from tensorflow_datasets.core import naming
3636
from tensorflow_datasets.core.utils import docs
3737
from tensorflow_datasets.core.utils import py_utils
38-
from tensorflow_datasets.core.utils import read_config
3938
from tensorflow_datasets.core.utils import type_utils
4039
from tensorflow_datasets.core.utils import version as version_lib
4140

@@ -343,7 +342,7 @@ def _find_references_with_glob(
343342
namespace: Optional namespace to which the found datasets belong to.
344343
include_old_tfds_version: include datasets that have been generated with
345344
TFDS before 4.0.0.
346-
glob_suffixes: list of file suffixes to use to create the the glob for
345+
glob_suffixes: list of file suffixes to use to create the glob for
347346
interesting TFDS files. Defaults to json files.
348347
349348
Yields:
@@ -450,7 +449,7 @@ def list_dataset_variants(
450449
include_versions: whether to list what versions are available.
451450
include_old_tfds_version: include datasets that have been generated with
452451
TFDS before 4.0.0.
453-
glob_suffixes: list of file suffixes to use to create the the glob for
452+
glob_suffixes: list of file suffixes to use to create the glob for
454453
interesting TFDS files. Defaults to json files.
455454
456455
Yields:

tensorflow_datasets/core/utils/py_utils.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -36,15 +36,9 @@
3636

3737
from absl import logging as absl_logging
3838
from etils import epath
39-
from etils import epy
4039
from tensorflow_datasets.core import constants
4140
from tensorflow_datasets.core.utils import type_utils
4241

43-
with epy.lazy_imports():
44-
# pylint: disable=g-import-not-at-top
45-
from six.moves import urllib
46-
# pylint: enable=g-import-not-at-top
47-
4842

4943
Tree = type_utils.Tree
5044

@@ -500,16 +494,6 @@ def lock_decorated(*args, **kwargs):
500494
return lock_decorator
501495

502496

503-
def basename_from_url(url: str) -> str:
504-
"""Returns file name of file at given url."""
505-
filename = urllib.parse.urlparse(url).path
506-
filename = os.path.basename(filename)
507-
# Replace `%2F` (html code for `/`) by `_`.
508-
# This is consistent with how Chrome rename downloaded files.
509-
filename = filename.replace('%2F', '_')
510-
return filename or 'unknown_name'
511-
512-
513497
def list_info_files(dir_path: epath.PathLike) -> Sequence[str]:
514498
"""Returns name of info files within dir_path."""
515499
path = epath.Path(dir_path)

tensorflow_datasets/core/utils/py_utils_test.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import tensorflow as tf
2222
from tensorflow_datasets import testing
2323
from tensorflow_datasets.core import constants
24-
from tensorflow_datasets.core import utils
2524
from tensorflow_datasets.core.utils import py_utils
2625

2726

@@ -336,20 +335,6 @@ def test_flatten_with_path():
336335
)
337336

338337

339-
@pytest.mark.parametrize(
340-
['url', 'filename'],
341-
[
342-
(
343-
'http://test.com/appspot.com/tsvsWithoutLabels%2FAX.tsv?' # pylint: disable=implicit-str-concat
344-
'Id=firebase&Expires=2498860800',
345-
'tsvsWithoutLabels_AX.tsv', # `%2F` -> `_`
346-
),
347-
],
348-
)
349-
def test_basename_from_url(url: str, filename: str):
350-
assert utils.basename_from_url(url) == filename
351-
352-
353338
def test_incomplete_file(tmp_path: pathlib.Path):
354339
tmp_path = epath.Path(tmp_path)
355340
filepath = tmp_path / 'test.txt'

tensorflow_datasets/core/utils/read_config.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
from __future__ import annotations
1919

2020
import dataclasses
21-
import enum
2221
from typing import Callable, Optional, Sequence, Union, cast
2322

2423
from tensorflow_datasets.core.utils import shard_utils

tensorflow_datasets/core/utils/shard_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
import dataclasses
2727
import math
2828
import os
29-
from typing import Any, List
29+
from typing import Any
3030

3131
DEFAULT_MIN_SHARD_SIZE: int = 64 << 20 # 64 MiB
3232
DEFAULT_MAX_SHARD_SIZE: int = 1024 << 20 # 1 GiB
@@ -179,7 +179,7 @@ def replace(self, **kwargs: Any) -> FileInstruction:
179179
def split_file_instruction(
180180
file_instruction: FileInstruction,
181181
num_splits: int,
182-
) -> List[FileInstruction]:
182+
) -> list[FileInstruction]:
183183
"""Instructions for reading the given file instruction in several splits.
184184
185185
Note that this function may return fewer splits than `num_splits` in case the
@@ -215,7 +215,7 @@ def get_file_instructions(
215215
to: int,
216216
filenames: Sequence[str],
217217
shard_lengths: Sequence[int],
218-
) -> List[FileInstruction]:
218+
) -> list[FileInstruction]:
219219
"""Returns a list of files (+skip/take) to read [from_:to] items from shards.
220220
221221
Args:

tensorflow_datasets/core/utils/version.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import dataclasses
2121
import enum
2222
import re
23-
from typing import List, Tuple, Union
2423

2524
from etils import epath
2625

@@ -132,7 +131,7 @@ class Version:
132131

133132
def __init__(
134133
self,
135-
version: Union[Version, str],
134+
version: Version | str,
136135
experiments=None,
137136
tfds_version_to_prepare=None,
138137
):
@@ -242,7 +241,7 @@ def is_valid(cls, version: Version | str | None) -> bool:
242241

243242
def _str_to_version(
244243
version_str: str, allow_wildcard=False
245-
) -> Tuple[Union[int, str], Union[int, str], Union[int, str]]:
244+
) -> tuple[int | str, int | str, int | str]:
246245
"""Return the tuple (major, minor, patch) version extracted from the str."""
247246
if not isinstance(version_str, str):
248247
raise TypeError(
@@ -264,7 +263,7 @@ def _str_to_version(
264263
)
265264

266265

267-
def list_all_versions(root_dir: epath.PathLike) -> List[Version]:
266+
def list_all_versions(root_dir: epath.PathLike) -> list[Version]:
268267
"""Lists all dataset versions present on disk, sorted."""
269268
root_dir = epath.Path(root_dir)
270269
versions = []

0 commit comments

Comments (0)