Skip to content

Commit d7c97ea

Browse files
marcenacp and The TensorFlow Datasets Authors
authored and committed
Fix pytype errors.
PiperOrigin-RevId: 618854330
1 parent a4131b3 commit d7c97ea

File tree

5 files changed

+34
-27
lines changed

5 files changed

+34
-27
lines changed

tensorflow_datasets/core/decode/partial_decode.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,9 @@ def _normalize_feature_dict(
118118
inner_features = {
119119
k: v for k, v in expected_feature.items() if v is not False # pylint: disable=g-bool-id-comparison
120120
}
121+
feature = typing.cast(features_lib.FeaturesDict, feature)
121122
inner_features = { # Extract the feature subset # pylint: disable=g-complex-comprehension
122-
k: _extract_feature_item( # pytype: disable=wrong-arg-types # always-use-return-annotations
123+
k: _extract_feature_item(
123124
feature=feature,
124125
expected_key=k,
125126
expected_value=v,
@@ -153,18 +154,17 @@ def _extract_features(
153154
# Recurse into FeaturesDict, Sequence
154155
# Use `type` rather than `isinstance` to not recurse into inherited classes.
155156
if type(feature) == features_lib.FeaturesDict: # pylint: disable=unidiomatic-typecheck
157+
feature = typing.cast(features_lib.FeaturesDict, feature)
156158
expected_feature = typing.cast(features_lib.FeaturesDict, expected_feature)
157-
return features_lib.FeaturesDict(
158-
{ # Extract the feature subset # pylint: disable=g-complex-comprehension
159-
k: _extract_feature_item( # pytype: disable=wrong-arg-types # always-use-return-annotations
160-
feature=feature,
161-
expected_key=k,
162-
expected_value=v,
163-
fn=_extract_features,
164-
)
165-
for k, v in expected_feature.items()
166-
}
167-
)
159+
return features_lib.FeaturesDict({ # Extract the feature subset # pylint: disable=g-complex-comprehension
160+
k: _extract_feature_item(
161+
feature=feature,
162+
expected_key=k,
163+
expected_value=v,
164+
fn=_extract_features,
165+
)
166+
for k, v in expected_feature.items()
167+
})
168168
elif type(feature) == features_lib.Sequence: # pylint: disable=unidiomatic-typecheck
169169
feature = typing.cast(features_lib.Sequence, feature)
170170
expected_feature = typing.cast(features_lib.Sequence, expected_feature)

tensorflow_datasets/core/load.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ def load_all_datasets(
433433
`dict` of `dataset_names` mapping to a `dict` of {`split_name`:
434434
tf.data.Dataset} for each desired datasets.
435435
"""
436-
return self.load_datasets( # pytype: disable=wrong-arg-types
436+
return self.load_datasets(
437437
datasets=self.datasets.keys(), split=split, loader_kwargs=loader_kwargs
438438
)
439439

tensorflow_datasets/datasets/kddcup99/kddcup99_dataset_builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,5 +245,5 @@ def _generate_examples(self, gz_path):
245245
row['root_shell'] = bool_utils.parse_bool(row['root_shell'])
246246
row['is_hot_login'] = bool_utils.parse_bool(row['is_hot_login'])
247247
row['is_guest_login'] = bool_utils.parse_bool(row['is_guest_login'])
248-
row['label'] = row['label'].rstrip('.') # pytype: disable=attribute-error
248+
row['label'] = str(row['label']).rstrip('.')
249249
yield index, row

tensorflow_datasets/text/gsm8k/gsm8k.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,9 @@ def _info(self) -> tfds.core.DatasetInfo:
6868
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
6969
"""Returns SplitGenerators."""
7070
extracted = dl_manager.download_and_extract(_URLS)
71-
return {k: self._generate_examples(v) for k, v in extracted.items()} # pytype: disable=wrong-arg-types # always-use-return-annotations
71+
return {k: self._generate_examples(v) for k, v in extracted.items()}
7272

73-
def _generate_examples(self, path: str):
73+
def _generate_examples(self, path: epath.PathLike):
7474
"""Yields examples."""
7575
with epath.Path(path).open() as f:
7676
for i, line in enumerate(f):

tensorflow_datasets/video/tao/tao.py

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@
1818
from __future__ import annotations
1919

2020
import collections
21+
from collections.abc import Mapping
2122
import json
2223
import os
23-
from typing import Any, Dict, Optional, Tuple
24+
from typing import Any
2425

2526
from etils import epath
2627
import numpy as np
@@ -52,12 +53,12 @@
5253
}
5354
"""
5455

55-
NestedDict = Dict[str, Any]
56+
NestedDict = Mapping[str, Any]
5657

5758

5859
def _build_annotations_index(
5960
annotations: NestedDict,
60-
) -> Tuple[NestedDict, NestedDict, NestedDict, NestedDict]:
61+
) -> tuple[NestedDict, NestedDict, NestedDict, NestedDict]:
6162
"""Builds several dictionaries to aid in looking up annotations."""
6263
vids = {x['id']: x for x in annotations['videos']}
6364
images = {x['id']: x for x in annotations['images']}
@@ -72,7 +73,7 @@ def _build_annotations_index(
7273
return vids, ann_to_images, track_to_anns, vid_to_tracks
7374

7475

75-
def _merge_categories_map(annotations: NestedDict) -> Dict[str, str]:
76+
def _merge_categories_map(annotations: NestedDict) -> dict[str, str]:
7677
"""Some categories should be renamed into others.
7778
7879
This code segment is based on the TAO provided preprocessing API.
@@ -91,7 +92,9 @@ def _merge_categories_map(annotations: NestedDict) -> Dict[str, str]:
9192
return merge_map
9293

9394

94-
def _maybe_prepare_manual_data(dl_manager: tfds.download.DownloadManager):
95+
def _maybe_prepare_manual_data(
96+
dl_manager: tfds.download.DownloadManager,
97+
) -> tuple[epath.Path | None, epath.Path | None]:
9598
"""Return paths to the manually downloaded data if it is available."""
9699

97100
# The file has a different name each time it is downloaded.
@@ -115,7 +118,7 @@ def _maybe_prepare_manual_data(dl_manager: tfds.download.DownloadManager):
115118
return dl_manager.extract(files)
116119

117120

118-
def _get_category_id_map(annotations_root) -> Dict[str, int]:
121+
def _get_category_id_map(annotations_root) -> dict[str, int]:
119122
"""Gets a map from the TAO category id to a tfds category index.
120123
121124
The tfds category index is the index which a category appears in the
@@ -150,7 +153,7 @@ def _get_category_id_map(annotations_root) -> Dict[str, int]:
150153

151154

152155
def _preprocess_annotations(
153-
annotations_file: str, id_map: Dict[int, int]
156+
annotations_file: str, id_map: dict[str, int]
154157
) -> NestedDict:
155158
"""Preprocesses the data to group together some category labels."""
156159
with epath.Path(annotations_file).open('r') as f:
@@ -226,8 +229,8 @@ class TaoConfig(tfds.core.BuilderConfig):
226229
def __init__(
227230
self,
228231
*,
229-
height: Optional[int] = None,
230-
width: Optional[int] = None,
232+
height: int | None = None,
233+
width: int | None = None,
231234
**kwargs,
232235
):
233236
"""The parameters specifying how the dataset will be processed.
@@ -391,11 +394,15 @@ def _create_metadata(
391394
return metadata
392395

393396
def _generate_examples(
394-
self, data_path, manual_path, annotations_path, id_map
397+
self,
398+
data_path: epath.PathLike,
399+
manual_path: epath.Path | None,
400+
annotations_path: epath.Path,
401+
id_map: dict[str, int],
395402
):
396403
"""Yields examples."""
397404
beam = tfds.core.lazy_imports.apache_beam
398-
annotations = _preprocess_annotations(annotations_path, id_map) # pytype: disable=wrong-arg-types # always-use-return-annotations
405+
annotations = _preprocess_annotations(os.fspath(annotations_path), id_map)
399406
outs = _build_annotations_index(annotations)
400407
vids, ann_to_images, track_to_anns, vid_to_tracks = outs
401408

0 commit comments

Comments (0)