From 957efd0f95ec413f7ddb5f2138677b207c7408c5 Mon Sep 17 00:00:00 2001
From: Simon Hawkins
Date: Thu, 11 Sep 2025 11:55:54 +0100
Subject: [PATCH 1/4] TYP: improve type annotations and remove unnecessary type ignores

- Add and refine type hints in core modules (algorithms, base, indexes, _version).
- Remove redundant `# type: ignore[no-untyped-call]` comments in groupby/categorical.py.
- Refactor variable annotations for clarity and consistency.
- No functional changes; improves code quality and type safety.
---
 pandas/_version.py                 |  3 ++-
 pandas/core/algorithms.py          | 13 +++++++++++++
 pandas/core/base.py                |  2 +-
 pandas/core/groupby/categorical.py |  4 ++--
 pandas/core/indexes/base.py        |  4 +++-
 pandas/util/_print_versions.py     |  2 +-
 6 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/pandas/_version.py b/pandas/_version.py
index 54d5bb8c2dc91..1c64192cd76af 100644
--- a/pandas/_version.py
+++ b/pandas/_version.py
@@ -17,6 +17,7 @@
 import re
 import subprocess
 import sys
+from typing import Any
 
 
 def get_keywords():
@@ -640,7 +641,7 @@ def render(pieces, style):
     }
 
 
-def get_versions():
+def get_versions() -> dict[str, Any]:
     """Get version information or return default if unable to do so."""
     # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
     # __file__, we can work backwards from there to the root. Some
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 8126bd072a8dc..4aa35eb815a3e 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -12,6 +12,7 @@
     TYPE_CHECKING,
     Literal,
     cast,
+    overload,
 )
 import warnings
 
@@ -314,6 +315,18 @@ def _check_object_for_strings(values: np.ndarray) -> str:
 # --------------- #
 
 
+@overload
+def unique(values: np.ndarray) -> np.ndarray: ...
+@overload
+def unique(values: Index) -> Index: ...
+@overload
+def unique(values: Series) -> np.ndarray: ...
+@overload
+def unique(values: Categorical) -> Categorical: ...
+@overload
+def unique(values: ExtensionArray) -> ExtensionArray: ...
+
+
 def unique(values):
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 6d2ab581470bc..bffe29ca52fba 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1100,7 +1100,7 @@ def unique(self):
         values = self._values
         if not isinstance(values, np.ndarray):
             # i.e. ExtensionArray
-            result = values.unique()
+            result: np.ndarray | ExtensionArray = values.unique()
         else:
             result = algorithms.unique1d(values)
         return result
diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py
index 806f34975b8a7..0daa6d8f8101b 100644
--- a/pandas/core/groupby/categorical.py
+++ b/pandas/core/groupby/categorical.py
@@ -46,7 +46,7 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorica
         # In cases with c.ordered, this is equivalent to
         #  return c.remove_unused_categories(), c
 
-        take_codes = unique1d(c.codes[c.codes != -1])  # type: ignore[no-untyped-call]
+        take_codes = unique1d(c.codes[c.codes != -1])
         if sort:
             take_codes = np.sort(take_codes)
@@ -68,7 +68,7 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorica
 
     # GH:46909: Re-ordering codes faster than using (set|add|reorder)_categories
     # GH 38140: exclude nan from indexer for categories
-    unique_notnan_codes = unique1d(c.codes[c.codes != -1])  # type: ignore[no-untyped-call]
+    unique_notnan_codes = unique1d(c.codes[c.codes != -1])
     if sort:
         unique_notnan_codes = np.sort(unique_notnan_codes)
     if (num_cat := len(c.categories)) > len(unique_notnan_codes):
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index ddde24e72c65c..b02429bc4fbcb 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3301,7 +3301,9 @@ def _intersection(self, other: Index, sort: bool = False):
                 if is_numeric_dtype(self.dtype):
                     # This is faster, because Index.unique() checks for uniqueness
                     # before calculating the unique values.
-                    res = algos.unique1d(res_indexer)
+                    res: Index | ExtensionArray | np.ndarray = algos.unique1d(
+                        res_indexer
+                    )
                 else:
                     result = self.take(indexer)
                     res = result.drop_duplicates()
diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py
index efcb50bf4345c..00724c76b9ba8 100644
--- a/pandas/util/_print_versions.py
+++ b/pandas/util/_print_versions.py
@@ -32,7 +32,7 @@ def _get_commit_hash() -> str | None:
     except ImportError:
         from pandas._version import get_versions
 
-        versions = get_versions()  # type: ignore[no-untyped-call]
+        versions = get_versions()
         return versions["full-revisionid"]

From 647c1d45c9fa79b89fead1bbe97e763a445f931e Mon Sep 17 00:00:00 2001
From: Simon Hawkins
Date: Fri, 12 Sep 2025 11:38:46 +0100
Subject: [PATCH 2/4] remove Any for now while discussions ongoing

---
 pandas/_version.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/_version.py b/pandas/_version.py
index 1c64192cd76af..80a13ec607130 100644
--- a/pandas/_version.py
+++ b/pandas/_version.py
@@ -17,7 +17,6 @@
 import re
 import subprocess
 import sys
-from typing import Any
 
 
 def get_keywords():
@@ -641,7 +640,7 @@ def render(pieces, style):
     }
 
 
-def get_versions() -> dict[str, Any]:
+def get_versions() -> dict:
     """Get version information or return default if unable to do so."""
     # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
     # __file__, we can work backwards from there to the root. Some

From babc39606a952531c38a4bc1eeb34a28c05d4fec Mon Sep 17 00:00:00 2001
From: Simon Hawkins
Date: Fri, 12 Sep 2025 11:53:09 +0100
Subject: [PATCH 3/4] prefer ignore[assignment] over explicit variable annotations for now

- precursor PR to change global config so these won't be necessary
---
 pandas/core/base.py         | 4 ++--
 pandas/core/indexes/base.py | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index bffe29ca52fba..7d7e43808be5c 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1100,9 +1100,9 @@ def unique(self):
         values = self._values
         if not isinstance(values, np.ndarray):
             # i.e. ExtensionArray
-            result: np.ndarray | ExtensionArray = values.unique()
+            result = values.unique()
         else:
-            result = algorithms.unique1d(values)
+            result = algorithms.unique1d(values)  # type: ignore[assignment]
         return result
 
     @final
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index b02429bc4fbcb..bcf647acb98b1 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3301,12 +3301,10 @@ def _intersection(self, other: Index, sort: bool = False):
                 if is_numeric_dtype(self.dtype):
                     # This is faster, because Index.unique() checks for uniqueness
                     # before calculating the unique values.
-                    res: Index | ExtensionArray | np.ndarray = algos.unique1d(
-                        res_indexer
-                    )
+                    res = algos.unique1d(res_indexer)
                 else:
                     result = self.take(indexer)
-                    res = result.drop_duplicates()
+                    res = result.drop_duplicates()  # type: ignore[assignment]
                 return ensure_wrapped_if_datetimelike(res)
 
         res_values = self._intersection_via_get_indexer(other, sort=sort)

From 3607e75be781c8471f3c620257a7212720646d1f Mon Sep 17 00:00:00 2001
From: Simon Hawkins
Date: Fri, 12 Sep 2025 12:35:22 +0100
Subject: [PATCH 4/4] combine the overloads

---
 pandas/core/algorithms.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 4aa35eb815a3e..d8953da5490cd 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -11,6 +11,7 @@
 from typing import (
     TYPE_CHECKING,
     Literal,
+    TypeVar,
     cast,
     overload,
 )
@@ -105,6 +106,8 @@
         ExtensionArray,
     )
 
+    T = TypeVar("T", bound=Index | Categorical | ExtensionArray)
+
 
 # --------------- #
 # dtype access    #
@@ -316,15 +319,9 @@ def _check_object_for_strings(values: np.ndarray) -> str:
 
 
 @overload
-def unique(values: np.ndarray) -> np.ndarray: ...
-@overload
-def unique(values: Index) -> Index: ...
-@overload
-def unique(values: Series) -> np.ndarray: ...
-@overload
-def unique(values: Categorical) -> Categorical: ...
+def unique(values: T) -> T: ...
 @overload
-def unique(values: ExtensionArray) -> ExtensionArray: ...
+def unique(values: np.ndarray | Series) -> np.ndarray: ...
 
 
 def unique(values):
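
Reviewer note, not part of the patch series above: the following is a minimal, self-contained sketch of how the combined overloads from PATCH 4/4 are meant to resolve for a static type checker. The simplified module layout, the delegation to pd.unique, and the inferred types listed in the comments are assumptions for illustration only; the actual change lives in pandas/core/algorithms.py as shown in the diffs.

from __future__ import annotations

from typing import TYPE_CHECKING, TypeVar, overload

import numpy as np
import pandas as pd

if TYPE_CHECKING:
    from pandas.api.extensions import ExtensionArray

    # Mirrors the TypeVar added in PATCH 4/4; like the patch, it is defined
    # under TYPE_CHECKING so it only exists for the type checker.
    T = TypeVar("T", bound=pd.Index | pd.Categorical | ExtensionArray)


@overload
def unique(values: T) -> T: ...
@overload
def unique(values: np.ndarray | pd.Series) -> np.ndarray: ...


def unique(values):
    # Runtime behaviour is unchanged by the overloads above; they only
    # describe the static return type.  This sketch simply delegates to the
    # public pandas function.
    return pd.unique(values)


# A checker such as mypy is then expected to infer (assumed, not verified
# against pandas CI):
#   unique(pd.Index([1, 1, 2]))         -> Index
#   unique(pd.Categorical(["a", "a"]))  -> Categorical
#   unique(pd.Series([1, 1, 2]))        -> numpy.ndarray
#   unique(np.array([1, 1, 2]))         -> numpy.ndarray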