From a946f6f35d84a84467fa592c2a6f4bf83cd96ec7 Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Sun, 1 Sep 2024 22:49:53 +0200 Subject: [PATCH 1/4] fix(typing): #983 return type of StringMethods.match --- pandas-stubs/core/strings.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas-stubs/core/strings.pyi b/pandas-stubs/core/strings.pyi index a21074dad..a1a61bb2e 100644 --- a/pandas-stubs/core/strings.pyi +++ b/pandas-stubs/core/strings.pyi @@ -100,7 +100,7 @@ class StringMethods(NoNewAttributesMixin, Generic[T, _TS]): ) -> Series[bool]: ... def match( self, pat: str, case: bool = ..., flags: int = ..., na: Any = ... - ) -> T: ... + ) -> Series[bool]: ... def replace( self, pat: str, From 2eafd54c85c0136b05b3277d25734ef1e0545b9f Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Sun, 1 Sep 2024 23:07:38 +0200 Subject: [PATCH 2/4] feat(string): #983 tests for the fix --- tests/test_indexes.py | 7 +++++++ tests/test_series.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_indexes.py b/tests/test_indexes.py index 85edc3b57..6777736d9 100644 --- a/tests/test_indexes.py +++ b/tests/test_indexes.py @@ -113,6 +113,13 @@ def test_str_split() -> None: check(assert_type(ind.str.split("-", expand=True), pd.MultiIndex), pd.MultiIndex) +def test_str_match() -> None: + i = pd.Index( + ["applep", "bananap", "Cherryp", "DATEp", "eGGpLANTp", "123p", "23.45p"] + ) + check(assert_type(i.str.match("pp"), npt.NDArray[np.bool_]), np.ndarray, np.bool_) + + def test_index_rename() -> None: ind = pd.Index([1, 2, 3], name="foo") ind2 = ind.rename("goo") diff --git a/tests/test_series.py b/tests/test_series.py index e68028cc4..bcac374ed 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -1481,7 +1481,7 @@ def test_string_accessors(): check(assert_type(s.str.ljust(80), pd.Series), pd.Series) check(assert_type(s.str.lower(), pd.Series), pd.Series) check(assert_type(s.str.lstrip("a"), pd.Series), pd.Series) - check(assert_type(s.str.match("pp"), pd.Series), pd.Series) + check(assert_type(s.str.match("pp"), "pd.Series[bool]"), pd.Series, np.bool_) check(assert_type(s.str.normalize("NFD"), pd.Series), pd.Series) check(assert_type(s.str.pad(80, "right"), pd.Series), pd.Series) check(assert_type(s.str.partition("p"), pd.DataFrame), pd.DataFrame) From 9c58445ced9c799c4c020b8e22541bdfeb2da0d4 Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Wed, 4 Sep 2024 10:04:43 +0200 Subject: [PATCH 3/4] feat: #983 new TypeVar, following https://github.com/pandas-dev/pandas-stubs/pull/990#issuecomment-2327478740 --- pandas-stubs/core/indexes/base.pyi | 3 ++- pandas-stubs/core/series.pyi | 2 +- pandas-stubs/core/strings.pyi | 7 +++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas-stubs/core/indexes/base.pyi b/pandas-stubs/core/indexes/base.pyi index b8dc94448..db921a446 100644 --- a/pandas-stubs/core/indexes/base.pyi +++ b/pandas-stubs/core/indexes/base.pyi @@ -18,6 +18,7 @@ from typing import ( ) import numpy as np +from numpy import typing as npt from pandas import ( DataFrame, DatetimeIndex, @@ -260,7 +261,7 @@ class Index(IndexOpsMixin[S1]): **kwargs, ) -> Self: ... @property - def str(self) -> StringMethods[Self, MultiIndex]: ... + def str(self) -> StringMethods[Self, MultiIndex, npt.NDArray[np.bool_]]: ... def is_(self, other) -> bool: ... def __len__(self) -> int: ... def __array__(self, dtype=...) -> np.ndarray: ... diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 209237746..e12fa2cc7 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -1163,7 +1163,7 @@ class Series(IndexOpsMixin[S1], NDFrame): ) -> Series[S1]: ... def to_period(self, freq: _str | None = ..., copy: _bool = ...) -> DataFrame: ... @property - def str(self) -> StringMethods[Series, DataFrame]: ... + def str(self) -> StringMethods[Series, DataFrame, Series[bool]]: ... @property def dt(self) -> CombinedDatetimelikeProperties: ... @property diff --git a/pandas-stubs/core/strings.pyi b/pandas-stubs/core/strings.pyi index a1a61bb2e..09e8b1a65 100644 --- a/pandas-stubs/core/strings.pyi +++ b/pandas-stubs/core/strings.pyi @@ -12,6 +12,7 @@ from typing import ( ) import numpy as np +from numpy import typing as npt import pandas as pd from pandas import ( DataFrame, @@ -27,8 +28,10 @@ from pandas._typing import ( # The _TS type is what is used for the result of str.split with expand=True _TS = TypeVar("_TS", DataFrame, MultiIndex) +# The _TM type is what is used for the result of str.match +_TM = TypeVar("_TM", Series[bool], npt.NDArray[np.bool_]) -class StringMethods(NoNewAttributesMixin, Generic[T, _TS]): +class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM]): def __init__(self, data: T) -> None: ... def __getitem__(self, key: slice | int) -> T: ... def __iter__(self) -> T: ... @@ -100,7 +103,7 @@ class StringMethods(NoNewAttributesMixin, Generic[T, _TS]): ) -> Series[bool]: ... def match( self, pat: str, case: bool = ..., flags: int = ..., na: Any = ... - ) -> Series[bool]: ... + ) -> _TM: ... def replace( self, pat: str, From 6410ee67da1e78282fb9c4cd1174122aac4e921c Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Wed, 4 Sep 2024 18:02:34 +0200 Subject: [PATCH 4/4] fix(comment): #983 use `np_ndarray_bool` from `pandas._typing` in stubs @Dr-Irv https://github.com/pandas-dev/pandas-stubs/pull/990#pullrequestreview-2280575336 --- pandas-stubs/core/indexes/base.pyi | 3 +-- pandas-stubs/core/strings.pyi | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas-stubs/core/indexes/base.pyi b/pandas-stubs/core/indexes/base.pyi index db921a446..297e94057 100644 --- a/pandas-stubs/core/indexes/base.pyi +++ b/pandas-stubs/core/indexes/base.pyi @@ -18,7 +18,6 @@ from typing import ( ) import numpy as np -from numpy import typing as npt from pandas import ( DataFrame, DatetimeIndex, @@ -261,7 +260,7 @@ class Index(IndexOpsMixin[S1]): **kwargs, ) -> Self: ... @property - def str(self) -> StringMethods[Self, MultiIndex, npt.NDArray[np.bool_]]: ... + def str(self) -> StringMethods[Self, MultiIndex, np_ndarray_bool]: ... def is_(self, other) -> bool: ... def __len__(self) -> int: ... def __array__(self, dtype=...) -> np.ndarray: ... diff --git a/pandas-stubs/core/strings.pyi b/pandas-stubs/core/strings.pyi index 09e8b1a65..a3596aa5c 100644 --- a/pandas-stubs/core/strings.pyi +++ b/pandas-stubs/core/strings.pyi @@ -12,7 +12,6 @@ from typing import ( ) import numpy as np -from numpy import typing as npt import pandas as pd from pandas import ( DataFrame, @@ -24,12 +23,13 @@ from pandas.core.base import NoNewAttributesMixin from pandas._typing import ( JoinHow, T, + np_ndarray_bool, ) # The _TS type is what is used for the result of str.split with expand=True _TS = TypeVar("_TS", DataFrame, MultiIndex) # The _TM type is what is used for the result of str.match -_TM = TypeVar("_TM", Series[bool], npt.NDArray[np.bool_]) +_TM = TypeVar("_TM", Series[bool], np_ndarray_bool) class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM]): def __init__(self, data: T) -> None: ...