Skip to content

Commit bb3e78f

Browse files
committed
Updates:
- fix Index.str.split returning the wrong result type; - add a test for Index.str.split with expand=False; - restore the changes made in pull request pandas-dev#1029.
1 parent 7b43a93 commit bb3e78f

File tree

5 files changed

+37
-7
lines changed

5 files changed

+37
-7
lines changed

pandas-stubs/core/indexes/base.pyi

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,9 @@ class Index(IndexOpsMixin[S1]):
261261
**kwargs,
262262
) -> Self: ...
263263
@property
264-
def str(self) -> StringMethods[Self, MultiIndex, np_ndarray_bool]: ...
264+
def str(
265+
self,
266+
) -> StringMethods[Self, MultiIndex, np_ndarray_bool, Index[list[str]]]: ...
265267
def is_(self, other) -> bool: ...
266268
def __len__(self) -> int: ...
267269
def __array__(self, dtype=...) -> np.ndarray: ...

pandas-stubs/core/series.pyi

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,26 @@ class Series(IndexOpsMixin[S1], NDFrame):
242242
copy: bool = ...,
243243
) -> Series[float]: ...
244244
@overload
245+
def __new__( # type: ignore[overload-overlap]
246+
cls,
247+
data: Sequence[Never],
248+
index: Axes | None = ...,
249+
*,
250+
dtype: Dtype = ...,
251+
name: Hashable = ...,
252+
copy: bool = ...,
253+
) -> Series[Any]: ...
254+
@overload
255+
def __new__(
256+
cls,
257+
data: Sequence[list[str]],
258+
index: Axes | None = ...,
259+
*,
260+
dtype: Dtype = ...,
261+
name: Hashable = ...,
262+
copy: bool = ...,
263+
) -> Series[list[str]]: ...
264+
@overload
245265
def __new__(
246266
cls,
247267
data: Sequence[str],
@@ -1199,7 +1219,9 @@ class Series(IndexOpsMixin[S1], NDFrame):
11991219
) -> Series[S1]: ...
12001220
def to_period(self, freq: _str | None = ..., copy: _bool = ...) -> DataFrame: ...
12011221
@property
1202-
def str(self) -> StringMethods[Series, DataFrame, Series[bool]]: ...
1222+
def str(
1223+
self,
1224+
) -> StringMethods[Series, DataFrame, Series[bool], Series[list[str]]]: ...
12031225
@property
12041226
def dt(self) -> CombinedDatetimelikeProperties: ...
12051227
@property

pandas-stubs/core/strings.pyi

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import numpy as np
1515
import pandas as pd
1616
from pandas import (
1717
DataFrame,
18+
Index,
1819
MultiIndex,
1920
Series,
2021
)
@@ -28,10 +29,12 @@ from pandas._typing import (
2829

2930
# The _TS type is what is used for the result of str.split with expand=True
3031
_TS = TypeVar("_TS", DataFrame, MultiIndex)
32+
# The _TS2 type is what is used for the result of str.split with expand=False
33+
_TS2 = TypeVar("_TS2", Series[list[str]], Index[list[str]])
3134
# The _TM type is what is used for the result of str.match
3235
_TM = TypeVar("_TM", Series[bool], np_ndarray_bool)
3336

34-
class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM]):
37+
class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM, _TS2]):
3538
def __init__(self, data: T) -> None: ...
3639
def __getitem__(self, key: slice | int) -> T: ...
3740
def __iter__(self) -> T: ...
@@ -67,7 +70,7 @@ class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM]):
6770
@overload
6871
def split(
6972
self, pat: str = ..., *, n: int = ..., expand: Literal[False], regex: bool = ...
70-
) -> Series[list[str]]: ...
73+
) -> _TS2: ...
7174
@overload
7275
def split(
7376
self, pat: str = ..., *, n: int = ..., expand: bool = ..., regex: bool = ...

tests/test_frame.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3767,9 +3767,9 @@ class MyDict(TypedDict):
37673767

37683768

37693769
def test_series_empty_dtype() -> None:
3770-
"""Test for the creation of a Series from an empty list GH571 to map to a Series[str]."""
3770+
"""Test for the creation of a Series from an empty list GH571 to map to a Series[Any]."""
37713771
new_tab: Sequence[Never] = [] # need to be typehinted to please mypy
3772-
check(assert_type(pd.Series(new_tab), "pd.Series[str]"), pd.Series)
3773-
check(assert_type(pd.Series([]), "pd.Series[str]"), pd.Series)
3772+
check(assert_type(pd.Series(new_tab), "pd.Series[Any]"), pd.Series)
3773+
check(assert_type(pd.Series([]), "pd.Series[Any]"), pd.Series)
37743774
# ensure that an empty string does not get matched to Sequence[Never]
37753775
check(assert_type(pd.Series(""), "pd.Series[str]"), pd.Series)

tests/test_indexes.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,9 @@ def test_str_split() -> None:
113113
ind = pd.Index(["a-b", "c-d"])
114114
check(assert_type(ind.str.split("-"), "pd.Index[str]"), pd.Index)
115115
check(assert_type(ind.str.split("-", expand=True), pd.MultiIndex), pd.MultiIndex)
116+
check(
117+
assert_type(ind.str.split("-", expand=False), "pd.Index[list[str]]"), pd.Index
118+
)
116119

117120

118121
def test_str_match() -> None:

0 commit comments

Comments
 (0)