From 7b43a938f8e1b1f8597bf9e63911335cd31dc0a0 Mon Sep 17 00:00:00 2001
From: pan-vlados <pan.vlados.w@gmail.com>
Date: Wed, 18 Dec 2024 04:17:41 +0300
Subject: [PATCH 1/8] GH1074 Add type hint Series[list[str]] for
 Series.str.split with expand=False

---
 pandas-stubs/_typing.pyi      | 6 ++++--
 pandas-stubs/core/series.pyi  | 6 +++---
 pandas-stubs/core/strings.pyi | 4 ++++
 tests/test_frame.py           | 6 +++---
 tests/test_series.py          | 3 +++
 5 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi
index c4ee25c4b..56b83e176 100644
--- a/pandas-stubs/_typing.pyi
+++ b/pandas-stubs/_typing.pyi
@@ -547,7 +547,8 @@ S1 = TypeVar(
     | Period
     | Interval
     | CategoricalDtype
-    | BaseOffset,
+    | BaseOffset
+    | list[str],
 )
 
 S2 = TypeVar(
@@ -566,7 +567,8 @@ S2 = TypeVar(
     | Period
     | Interval
     | CategoricalDtype
-    | BaseOffset,
+    | BaseOffset
+    | list[str],
 )
 
 IndexingInt: TypeAlias = (
diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi
index 2907cec00..f2bb6d938 100644
--- a/pandas-stubs/core/series.pyi
+++ b/pandas-stubs/core/series.pyi
@@ -242,15 +242,15 @@ class Series(IndexOpsMixin[S1], NDFrame):
         copy: bool = ...,
     ) -> Series[float]: ...
     @overload
-    def __new__(  # type: ignore[overload-overlap]
+    def __new__(
         cls,
-        data: Sequence[Never],
+        data: Sequence[str],
         index: Axes | None = ...,
         *,
         dtype: Dtype = ...,
         name: Hashable = ...,
         copy: bool = ...,
-    ) -> Series[Any]: ...
+    ) -> Series[str]: ...
     @overload
     def __new__(
         cls,
diff --git a/pandas-stubs/core/strings.pyi b/pandas-stubs/core/strings.pyi
index 7e0dc880a..522504b4e 100644
--- a/pandas-stubs/core/strings.pyi
+++ b/pandas-stubs/core/strings.pyi
@@ -65,6 +65,10 @@ class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM]):
         self, pat: str = ..., *, n: int = ..., expand: Literal[True], regex: bool = ...
     ) -> _TS: ...
     @overload
+    def split(
+        self, pat: str = ..., *, n: int = ..., expand: Literal[False], regex: bool = ...
+    ) -> Series[list[str]]: ...
+    @overload
     def split(
         self, pat: str = ..., *, n: int = ..., expand: bool = ..., regex: bool = ...
     ) -> T: ...
diff --git a/tests/test_frame.py b/tests/test_frame.py
index d8cf99f84..8810a06b9 100644
--- a/tests/test_frame.py
+++ b/tests/test_frame.py
@@ -3767,9 +3767,9 @@ class MyDict(TypedDict):
 
 
 def test_series_empty_dtype() -> None:
-    """Test for the creation of a Series from an empty list GH571 to map to a Series[Any]."""
+    """Test for the creation of a Series from an empty list GH571 to map to a Series[str]."""
     new_tab: Sequence[Never] = []  # need to be typehinted to please mypy
-    check(assert_type(pd.Series(new_tab), "pd.Series[Any]"), pd.Series)
-    check(assert_type(pd.Series([]), "pd.Series[Any]"), pd.Series)
+    check(assert_type(pd.Series(new_tab), "pd.Series[str]"), pd.Series)
+    check(assert_type(pd.Series([]), "pd.Series[str]"), pd.Series)
     # ensure that an empty string does not get matched to Sequence[Never]
     check(assert_type(pd.Series(""), "pd.Series[str]"), pd.Series)
diff --git a/tests/test_series.py b/tests/test_series.py
index f35fd20aa..ac4d78b2e 100644
--- a/tests/test_series.py
+++ b/tests/test_series.py
@@ -1556,6 +1556,9 @@ def test_string_accessors():
     check(assert_type(s.str.split("a"), pd.Series), pd.Series)
     # GH 194
     check(assert_type(s.str.split("a", expand=True), pd.DataFrame), pd.DataFrame)
+    check(
+        assert_type(s.str.split("a", expand=False), "pd.Series[list[str]]"), pd.Series
+    )
     check(assert_type(s.str.startswith("a"), "pd.Series[bool]"), pd.Series, np.bool_)
     check(
         assert_type(s.str.startswith(("a", "b")), "pd.Series[bool]"),

From bb3e78f8afea41670f6c2445f1343657c904fbf5 Mon Sep 17 00:00:00 2001
From: pan-vlados <pan.vlados.w@gmail.com>
Date: Wed, 18 Dec 2024 21:21:37 +0300
Subject: [PATCH 2/8] Updates:

    - fix Index.str.split returning the wrong result type;
    - add a test for Index.str.split with expand=False;
    - restore the changes made in pull request #1029.
---
 pandas-stubs/core/indexes/base.pyi |  4 +++-
 pandas-stubs/core/series.pyi       | 24 +++++++++++++++++++++++-
 pandas-stubs/core/strings.pyi      |  7 +++++--
 tests/test_frame.py                |  6 +++---
 tests/test_indexes.py              |  3 +++
 5 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/pandas-stubs/core/indexes/base.pyi b/pandas-stubs/core/indexes/base.pyi
index 8f44bc5e3..763aae027 100644
--- a/pandas-stubs/core/indexes/base.pyi
+++ b/pandas-stubs/core/indexes/base.pyi
@@ -261,7 +261,9 @@ class Index(IndexOpsMixin[S1]):
         **kwargs,
     ) -> Self: ...
     @property
-    def str(self) -> StringMethods[Self, MultiIndex, np_ndarray_bool]: ...
+    def str(
+        self,
+    ) -> StringMethods[Self, MultiIndex, np_ndarray_bool, Index[list[str]]]: ...
     def is_(self, other) -> bool: ...
     def __len__(self) -> int: ...
     def __array__(self, dtype=...) -> np.ndarray: ...
diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi
index f2bb6d938..b91d3842b 100644
--- a/pandas-stubs/core/series.pyi
+++ b/pandas-stubs/core/series.pyi
@@ -242,6 +242,26 @@ class Series(IndexOpsMixin[S1], NDFrame):
         copy: bool = ...,
     ) -> Series[float]: ...
     @overload
+    def __new__(  # type: ignore[overload-overlap]
+        cls,
+        data: Sequence[Never],
+        index: Axes | None = ...,
+        *,
+        dtype: Dtype = ...,
+        name: Hashable = ...,
+        copy: bool = ...,
+    ) -> Series[Any]: ...
+    @overload
+    def __new__(
+        cls,
+        data: Sequence[list[str]],
+        index: Axes | None = ...,
+        *,
+        dtype: Dtype = ...,
+        name: Hashable = ...,
+        copy: bool = ...,
+    ) -> Series[list[str]]: ...
+    @overload
     def __new__(
         cls,
         data: Sequence[str],
@@ -1199,7 +1219,9 @@ class Series(IndexOpsMixin[S1], NDFrame):
     ) -> Series[S1]: ...
     def to_period(self, freq: _str | None = ..., copy: _bool = ...) -> DataFrame: ...
     @property
-    def str(self) -> StringMethods[Series, DataFrame, Series[bool]]: ...
+    def str(
+        self,
+    ) -> StringMethods[Series, DataFrame, Series[bool], Series[list[str]]]: ...
     @property
     def dt(self) -> CombinedDatetimelikeProperties: ...
     @property
diff --git a/pandas-stubs/core/strings.pyi b/pandas-stubs/core/strings.pyi
index 522504b4e..cefe321bb 100644
--- a/pandas-stubs/core/strings.pyi
+++ b/pandas-stubs/core/strings.pyi
@@ -15,6 +15,7 @@ import numpy as np
 import pandas as pd
 from pandas import (
     DataFrame,
+    Index,
     MultiIndex,
     Series,
 )
@@ -28,10 +29,12 @@ from pandas._typing import (
 
 # The _TS type is what is used for the result of str.split with expand=True
 _TS = TypeVar("_TS", DataFrame, MultiIndex)
+# The _TS2 type is what is used for the result of str.split with expand=False
+_TS2 = TypeVar("_TS2", Series[list[str]], Index[list[str]])
 # The _TM type is what is used for the result of str.match
 _TM = TypeVar("_TM", Series[bool], np_ndarray_bool)
 
-class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM]):
+class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM, _TS2]):
     def __init__(self, data: T) -> None: ...
     def __getitem__(self, key: slice | int) -> T: ...
     def __iter__(self) -> T: ...
@@ -67,7 +70,7 @@ class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM]):
     @overload
     def split(
         self, pat: str = ..., *, n: int = ..., expand: Literal[False], regex: bool = ...
-    ) -> Series[list[str]]: ...
+    ) -> _TS2: ...
     @overload
     def split(
         self, pat: str = ..., *, n: int = ..., expand: bool = ..., regex: bool = ...
diff --git a/tests/test_frame.py b/tests/test_frame.py
index 8810a06b9..d8cf99f84 100644
--- a/tests/test_frame.py
+++ b/tests/test_frame.py
@@ -3767,9 +3767,9 @@ class MyDict(TypedDict):
 
 
 def test_series_empty_dtype() -> None:
-    """Test for the creation of a Series from an empty list GH571 to map to a Series[str]."""
+    """Test for the creation of a Series from an empty list GH571 to map to a Series[Any]."""
     new_tab: Sequence[Never] = []  # need to be typehinted to please mypy
-    check(assert_type(pd.Series(new_tab), "pd.Series[str]"), pd.Series)
-    check(assert_type(pd.Series([]), "pd.Series[str]"), pd.Series)
+    check(assert_type(pd.Series(new_tab), "pd.Series[Any]"), pd.Series)
+    check(assert_type(pd.Series([]), "pd.Series[Any]"), pd.Series)
     # ensure that an empty string does not get matched to Sequence[Never]
     check(assert_type(pd.Series(""), "pd.Series[str]"), pd.Series)
diff --git a/tests/test_indexes.py b/tests/test_indexes.py
index aab49c405..fc4a7efb0 100644
--- a/tests/test_indexes.py
+++ b/tests/test_indexes.py
@@ -113,6 +113,9 @@ def test_str_split() -> None:
     ind = pd.Index(["a-b", "c-d"])
     check(assert_type(ind.str.split("-"), "pd.Index[str]"), pd.Index)
     check(assert_type(ind.str.split("-", expand=True), pd.MultiIndex), pd.MultiIndex)
+    check(
+        assert_type(ind.str.split("-", expand=False), "pd.Index[list[str]]"), pd.Index
+    )
 
 
 def test_str_match() -> None:

From f77583cd44440c16f199659f963069eddf73ac66 Mon Sep 17 00:00:00 2001
From: Vladislav <pan.vlados.w@gmail.com>
Date: Wed, 18 Dec 2024 22:22:20 +0300
Subject: [PATCH 3/8] Update tests/test_indexes.py

Co-authored-by: Irv Lustig <irv@princeton.com>
---
 tests/test_indexes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_indexes.py b/tests/test_indexes.py
index fc4a7efb0..b4140ab7f 100644
--- a/tests/test_indexes.py
+++ b/tests/test_indexes.py
@@ -114,7 +114,7 @@ def test_str_split() -> None:
     check(assert_type(ind.str.split("-"), "pd.Index[str]"), pd.Index)
     check(assert_type(ind.str.split("-", expand=True), pd.MultiIndex), pd.MultiIndex)
     check(
-        assert_type(ind.str.split("-", expand=False), "pd.Index[list[str]]"), pd.Index
+        assert_type(ind.str.split("-", expand=False), "pd.Index[list[str]]"), pd.Index, list
     )
 
 

From d93390d9bec37279612973513a3f035b93c66a08 Mon Sep 17 00:00:00 2001
From: Vladislav <pan.vlados.w@gmail.com>
Date: Wed, 18 Dec 2024 22:22:33 +0300
Subject: [PATCH 4/8] Update tests/test_series.py

Co-authored-by: Irv Lustig <irv@princeton.com>
---
 tests/test_series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_series.py b/tests/test_series.py
index ac4d78b2e..df23d5333 100644
--- a/tests/test_series.py
+++ b/tests/test_series.py
@@ -1553,7 +1553,7 @@ def test_string_accessors():
     check(assert_type(s.str.rstrip(), pd.Series), pd.Series)
     check(assert_type(s.str.slice(0, 4, 2), pd.Series), pd.Series)
     check(assert_type(s.str.slice_replace(0, 2, "XX"), pd.Series), pd.Series)
-    check(assert_type(s.str.split("a"), pd.Series), pd.Series)
+    check(assert_type(s.str.split("a"), "pd.Series[list[str]]"), pd.Series, list)
     # GH 194
     check(assert_type(s.str.split("a", expand=True), pd.DataFrame), pd.DataFrame)
     check(

From 577d375a217877419c8d3e26ef9b7036059d37f9 Mon Sep 17 00:00:00 2001
From: Vladislav <pan.vlados.w@gmail.com>
Date: Wed, 18 Dec 2024 22:22:45 +0300
Subject: [PATCH 5/8] Update tests/test_series.py

Co-authored-by: Irv Lustig <irv@princeton.com>
---
 tests/test_series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_series.py b/tests/test_series.py
index df23d5333..d2bf9e348 100644
--- a/tests/test_series.py
+++ b/tests/test_series.py
@@ -1557,7 +1557,7 @@ def test_string_accessors():
     # GH 194
     check(assert_type(s.str.split("a", expand=True), pd.DataFrame), pd.DataFrame)
     check(
-        assert_type(s.str.split("a", expand=False), "pd.Series[list[str]]"), pd.Series
+        assert_type(s.str.split("a", expand=False), "pd.Series[list[str]]"), pd.Series, list
     )
     check(assert_type(s.str.startswith("a"), "pd.Series[bool]"), pd.Series, np.bool_)
     check(

From e46842a18222bdcd4c312975bebf69fb6e07fb74 Mon Sep 17 00:00:00 2001
From: pan-vlados <pan.vlados.w@gmail.com>
Date: Wed, 18 Dec 2024 22:26:33 +0300
Subject: [PATCH 6/8] Updates:

    - merge the default and expand=False str.split overloads, keeping only the _TS and _TS2 return types;
    - fix the expected type in test_str_split() in tests/test_indexes.py.
---
 pandas-stubs/core/strings.pyi | 6 +-----
 tests/test_indexes.py         | 2 +-
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/pandas-stubs/core/strings.pyi b/pandas-stubs/core/strings.pyi
index cefe321bb..df208ef5e 100644
--- a/pandas-stubs/core/strings.pyi
+++ b/pandas-stubs/core/strings.pyi
@@ -69,13 +69,9 @@ class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM, _TS2]):
     ) -> _TS: ...
     @overload
     def split(
-        self, pat: str = ..., *, n: int = ..., expand: Literal[False], regex: bool = ...
+        self, pat: str = ..., *, n: int = ..., expand: Literal[False] = ..., regex: bool = ...
     ) -> _TS2: ...
     @overload
-    def split(
-        self, pat: str = ..., *, n: int = ..., expand: bool = ..., regex: bool = ...
-    ) -> T: ...
-    @overload
     def rsplit(self, pat: str = ..., *, n: int = ..., expand: Literal[True]) -> _TS: ...
     @overload
     def rsplit(self, pat: str = ..., *, n: int = ..., expand: bool = ...) -> T: ...
diff --git a/tests/test_indexes.py b/tests/test_indexes.py
index b4140ab7f..1712e6c28 100644
--- a/tests/test_indexes.py
+++ b/tests/test_indexes.py
@@ -111,7 +111,7 @@ def test_difference_none() -> None:
 def test_str_split() -> None:
     # GH 194
     ind = pd.Index(["a-b", "c-d"])
-    check(assert_type(ind.str.split("-"), "pd.Index[str]"), pd.Index)
+    check(assert_type(ind.str.split("-"), "pd.Index[list[str]]"), pd.Index)
     check(assert_type(ind.str.split("-", expand=True), pd.MultiIndex), pd.MultiIndex)
     check(
         assert_type(ind.str.split("-", expand=False), "pd.Index[list[str]]"), pd.Index, list

From 7ba4a4571a05a278cb63913b8307650908d3eded Mon Sep 17 00:00:00 2001
From: pan-vlados <pan.vlados.w@gmail.com>
Date: Wed, 18 Dec 2024 22:32:14 +0300
Subject: [PATCH 7/8] pre-commit fixes

---
 pandas-stubs/core/strings.pyi | 7 ++++++-
 tests/test_indexes.py         | 4 +++-
 tests/test_series.py          | 4 +++-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/pandas-stubs/core/strings.pyi b/pandas-stubs/core/strings.pyi
index df208ef5e..4ea794a5d 100644
--- a/pandas-stubs/core/strings.pyi
+++ b/pandas-stubs/core/strings.pyi
@@ -69,7 +69,12 @@ class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM, _TS2]):
     ) -> _TS: ...
     @overload
     def split(
-        self, pat: str = ..., *, n: int = ..., expand: Literal[False] = ..., regex: bool = ...
+        self,
+        pat: str = ...,
+        *,
+        n: int = ...,
+        expand: Literal[False] = ...,
+        regex: bool = ...,
     ) -> _TS2: ...
     @overload
     def rsplit(self, pat: str = ..., *, n: int = ..., expand: Literal[True]) -> _TS: ...
diff --git a/tests/test_indexes.py b/tests/test_indexes.py
index 1712e6c28..a70c45228 100644
--- a/tests/test_indexes.py
+++ b/tests/test_indexes.py
@@ -114,7 +114,9 @@ def test_str_split() -> None:
     check(assert_type(ind.str.split("-"), "pd.Index[list[str]]"), pd.Index)
     check(assert_type(ind.str.split("-", expand=True), pd.MultiIndex), pd.MultiIndex)
     check(
-        assert_type(ind.str.split("-", expand=False), "pd.Index[list[str]]"), pd.Index, list
+        assert_type(ind.str.split("-", expand=False), "pd.Index[list[str]]"),
+        pd.Index,
+        list,
     )
 
 
diff --git a/tests/test_series.py b/tests/test_series.py
index d2bf9e348..41dbaadfd 100644
--- a/tests/test_series.py
+++ b/tests/test_series.py
@@ -1557,7 +1557,9 @@ def test_string_accessors():
     # GH 194
     check(assert_type(s.str.split("a", expand=True), pd.DataFrame), pd.DataFrame)
     check(
-        assert_type(s.str.split("a", expand=False), "pd.Series[list[str]]"), pd.Series, list
+        assert_type(s.str.split("a", expand=False), "pd.Series[list[str]]"),
+        pd.Series,
+        list,
     )
     check(assert_type(s.str.startswith("a"), "pd.Series[bool]"), pd.Series, np.bool_)
     check(

From 667cef321f338603eb1f77e073919eecf973f7c0 Mon Sep 17 00:00:00 2001
From: pan-vlados <pan.vlados.w@gmail.com>
Date: Wed, 18 Dec 2024 23:00:55 +0300
Subject: [PATCH 8/8] Add type hints and tests for str.rsplit() for
 expand=False

---
 pandas-stubs/core/strings.pyi |  4 +++-
 tests/test_indexes.py         | 14 +++++++++++++-
 tests/test_series.py          |  7 ++++++-
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/pandas-stubs/core/strings.pyi b/pandas-stubs/core/strings.pyi
index 4ea794a5d..b952ced0d 100644
--- a/pandas-stubs/core/strings.pyi
+++ b/pandas-stubs/core/strings.pyi
@@ -79,7 +79,9 @@ class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM, _TS2]):
     @overload
     def rsplit(self, pat: str = ..., *, n: int = ..., expand: Literal[True]) -> _TS: ...
     @overload
-    def rsplit(self, pat: str = ..., *, n: int = ..., expand: bool = ...) -> T: ...
+    def rsplit(
+        self, pat: str = ..., *, n: int = ..., expand: Literal[False] = ...
+    ) -> _TS2: ...
     @overload
     def partition(self, sep: str = ...) -> pd.DataFrame: ...
     @overload
diff --git a/tests/test_indexes.py b/tests/test_indexes.py
index a70c45228..468908ad5 100644
--- a/tests/test_indexes.py
+++ b/tests/test_indexes.py
@@ -111,7 +111,7 @@ def test_difference_none() -> None:
 def test_str_split() -> None:
     # GH 194
     ind = pd.Index(["a-b", "c-d"])
-    check(assert_type(ind.str.split("-"), "pd.Index[list[str]]"), pd.Index)
+    check(assert_type(ind.str.split("-"), "pd.Index[list[str]]"), pd.Index, list)
     check(assert_type(ind.str.split("-", expand=True), pd.MultiIndex), pd.MultiIndex)
     check(
         assert_type(ind.str.split("-", expand=False), "pd.Index[list[str]]"),
@@ -120,6 +120,18 @@ def test_str_split() -> None:
     )
 
 
+def test_str_rsplit() -> None:
+    # GH 1074
+    ind = pd.Index(["a-b", "c-d"])
+    check(assert_type(ind.str.rsplit("-"), "pd.Index[list[str]]"), pd.Index, list)
+    check(assert_type(ind.str.rsplit("-", expand=True), pd.MultiIndex), pd.MultiIndex)
+    check(
+        assert_type(ind.str.rsplit("-", expand=False), "pd.Index[list[str]]"),
+        pd.Index,
+        list,
+    )
+
+
 def test_str_match() -> None:
     i = pd.Index(
         ["applep", "bananap", "Cherryp", "DATEp", "eGGpLANTp", "123p", "23.45p"]
diff --git a/tests/test_series.py b/tests/test_series.py
index 41dbaadfd..1aca73c50 100644
--- a/tests/test_series.py
+++ b/tests/test_series.py
@@ -1548,8 +1548,13 @@ def test_string_accessors():
     check(assert_type(s.str.rindex("p"), pd.Series), pd.Series)
     check(assert_type(s.str.rjust(80), pd.Series), pd.Series)
     check(assert_type(s.str.rpartition("p"), pd.DataFrame), pd.DataFrame)
-    check(assert_type(s.str.rsplit("a"), pd.Series), pd.Series)
+    check(assert_type(s.str.rsplit("a"), "pd.Series[list[str]]"), pd.Series, list)
     check(assert_type(s.str.rsplit("a", expand=True), pd.DataFrame), pd.DataFrame)
+    check(
+        assert_type(s.str.rsplit("a", expand=False), "pd.Series[list[str]]"),
+        pd.Series,
+        list,
+    )
     check(assert_type(s.str.rstrip(), pd.Series), pd.Series)
     check(assert_type(s.str.slice(0, 4, 2), pd.Series), pd.Series)
     check(assert_type(s.str.slice_replace(0, 2, "XX"), pd.Series), pd.Series)