Skip to content
Closed
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,7 @@ Conversion

Strings
^^^^^^^
- Bug in :meth:`Series.str.split` where a ``pat`` that is not a single character was not treated as a regular expression when ``regex=None`` for a :class:`Series` with ``pd.ArrowDtype(pa.string())`` dtype (:issue:`58321`)
- Bug in :meth:`Series.value_counts` would not respect ``sort=False`` for series having ``string`` dtype (:issue:`55224`)
-

Expand Down
3 changes: 3 additions & 0 deletions pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -920,6 +920,9 @@ def split(
)
if is_re(pat):
regex = True
elif isinstance(pat, str) and regex is None:
# regex is None: fall back to the legacy rule (GH 43563), where a single-character pat is a literal and any other length is a regex
regex = len(pat) != 1
result = self._data.array._str_split(pat, n, expand, regex)
if self._data.dtype == "category":
dtype = self._data.dtype.categories.dtype
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2296,6 +2296,27 @@ def test_str_split_pat_none(method):
tm.assert_series_equal(result, expected)


def test_str_split_regex_explicit():
    # GH 58321
    # adapted from tests/strings/test_split_partition.py
    # Exercises how ``pat`` is interpreted by ``str.split`` on an Arrow-backed
    # string Series, both with ``regex`` given and with it left unspecified.
    ser = pd.Series("xxxjpgzzz.jpg", dtype=ArrowDtype(pa.string()))

    def as_expected(parts):
        # Wrap one list of split pieces as a single-row Arrow-backed Series.
        return pd.Series(ArrowExtensionArray(pa.array([parts])))

    # explicit regex = False split: the pattern is taken literally, and the
    # backslash-dot text does not occur in the value, so nothing is split
    literal_split = ser.str.split(r"\.jpg", regex=False)
    tm.assert_series_equal(literal_split, as_expected(["xxxjpgzzz.jpg"]))

    # non explicit regex split, pattern length == 1: "." is a literal dot
    single_char_split = ser.str.split(r".")
    tm.assert_series_equal(single_char_split, as_expected(["xxxjpgzzz", "jpg"]))

    # non explicit regex split, pattern length != 1: ".jpg" is a regex, so
    # "." matches any character in front of "jpg"
    multi_char_split = ser.str.split(r".jpg")
    tm.assert_series_equal(multi_char_split, as_expected(["xx", "zzz", ""]))


def test_str_split():
# GH 52401
ser = pd.Series(["a1cbcb", "a2cbcb", None], dtype=ArrowDtype(pa.string()))
Expand Down