Skip to content
Merged
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v2.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,9 @@ Conversion
Strings
^^^^^^^
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
- Bug in ``ser.str.slice`` with negative ``step`` with :class:`ArrowDtype` giving incorrect results (:issue:`59710`)
- Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the python behavior in corner cases with an odd number of fill characters (:issue:`54792`)

-

Interval
^^^^^^^^
Expand Down
32 changes: 23 additions & 9 deletions pandas/core/arrays/_arrow_string_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from pandas.compat import (
pa_version_under10p1,
pa_version_under11p0,
pa_version_under13p0,
pa_version_under17p0,
)
Expand All @@ -22,10 +23,7 @@
import pyarrow.compute as pc

if TYPE_CHECKING:
from collections.abc import (
Callable,
Sized,
)
from collections.abc import Callable

from pandas._typing import (
Scalar,
Expand All @@ -34,7 +32,7 @@


class ArrowStringArrayMixin:
_pa_array: Sized
_pa_array: pa.ChunkedArray

def __init__(self, *args, **kwargs) -> None:
raise NotImplementedError
Expand Down Expand Up @@ -96,13 +94,29 @@ def _str_get(self, i: int) -> Self:
selected = pc.utf8_slice_codeunits(
self._pa_array, start=start, stop=stop, step=step
)
null_value = pa.scalar(
None,
type=self._pa_array.type, # type: ignore[attr-defined]
)
null_value = pa.scalar(None, type=self._pa_array.type)
result = pc.if_else(not_out_of_bounds, selected, null_value)
return type(self)(result)

def _str_slice(
self, start: int | None = None, stop: int | None = None, step: int | None = None
) -> Self:
if pa_version_under11p0:
# GH#59724
result = self._apply_elementwise(lambda val: val[start:stop:step])
return type(self)(pa.chunked_array(result, type=self._pa_array.type))
if start is None:
if step is not None and step < 0:
# GH#59710
start = -1
else:
start = 0
if step is None:
step = 1
return type(self)(
pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
)

def _str_slice_replace(
self, start: int | None = None, stop: int | None = None, repl: str | None = None
) -> Self:
Expand Down
11 changes: 0 additions & 11 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2393,17 +2393,6 @@ def _str_rpartition(self, sep: str, expand: bool) -> Self:
result = self._apply_elementwise(predicate)
return type(self)(pa.chunked_array(result))

def _str_slice(
self, start: int | None = None, stop: int | None = None, step: int | None = None
) -> Self:
if start is None:
start = 0
if step is None:
step = 1
return type(self)(
pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
)

def _str_len(self) -> Self:
return type(self)(pc.utf8_length(self._pa_array))

Expand Down
14 changes: 1 addition & 13 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ def astype(self, dtype, copy: bool = True):
_str_startswith = ArrowStringArrayMixin._str_startswith
_str_endswith = ArrowStringArrayMixin._str_endswith
_str_pad = ArrowStringArrayMixin._str_pad
_str_slice = ArrowStringArrayMixin._str_slice

def _str_contains(
self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
Expand Down Expand Up @@ -351,19 +352,6 @@ def _str_fullmatch(
pat = f"{pat}$"
return self._str_match(pat, case, flags, na)

def _str_slice(
self, start: int | None = None, stop: int | None = None, step: int | None = None
) -> Self:
if stop is None:
return super()._str_slice(start, stop, step)
if start is None:
start = 0
if step is None:
step = 1
return type(self)(
pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
)

def _str_len(self):
result = pc.utf8_length(self._pa_array)
return self._convert_int_result(result)
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2036,6 +2036,7 @@ def test_str_join_string_type():
[None, 2, None, ["ab", None]],
[None, 2, 1, ["ab", None]],
[1, 3, 1, ["bc", None]],
(None, None, -1, ["dcba", None]),
],
)
def test_str_slice(start, stop, step, exp):
Expand Down