Skip to content

Commit 8f430f9

Browse files
committed
REF: move implementations to mixin
1 parent f970e3f commit 8f430f9

File tree

2 files changed

+38
-37
lines changed

2 files changed

+38
-37
lines changed

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from functools import partial
4+
import re
45
from typing import (
56
TYPE_CHECKING,
67
Any,
@@ -159,6 +160,33 @@ def _str_slice_replace(
159160
stop = np.iinfo(np.int64).max
160161
return type(self)(pc.utf8_replace_slice(self._pa_array, start, stop, repl))
161162

163+
def _str_replace(
    self,
    pat: str | re.Pattern,
    repl: str | Callable,
    n: int = -1,
    case: bool = True,
    flags: int = 0,
    regex: bool = True,
) -> Self:
    """
    Replace occurrences of ``pat`` with ``repl`` via pyarrow compute.

    Parameters
    ----------
    pat : str or re.Pattern
        Pattern to search for. A compiled ``re.Pattern`` is rejected.
    repl : str or Callable
        Replacement text. A callable is rejected.
    n : int, default -1
        Passed on as ``max_replacements``; any negative value is
        translated to ``None``.
    case : bool, default True
        Must be truthy; anything else is rejected.
    flags : int, default 0
        Must be falsy; anything else is rejected.
    regex : bool, default True
        Selects ``pc.replace_substring_regex`` when truthy and
        ``pc.replace_substring`` otherwise.

    Returns
    -------
    Self
        ``type(self)`` constructed from the kernel's result.

    Raises
    ------
    NotImplementedError
        If ``pat`` is a ``re.Pattern``, ``repl`` is callable, ``case`` is
        falsy, or ``flags`` is truthy.
    """
    unsupported = isinstance(pat, re.Pattern) or callable(repl) or not case or flags
    if unsupported:
        raise NotImplementedError(
            "replace is not supported with a re.Pattern, callable repl, "
            "case=False, or flags!=0"
        )

    if regex:
        replace_kernel = pc.replace_substring_regex
    else:
        replace_kernel = pc.replace_substring

    # https://github.com/apache/arrow/issues/39149
    # GH 56404, unexpected behavior with negative max_replacements with pyarrow.
    limit = n
    if n < 0:
        limit = None

    replaced = replace_kernel(
        self._pa_array,
        pattern=pat,
        replacement=repl,
        max_replacements=limit,
    )
    return type(self)(replaced)
189+
162190
def _str_capitalize(self) -> Self:
    """Wrap the result of ``pc.utf8_capitalize`` on ``self._pa_array`` in ``type(self)``."""
    capitalized = pc.utf8_capitalize(self._pa_array)
    return type(self)(capitalized)
164192

@@ -168,6 +196,16 @@ def _str_title(self) -> Self:
168196
def _str_swapcase(self) -> Self:
    """Wrap the result of ``pc.utf8_swapcase`` on ``self._pa_array`` in ``type(self)``."""
    swapped = pc.utf8_swapcase(self._pa_array)
    return type(self)(swapped)
170198

199+
def _str_removeprefix(self, prefix: str):
    """
    Drop ``prefix`` from the start of every element that begins with it.

    Parameters
    ----------
    prefix : str
        Leading text to remove.

    Returns
    -------
    type(self)
        New array; elements that do not start with ``prefix`` are taken
        unchanged from ``self._pa_array``.
    """
    if pa_version_under13p0:
        # Fallback when the pa_version_under13p0 flag is set: apply
        # str.removeprefix per element through self._apply_elementwise.
        chunks = self._apply_elementwise(lambda val: val.removeprefix(prefix))
        return type(self)(pa.chunked_array(chunks))

    has_prefix = pc.starts_with(self._pa_array, pattern=prefix)
    # Slice off the first len(prefix) code units of every element ...
    without_prefix = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
    # ... but keep the sliced value only where the prefix was actually present.
    selected = pc.if_else(has_prefix, without_prefix, self._pa_array)
    return type(self)(selected)
208+
171209
def _str_removesuffix(self, suffix: str):
172210
ends_with = pc.ends_with(self._pa_array, pattern=suffix)
173211
removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))

pandas/core/arrays/arrow/array.py

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -2323,33 +2323,6 @@ def _str_count(self, pat: str, flags: int = 0) -> Self:
23232323
raise NotImplementedError(f"count not implemented with {flags=}")
23242324
return type(self)(pc.count_substring_regex(self._pa_array, pat))
23252325

2326-
def _str_replace(
    self,
    pat: str | re.Pattern,
    repl: str | Callable,
    n: int = -1,
    case: bool = True,
    flags: int = 0,
    regex: bool = True,
) -> Self:
    """
    Replace occurrences of ``pat`` with ``repl`` via pyarrow compute.

    Parameters
    ----------
    pat : str or re.Pattern
        Pattern to search for. A compiled ``re.Pattern`` is rejected.
    repl : str or Callable
        Replacement text. A callable is rejected.
    n : int, default -1
        Passed on as ``max_replacements``; any negative value is
        translated to ``None``.
    case : bool, default True
        Must be truthy; anything else is rejected.
    flags : int, default 0
        Must be falsy; anything else is rejected.
    regex : bool, default True
        Selects ``pc.replace_substring_regex`` when truthy and
        ``pc.replace_substring`` otherwise.

    Returns
    -------
    Self
        ``type(self)`` constructed from the kernel's result.

    Raises
    ------
    NotImplementedError
        If ``pat`` is a ``re.Pattern``, ``repl`` is callable, ``case`` is
        falsy, or ``flags`` is truthy.
    """
    unsupported = isinstance(pat, re.Pattern) or callable(repl) or not case or flags
    if unsupported:
        raise NotImplementedError(
            "replace is not supported with a re.Pattern, callable repl, "
            "case=False, or flags!=0"
        )

    if regex:
        replace_kernel = pc.replace_substring_regex
    else:
        replace_kernel = pc.replace_substring

    # https://github.com/apache/arrow/issues/39149
    # GH 56404, unexpected behavior with negative max_replacements with pyarrow.
    limit = n
    if n < 0:
        limit = None

    replaced = replace_kernel(
        self._pa_array,
        pattern=pat,
        replacement=repl,
        max_replacements=limit,
    )
    return type(self)(replaced)
2352-
23532326
def _str_repeat(self, repeats: int | Sequence[int]) -> Self:
23542327
if not isinstance(repeats, int):
23552328
raise NotImplementedError(
@@ -2377,16 +2350,6 @@ def _str_rpartition(self, sep: str, expand: bool) -> Self:
23772350
result = self._apply_elementwise(predicate)
23782351
return type(self)(pa.chunked_array(result))
23792352

2380-
def _str_removeprefix(self, prefix: str):
    """
    Drop ``prefix`` from the start of every element that begins with it.

    Parameters
    ----------
    prefix : str
        Leading text to remove.

    Returns
    -------
    type(self)
        New array; elements that do not start with ``prefix`` are taken
        unchanged from ``self._pa_array``.
    """
    if pa_version_under13p0:
        # Fallback when the pa_version_under13p0 flag is set: apply
        # str.removeprefix per element through self._apply_elementwise.
        chunks = self._apply_elementwise(lambda val: val.removeprefix(prefix))
        return type(self)(pa.chunked_array(chunks))

    has_prefix = pc.starts_with(self._pa_array, pattern=prefix)
    # Slice off the first len(prefix) code units of every element ...
    without_prefix = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
    # ... but keep the sliced value only where the prefix was actually present.
    selected = pc.if_else(has_prefix, without_prefix, self._pa_array)
    return type(self)(selected)
2389-
23902353
def _str_casefold(self) -> Self:
23912354
predicate = lambda val: val.casefold()
23922355
result = self._apply_elementwise(predicate)

0 commit comments

Comments
 (0)