Skip to content

Commit 3d34b77

Browse files
committed
REF: move implementations to mixin class
1 parent ab0c761 commit 3d34b77

File tree

3 files changed

+56
-82
lines changed

3 files changed

+56
-82
lines changed

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,37 @@ def _convert_int_result(self, result):
4242
# Convert an integer-dtype result to the appropriate result type
4343
raise NotImplementedError
4444

45+
def _str_len(self):
46+
result = pc.utf8_length(self._pa_array)
47+
return self._convert_int_result(result)
48+
49+
def _str_lower(self) -> Self:
50+
return type(self)(pc.utf8_lower(self._pa_array))
51+
52+
def _str_upper(self) -> Self:
53+
return type(self)(pc.utf8_upper(self._pa_array))
54+
55+
def _str_strip(self, to_strip=None) -> Self:
56+
if to_strip is None:
57+
result = pc.utf8_trim_whitespace(self._pa_array)
58+
else:
59+
result = pc.utf8_trim(self._pa_array, characters=to_strip)
60+
return type(self)(result)
61+
62+
def _str_lstrip(self, to_strip=None) -> Self:
63+
if to_strip is None:
64+
result = pc.utf8_ltrim_whitespace(self._pa_array)
65+
else:
66+
result = pc.utf8_ltrim(self._pa_array, characters=to_strip)
67+
return type(self)(result)
68+
69+
def _str_rstrip(self, to_strip=None) -> Self:
70+
if to_strip is None:
71+
result = pc.utf8_rtrim_whitespace(self._pa_array)
72+
else:
73+
result = pc.utf8_rtrim(self._pa_array, characters=to_strip)
74+
return type(self)(result)
75+
4576
def _str_pad(
4677
self,
4778
width: int,
@@ -190,3 +221,17 @@ def _str_istitle(self):
190221
def _str_isupper(self):
191222
result = pc.utf8_is_upper(self._pa_array)
192223
return self._convert_bool_result(result)
224+
225+
def _str_match(
226+
self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
227+
):
228+
if not pat.startswith("^"):
229+
pat = f"^{pat}"
230+
return self._str_contains(pat, case, flags, na, regex=True)
231+
232+
def _str_fullmatch(
233+
self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
234+
):
235+
if not pat.endswith("$") or pat.endswith("\\$"):
236+
pat = f"{pat}$"
237+
return self._str_match(pat, case, flags, na)

pandas/core/arrays/arrow/array.py

Lines changed: 1 addition & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1998,7 +1998,7 @@ def _rank(
19981998
"""
19991999
See Series.rank.__doc__.
20002000
"""
2001-
return type(self)(
2001+
return self._convert_int_result(
20022002
self._rank_calc(
20032003
axis=axis,
20042004
method=method,
@@ -2337,9 +2337,6 @@ def _str_contains(
23372337
result = result.fill_null(na)
23382338
return type(self)(result)
23392339

2340-
def _result_converter(self, result):
2341-
return type(self)(result)
2342-
23432340
def _str_replace(
23442341
self,
23452342
pat: str | re.Pattern,
@@ -2374,20 +2371,6 @@ def _str_repeat(self, repeats: int | Sequence[int]) -> Self:
23742371
)
23752372
return type(self)(pc.binary_repeat(self._pa_array, repeats))
23762373

2377-
def _str_match(
2378-
self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
2379-
) -> Self:
2380-
if not pat.startswith("^"):
2381-
pat = f"^{pat}"
2382-
return self._str_contains(pat, case, flags, na, regex=True)
2383-
2384-
def _str_fullmatch(
2385-
self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
2386-
) -> Self:
2387-
if not pat.endswith("$") or pat.endswith("\\$"):
2388-
pat = f"{pat}$"
2389-
return self._str_match(pat, case, flags, na)
2390-
23912374
def _str_find(self, sub: str, start: int = 0, end: int | None = None) -> Self:
23922375
if (start == 0 or start is None) and end is None:
23932376
result = pc.find_substring(self._pa_array, sub)
@@ -2442,36 +2425,6 @@ def _str_slice(
24422425
pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
24432426
)
24442427

2445-
def _str_len(self) -> Self:
2446-
return type(self)(pc.utf8_length(self._pa_array))
2447-
2448-
def _str_lower(self) -> Self:
2449-
return type(self)(pc.utf8_lower(self._pa_array))
2450-
2451-
def _str_upper(self) -> Self:
2452-
return type(self)(pc.utf8_upper(self._pa_array))
2453-
2454-
def _str_strip(self, to_strip=None) -> Self:
2455-
if to_strip is None:
2456-
result = pc.utf8_trim_whitespace(self._pa_array)
2457-
else:
2458-
result = pc.utf8_trim(self._pa_array, characters=to_strip)
2459-
return type(self)(result)
2460-
2461-
def _str_lstrip(self, to_strip=None) -> Self:
2462-
if to_strip is None:
2463-
result = pc.utf8_ltrim_whitespace(self._pa_array)
2464-
else:
2465-
result = pc.utf8_ltrim(self._pa_array, characters=to_strip)
2466-
return type(self)(result)
2467-
2468-
def _str_rstrip(self, to_strip=None) -> Self:
2469-
if to_strip is None:
2470-
result = pc.utf8_rtrim_whitespace(self._pa_array)
2471-
else:
2472-
result = pc.utf8_rtrim(self._pa_array, characters=to_strip)
2473-
return type(self)(result)
2474-
24752428
def _str_removeprefix(self, prefix: str):
24762429
if not pa_version_under13p0:
24772430
starts_with = pc.starts_with(self._pa_array, pattern=prefix)

pandas/core/arrays/string_arrow.py

Lines changed: 10 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@
5454

5555
from pandas._typing import (
5656
ArrayLike,
57-
AxisInt,
5857
Dtype,
5958
Self,
6059
npt,
@@ -294,19 +293,22 @@ def astype(self, dtype, copy: bool = True):
294293
_str_startswith = ArrowStringArrayMixin._str_startswith
295294
_str_endswith = ArrowStringArrayMixin._str_endswith
296295
_str_pad = ArrowStringArrayMixin._str_pad
297-
_str_match = ArrowExtensionArray._str_match
298-
_str_fullmatch = ArrowExtensionArray._str_fullmatch
299-
_str_lower = ArrowExtensionArray._str_lower
300-
_str_upper = ArrowExtensionArray._str_upper
301-
_str_strip = ArrowExtensionArray._str_strip
302-
_str_lstrip = ArrowExtensionArray._str_lstrip
303-
_str_rstrip = ArrowExtensionArray._str_rstrip
296+
_str_match = ArrowStringArrayMixin._str_match
297+
_str_fullmatch = ArrowStringArrayMixin._str_fullmatch
298+
_str_lower = ArrowStringArrayMixin._str_lower
299+
_str_upper = ArrowStringArrayMixin._str_upper
300+
_str_strip = ArrowStringArrayMixin._str_strip
301+
_str_lstrip = ArrowStringArrayMixin._str_lstrip
302+
_str_rstrip = ArrowStringArrayMixin._str_rstrip
304303
_str_removesuffix = ArrowStringArrayMixin._str_removesuffix
305304
_str_get = ArrowStringArrayMixin._str_get
306305
_str_capitalize = ArrowStringArrayMixin._str_capitalize
307306
_str_title = ArrowStringArrayMixin._str_title
308307
_str_swapcase = ArrowStringArrayMixin._str_swapcase
309308
_str_slice_replace = ArrowStringArrayMixin._str_slice_replace
309+
_str_len = ArrowStringArrayMixin._str_len
310+
311+
_rank = ArrowExtensionArray._rank
310312

311313
def _str_contains(
312314
self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
@@ -362,10 +364,6 @@ def _str_slice(
362364
return super()._str_slice(start, stop, step)
363365
return ArrowExtensionArray._str_slice(self, start=start, stop=stop, step=step)
364366

365-
def _str_len(self):
366-
result = pc.utf8_length(self._pa_array)
367-
return self._convert_int_result(result)
368-
369367
def _str_removeprefix(self, prefix: str):
370368
if not pa_version_under13p0:
371369
return ArrowExtensionArray._str_removeprefix(self, prefix)
@@ -431,28 +429,6 @@ def _reduce(
431429
else:
432430
return result
433431

434-
def _rank(
435-
self,
436-
*,
437-
axis: AxisInt = 0,
438-
method: str = "average",
439-
na_option: str = "keep",
440-
ascending: bool = True,
441-
pct: bool = False,
442-
):
443-
"""
444-
See Series.rank.__doc__.
445-
"""
446-
return self._convert_int_result(
447-
self._rank_calc(
448-
axis=axis,
449-
method=method,
450-
na_option=na_option,
451-
ascending=ascending,
452-
pct=pct,
453-
)
454-
)
455-
456432
def value_counts(self, dropna: bool = True) -> Series:
457433
result = super().value_counts(dropna=dropna)
458434
if self.dtype.na_value is np.nan:

0 commit comments

Comments
 (0)