Skip to content

Commit 6f7bec0

Browse files
committed
REF: de-duplicate ArrowStringArray methods (2)
1 parent 8fbce0b commit 6f7bec0

File tree

3 files changed: 69 additions (+69) and 91 deletions (-91).

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 48 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,54 @@ class ArrowStringArrayMixin:
2323
def __init__(self, *args, **kwargs) -> None:
2424
raise NotImplementedError
2525

26+
def _result_converter(self, result: pa.Array, na=None):
27+
# Convert bool-dtype results to the appropriate output type
28+
raise NotImplementedError
29+
30+
def _str_isalnum(self) -> Self:
31+
result = pc.utf8_is_alnum(self._pa_array)
32+
return self._result_converter(result)
33+
34+
def _str_isalpha(self):
35+
result = pc.utf8_is_alpha(self._pa_array)
36+
return self._result_converter(result)
37+
38+
def _str_isdecimal(self):
39+
result = pc.utf8_is_decimal(self._pa_array)
40+
return self._result_converter(result)
41+
42+
def _str_isdigit(self):
43+
result = pc.utf8_is_digit(self._pa_array)
44+
return self._result_converter(result)
45+
46+
def _str_islower(self):
47+
result = pc.utf8_is_lower(self._pa_array)
48+
return self._result_converter(result)
49+
50+
def _str_isnumeric(self):
51+
result = pc.utf8_is_numeric(self._pa_array)
52+
return self._result_converter(result)
53+
54+
def _str_isspace(self):
55+
result = pc.utf8_is_space(self._pa_array)
56+
return self._result_converter(result)
57+
58+
def _str_istitle(self):
59+
result = pc.utf8_is_title(self._pa_array)
60+
return self._result_converter(result)
61+
62+
def _str_isupper(self):
63+
result = pc.utf8_is_upper(self._pa_array)
64+
return self._result_converter(result)
65+
66+
def _convert_int_dtype(self, result):
67+
# Convert int-dtype results to the appropriate output type
68+
raise NotImplementedError
69+
70+
def _str_len(self):
71+
result = pc.utf8_length(self._pa_array)
72+
return self._convert_int_dtype(result)
73+
2674
def _str_pad(
2775
self,
2876
width: int,

pandas/core/arrays/arrow/array.py

Lines changed: 9 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1972,7 +1972,7 @@ def _rank(
19721972
"""
19731973
See Series.rank.__doc__.
19741974
"""
1975-
return type(self)(
1975+
return self._convert_int_dtype(
19761976
self._rank_calc(
19771977
axis=axis,
19781978
method=method,
@@ -2288,7 +2288,14 @@ def _apply_elementwise(self, func: Callable) -> list[list[Any]]:
22882288
def _str_count(self, pat: str, flags: int = 0) -> Self:
22892289
if flags:
22902290
raise NotImplementedError(f"count not implemented with {flags=}")
2291-
return type(self)(pc.count_substring_regex(self._pa_array, pat))
2291+
result = pc.count_substring_regex(self._pa_array, pat)
2292+
return self._convert_int_dtype(result)
2293+
2294+
def _result_converter(self, result, na=None):
2295+
return type(self)(result)
2296+
2297+
def _convert_int_dtype(self, result):
2298+
return type(self)(result)
22922299

22932300
def _str_contains(
22942301
self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True
@@ -2441,33 +2448,6 @@ def _str_slice(
24412448
pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
24422449
)
24432450

2444-
def _str_isalnum(self) -> Self:
2445-
return type(self)(pc.utf8_is_alnum(self._pa_array))
2446-
2447-
def _str_isalpha(self) -> Self:
2448-
return type(self)(pc.utf8_is_alpha(self._pa_array))
2449-
2450-
def _str_isdecimal(self) -> Self:
2451-
return type(self)(pc.utf8_is_decimal(self._pa_array))
2452-
2453-
def _str_isdigit(self) -> Self:
2454-
return type(self)(pc.utf8_is_digit(self._pa_array))
2455-
2456-
def _str_islower(self) -> Self:
2457-
return type(self)(pc.utf8_is_lower(self._pa_array))
2458-
2459-
def _str_isnumeric(self) -> Self:
2460-
return type(self)(pc.utf8_is_numeric(self._pa_array))
2461-
2462-
def _str_isspace(self) -> Self:
2463-
return type(self)(pc.utf8_is_space(self._pa_array))
2464-
2465-
def _str_istitle(self) -> Self:
2466-
return type(self)(pc.utf8_is_title(self._pa_array))
2467-
2468-
def _str_isupper(self) -> Self:
2469-
return type(self)(pc.utf8_is_upper(self._pa_array))
2470-
24712451
def _str_len(self) -> Self:
24722452
return type(self)(pc.utf8_length(self._pa_array))
24732453

pandas/core/arrays/string_arrow.py

Lines changed: 12 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,6 @@
5252

5353
from pandas._typing import (
5454
ArrayLike,
55-
AxisInt,
5655
Dtype,
5756
Scalar,
5857
Self,
@@ -367,45 +366,17 @@ def _str_slice(
367366
return super()._str_slice(start, stop, step)
368367
return ArrowExtensionArray._str_slice(self, start=start, stop=stop, step=step)
369368

370-
def _str_isalnum(self):
371-
result = pc.utf8_is_alnum(self._pa_array)
372-
return self._result_converter(result)
373-
374-
def _str_isalpha(self):
375-
result = pc.utf8_is_alpha(self._pa_array)
376-
return self._result_converter(result)
369+
_str_isalnum = ArrowStringArrayMixin._str_isalnum
370+
_str_isalpha = ArrowStringArrayMixin._str_isalpha
371+
_str_isdecimal = ArrowStringArrayMixin._str_isdecimal
372+
_str_isdigit = ArrowStringArrayMixin._str_isdigit
373+
_str_islower = ArrowStringArrayMixin._str_islower
374+
_str_isnumeric = ArrowStringArrayMixin._str_isnumeric
375+
_str_isspace = ArrowStringArrayMixin._str_isspace
376+
_str_istitle = ArrowStringArrayMixin._str_istitle
377+
_str_isupper = ArrowStringArrayMixin._str_isupper
377378

378-
def _str_isdecimal(self):
379-
result = pc.utf8_is_decimal(self._pa_array)
380-
return self._result_converter(result)
381-
382-
def _str_isdigit(self):
383-
result = pc.utf8_is_digit(self._pa_array)
384-
return self._result_converter(result)
385-
386-
def _str_islower(self):
387-
result = pc.utf8_is_lower(self._pa_array)
388-
return self._result_converter(result)
389-
390-
def _str_isnumeric(self):
391-
result = pc.utf8_is_numeric(self._pa_array)
392-
return self._result_converter(result)
393-
394-
def _str_isspace(self):
395-
result = pc.utf8_is_space(self._pa_array)
396-
return self._result_converter(result)
397-
398-
def _str_istitle(self):
399-
result = pc.utf8_is_title(self._pa_array)
400-
return self._result_converter(result)
401-
402-
def _str_isupper(self):
403-
result = pc.utf8_is_upper(self._pa_array)
404-
return self._result_converter(result)
405-
406-
def _str_len(self):
407-
result = pc.utf8_length(self._pa_array)
408-
return self._convert_int_dtype(result)
379+
_str_len = ArrowStringArrayMixin._str_len
409380

410381
_str_match = ArrowExtensionArray._str_match
411382
_str_fullmatch = ArrowExtensionArray._str_fullmatch
@@ -424,8 +395,7 @@ def _str_removeprefix(self, prefix: str):
424395
def _str_count(self, pat: str, flags: int = 0):
425396
if flags:
426397
return super()._str_count(pat, flags)
427-
result = pc.count_substring_regex(self._pa_array, pat)
428-
return self._convert_int_dtype(result)
398+
return ArrowExtensionArray._str_count(self, pat, flags)
429399

430400
def _str_find(self, sub: str, start: int = 0, end: int | None = None):
431401
if start != 0 and end is not None:
@@ -481,27 +451,7 @@ def _reduce(
481451
else:
482452
return result
483453

484-
def _rank(
485-
self,
486-
*,
487-
axis: AxisInt = 0,
488-
method: str = "average",
489-
na_option: str = "keep",
490-
ascending: bool = True,
491-
pct: bool = False,
492-
):
493-
"""
494-
See Series.rank.__doc__.
495-
"""
496-
return self._convert_int_dtype(
497-
self._rank_calc(
498-
axis=axis,
499-
method=method,
500-
na_option=na_option,
501-
ascending=ascending,
502-
pct=pct,
503-
)
504-
)
454+
_rank = ArrowExtensionArray._rank
505455

506456
def value_counts(self, dropna: bool = True) -> Series:
507457
result = super().value_counts(dropna=dropna)

0 commit comments

Comments
 (0)