Skip to content

Commit 261209f

Browse files
BUG: fix .str.isdigit to honor unicode superscript for older pyarrow
1 parent 14caf55 commit 261209f

File tree

3 files changed

+13
-4
lines changed

3 files changed

+13
-4
lines changed

doc/source/whatsnew/v2.3.2.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ become the default string dtype in pandas 3.0. See
2222

2323
Bug fixes
2424
^^^^^^^^^
25-
-
25+
- Fix :meth:`~Series.str.isdigit` to correctly recognize unicode superscript
26+
characters as digits for :class:`StringDtype` backed by PyArrow (:issue:`61466`)
2627

2728
.. ---------------------------------------------------------------------------
2829
.. _whatsnew_232.contributors:

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
HAS_PYARROW,
1616
pa_version_under13p0,
1717
pa_version_under17p0,
18+
pa_version_under21p0,
1819
)
1920

2021
if HAS_PYARROW:
@@ -261,6 +262,12 @@ def _str_isdecimal(self):
261262
return self._convert_bool_result(result)
262263

263264
def _str_isdigit(self):
265+
if pa_version_under21p0:
266+
# https://github.com/pandas-dev/pandas/issues/61466
267+
res_list = self._apply_elementwise(str.isdigit)
268+
return self._convert_bool_result(
269+
pa.chunked_array(res_list, type=pa.bool_())
270+
)
264271
result = pc.utf8_is_digit(self._pa_array)
265272
return self._convert_bool_result(result)
266273

pandas/tests/strings/test_strings.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -240,8 +240,9 @@ def test_ismethods(method, expected, any_string_dtype):
240240
@pytest.mark.parametrize(
241241
"method, expected",
242242
[
243-
("isnumeric", [False, True, True, False, True, True, False]),
244-
("isdecimal", [False, True, False, False, False, True, False]),
243+
("isnumeric", [False, True, True, True, False, True, True, False]),
244+
("isdecimal", [False, True, False, False, False, False, True, False]),
245+
("isdigit", [False, True, True, False, False, False, True, False]),
245246
],
246247
)
247248
def test_isnumeric_unicode(method, expected, any_string_dtype):
@@ -250,7 +251,7 @@ def test_isnumeric_unicode(method, expected, any_string_dtype):
250251
# 0x1378: ፸ ETHIOPIC NUMBER SEVENTY
251252
# 0xFF13: ３ Em 3 # noqa: RUF003
252253
ser = Series(
253-
["A", "3", "¼", "★", "፸", "３", "four"], # noqa: RUF001
254+
["A", "3", "³", "¼", "★", "፸", "３", "four"], # noqa: RUF001
254255
dtype=any_string_dtype,
255256
)
256257
expected_dtype = (

0 commit comments

Comments
 (0)