7
7
import numpy as np
8
8
import pytest
9
9
10
+ from pandas .compat import pa_version_under21p0
10
11
from pandas .errors import Pandas4Warning
11
12
12
13
from pandas import (
15
16
Index ,
16
17
MultiIndex ,
17
18
Series ,
19
+ StringDtype ,
18
20
option_context ,
19
21
)
20
22
import pandas ._testing as tm
@@ -249,8 +251,9 @@ def test_ismethods(method, expected, any_string_dtype):
249
251
@pytest.mark.parametrize(
    "method, expected",
    [
        ("isnumeric", [False, True, True, True, False, True, True, False]),
        ("isdecimal", [False, True, False, False, False, False, True, False]),
        ("isdigit", [False, True, True, False, False, False, True, False]),
    ],
)
def test_isnumeric_unicode(method, expected, any_string_dtype):
    """Check the ``.str`` numeric predicates on non-ASCII numerals.

    ``method`` is the name of the ``Series.str`` predicate under test and
    ``expected`` is the per-element boolean result for the sample strings
    below. The result is compared against ``expected`` for every string
    dtype, and additionally against the ``str`` method of the same name
    when the data is not pyarrow-backed.
    """
    # 0x00B3: ³ SUPERSCRIPT THREE
    # 0x00BC: ¼ VULGAR FRACTION ONE QUARTER
    # 0x2605: ★ not number
    # 0x1378: ፸ ETHIOPIC NUMBER SEVENTY
    # 0xFF13: ３ Em 3  # noqa: RUF003
    ser = Series(
        ["A", "3", "³", "¼", "★", "፸", "３", "four"],  # noqa: RUF001
        dtype=any_string_dtype,
    )
    # object / NaN-variant string dtypes give numpy bool results, the
    # NA-variant string dtypes give the nullable "boolean" dtype
    expected_dtype = (
        "bool" if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
    )
    expected = Series(expected, dtype=expected_dtype)
    if (
        method == "isdigit"
        and isinstance(ser.dtype, StringDtype)
        and ser.dtype.storage == "pyarrow"
        and not pa_version_under21p0
    ):
        # known difference in behavior between python and pyarrow unicode handling
        # pyarrow 21+ considers ¼ and ፸ as a digit, while python does not
        expected.iloc[3] = True  # "¼"
        expected.iloc[5] = True  # "፸"

    result = getattr(ser.str, method)()
    tm.assert_series_equal(result, expected)

    # compare with standard library
    # (only for non-pyarrow storage given the above differences)
    if any_string_dtype == "object" or (
        isinstance(any_string_dtype, StringDtype)
        and any_string_dtype.storage == "python"
    ):
        expected = [getattr(item, method)() for item in ser]
        assert list(result) == expected
275
294
276
295
277
296
@pytest .mark .parametrize (
0 commit comments