Skip to content

Commit 8d46c36

Browse files
authored
fix: Series.str.isdigit in unicode superscripts and fractions (#1924)
* fix: Series.str.isdigit in unicode subscripts
* fix test
1 parent e5d996c commit 8d46c36

File tree

2 files changed: 6 additions and 9 deletions (+6 -9 lines changed)

bigframes/core/compile/scalar_op_compiler.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -487,9 +487,9 @@ def isalpha_op_impl(x: ibis_types.Value):
487487

488488
@scalar_op_compiler.register_unary_op(ops.isdigit_op)
489489
def isdigit_op_impl(x: ibis_types.Value):
490-
# Based on docs, should include superscript/subscript-ed numbers
491-
# Tests however pass only when set to Nd unicode class
492-
return typing.cast(ibis_types.StringValue, x).re_search(r"^(\p{Nd})+$")
490+
return typing.cast(ibis_types.StringValue, x).re_search(
491+
r"^[\p{Nd}\x{00B9}\x{00B2}\x{00B3}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}]+$"
492+
)
493493

494494

495495
@scalar_op_compiler.register_unary_op(ops.isdecimal_op)

tests/system/small/operations/test_strings.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -324,13 +324,10 @@ def test_isalpha(weird_strings, weird_strings_pd):
324324
)
325325

326326

327-
@pytest.mark.skipif(
328-
"dev" in pa.__version__,
329-
# b/333484335 pyarrow is inconsistent on the behavior
330-
reason="pyarrow dev version is inconsistent on isdigit behavior.",
331-
)
332327
def test_isdigit(weird_strings, weird_strings_pd):
333-
pd_result = weird_strings_pd.str.isdigit()
328+
# check the behavior against normal pandas str, since pyarrow has a bug with superscripts/fractions b/333484335
329+
# astype object instead of str to support pd.NA
330+
pd_result = weird_strings_pd.astype(object).str.isdigit()
334331
bf_result = weird_strings.str.isdigit().to_pandas()
335332

336333
pd.testing.assert_series_equal(

0 commit comments

Comments
 (0)