Skip to content

Commit 8fc81e0

Browse files
committed
BUG: Fix Series.str.contains with compiled regex on Arrow string dtype (#61942) and add whatsnew note
1 parent a0decbc commit 8fc81e0

File tree

3 files changed

+22
-4
lines changed

3 files changed

+22
-4
lines changed

doc/source/whatsnew/v2.3.2

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
.. _whatsnew_232:
2+
3+
These are the changes in pandas 2.3.2. See :ref:`release` for a full changelog
4+
including other versions of pandas.
5+
6+
{{ header }}
7+
8+
Bug fixes
9+
^^^^^^^^^
10+
11+
- Fixed :meth:`Series.str.contains` when ``pat`` is a compiled regular expression and the Series has an Arrow-backed string dtype; such calls now fall back to the object-dtype implementation (:issue:`61942`)

pandas/core/arrays/string_arrow.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -344,10 +344,7 @@ def _str_contains(
344344
na=lib.no_default,
345345
regex: bool = True,
346346
):
347-
if isinstance(pat, re.Pattern) and regex:
348-
return super()._str_contains(pat, case, flags, na, regex)
349-
350-
if flags:
347+
if (isinstance(pat, re.Pattern) and regex) or flags:
351348
return super()._str_contains(pat, case, flags, na, regex)
352349

353350
return ArrowStringArrayMixin._str_contains(self, pat, case, flags, na, regex)

pandas/tests/strings/test_strings.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22
datetime,
33
timedelta,
44
)
5+
import re
56

67
import numpy as np
78
import pytest
89

10+
import pandas as pd
911
from pandas import (
1012
DataFrame,
1113
Index,
@@ -176,6 +178,14 @@ def test_empty_str_methods(any_string_dtype):
176178
tm.assert_series_equal(empty_str, empty.str.translate(table))
177179

178180

181+
def test_str_contains_compiled_regex_arrow():
182+
ser = Series(["foo", "bar", "baz", None], dtype="string[pyarrow]")
183+
pat = re.compile(r"ba.")
184+
result = ser.str.contains(pat)
185+
expected = Series([False, True, True, pd.NA], dtype="boolean[pyarrow]")
186+
tm.assert_series_equal(result, expected)
187+
188+
179189
@pytest.mark.parametrize(
180190
"method, expected",
181191
[

0 commit comments

Comments
 (0)