Skip to content

Commit fc4f736

Browse files
committed
BUG: Fix Series.str.contains with compiled regex on Arrow-backed strings
1 parent 5774290 commit fc4f736

File tree

1 file changed

+33
-7
lines changed

1 file changed

+33
-7
lines changed

pandas/core/strings/accessor.py

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1338,13 +1338,39 @@ def contains(
13381338
4 False
13391339
dtype: bool
13401340
"""
1341-
if regex and re.compile(pat).groups:
1342-
warnings.warn(
1343-
"This pattern is interpreted as a regular expression, and has "
1344-
"match groups. To actually get the groups, use str.extract.",
1345-
UserWarning,
1346-
stacklevel=find_stack_level(),
1347-
)
1341+
from pandas.core.dtypes.dtypes import ArrowDtype
1342+
import re
1343+
1344+
# --- Handle Arrow-backed string arrays with compiled regex patterns ---
1345+
# Arrow backend does not support compiled regex objects or Python regex flags.
1346+
# A compiled regex is accepted only when the `flags` argument is 0; it is then replaced by its source string (`pat.pattern`).
1347+
1348+
if isinstance(self._data.dtype, ArrowDtype) and isinstance(pat, re.Pattern):
1349+
if flags != 0:
1350+
raise NotImplementedError(
1351+
"Series.str.contains() with a compiled regex pattern and flag is "
1352+
"not supported for Arrow-backed string arrays."
1353+
)
1354+
pat = pat.pattern
1355+
regex = True
1356+
1357+
if regex:
1358+
try:
1359+
_compiled = pat if isinstance(pat, re.Pattern) else re.compile(
1360+
pat, flags=flags
1361+
)
1362+
if _compiled.groups:
1363+
warnings.warn(
1364+
"This pattern is interpreted as a regular expression, and has "
1365+
"match groups. To actually get the groups, use str.extract.",
1366+
UserWarning,
1367+
stacklevel=find_stack_level(),
1368+
)
1369+
except re.error as e:
1370+
raise ValueError(
1371+
f"Invalid regex pattern passed to str.contains(): {e}"
1372+
) from e
1373+
13481374

13491375
result = self._data.array._str_contains(pat, case, flags, na, regex)
13501376
return self._wrap_result(result, fill_value=na, returns_string=False)

0 commit comments

Comments
 (0)