@@ -1338,13 +1338,39 @@ def contains(
13381338 4 False
13391339 dtype: bool
13401340 """
1341- if regex and re .compile (pat ).groups :
1342- warnings .warn (
1343- "This pattern is interpreted as a regular expression, and has "
1344- "match groups. To actually get the groups, use str.extract." ,
1345- UserWarning ,
1346- stacklevel = find_stack_level (),
1347- )
1341+ from pandas .core .dtypes .dtypes import ArrowDtype
1342+ import re
1343+
1344+ # --- Handle Arrow-backed string arrays with compiled regex patterns ---
1345+ # Arrow backend does not support compiled regex objects or Python regex flags.
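1346+ # If a compiled regex is passed, it is accepted only when the `flags` argument is 0; it is then replaced by its pattern string, so flags compiled into the pattern are not carried over.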
1347+
1348+ if isinstance (self ._data .dtype , ArrowDtype ) and isinstance (pat , re .Pattern ):
1349+ if flags != 0 :
1350+ raise NotImplementedError (
1351+ "Series.str.contains() with a compiled regex pattern and flag is "
1352+ "not supported for Arrow-backed string arrays."
1353+ )
1354+ pat = pat .pattern
1355+ regex = True
1356+
1357+ if regex :
1358+ try :
1359+ _compiled = pat if isinstance (pat , re .Pattern ) else re .compile (
1360+ pat , flags = flags
1361+ )
1362+ if _compiled .groups :
1363+ warnings .warn (
1364+ "This pattern is interpreted as a regular expression, and has "
1365+ "match groups. To actually get the groups, use str.extract." ,
1366+ UserWarning ,
1367+ stacklevel = find_stack_level (),
1368+ )
1369+ except re .error as e :
1370+ raise ValueError (
1371+ f"Invalid regex pattern passed to str.contains(): { e } "
1372+ ) from e
1373+
13481374
13491375 result = self ._data .array ._str_contains (pat , case , flags , na , regex )
13501376 return self ._wrap_result (result , fill_value = na , returns_string = False )
0 commit comments