From 62a8c218839e0c1ce5410a41f712037b66c5df4c Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 4 Sep 2025 17:01:00 +0200
Subject: [PATCH 1/5] REGR: fix string contains/match methods with compiled
 regex with flags

---
 pandas/core/arrays/_arrow_string_mixins.py | 10 +--
 pandas/core/arrays/string_arrow.py         | 52 +++++++++++++-
 pandas/core/strings/object_array.py        |  6 +-
 pandas/tests/strings/test_find_replace.py  | 83 +++++++++++++++++++---
 4 files changed, 128 insertions(+), 23 deletions(-)

diff --git a/pandas/core/arrays/_arrow_string_mixins.py b/pandas/core/arrays/_arrow_string_mixins.py
index 55eddb8045ca6..d80b097066c27 100644
--- a/pandas/core/arrays/_arrow_string_mixins.py
+++ b/pandas/core/arrays/_arrow_string_mixins.py
@@ -316,10 +316,7 @@ def _str_match(
         flags: int = 0,
         na: Scalar | lib.NoDefault = lib.no_default,
     ):
-        if isinstance(pat, re.Pattern):
-            # GH#61952
-            pat = pat.pattern
-        if isinstance(pat, str) and not pat.startswith("^"):
+        if not pat.startswith("^"):
             pat = f"^{pat}"
         return self._str_contains(pat, case, flags, na, regex=True)
 
@@ -330,10 +327,7 @@ def _str_fullmatch(
         flags: int = 0,
         na: Scalar | lib.NoDefault = lib.no_default,
     ):
-        if isinstance(pat, re.Pattern):
-            # GH#61952
-            pat = pat.pattern
-        if isinstance(pat, str) and (not pat.endswith("$") or pat.endswith("\\$")):
+        if not pat.endswith("$") or pat.endswith("\\$"):
             pat = f"{pat}$"
         return self._str_match(pat, case, flags, na)
 
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 6e29848171ace..c59b22a135329 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -55,6 +55,7 @@
         ArrayLike,
         Dtype,
         NpDtype,
+        Scalar,
         npt,
     )
 
@@ -333,8 +334,6 @@ def astype(self, dtype, copy: bool = True):
     _str_startswith = ArrowStringArrayMixin._str_startswith
     _str_endswith = ArrowStringArrayMixin._str_endswith
     _str_pad = ArrowStringArrayMixin._str_pad
-    _str_match = ArrowStringArrayMixin._str_match
-    _str_fullmatch = ArrowStringArrayMixin._str_fullmatch
     _str_lower = ArrowStringArrayMixin._str_lower
     _str_upper = ArrowStringArrayMixin._str_upper
     _str_strip = ArrowStringArrayMixin._str_strip
@@ -349,6 +348,19 @@ def astype(self, dtype, copy: bool = True):
     _str_len = ArrowStringArrayMixin._str_len
     _str_slice = ArrowStringArrayMixin._str_slice
 
+    @staticmethod
+    def _preprocess_re_pattern(pat: re.Pattern, case: bool):
+        flags = pat.flags
+        pat = pat.pattern
+        # flags is not supported by pyarrow, but `case` is -> extract and remove
+        if flags & re.IGNORECASE:
+            case = False
+            flags = flags & ~re.IGNORECASE
+        # when creating a pattern with re.compile and a string, it automatically
+        # gets a UNICODE flag, while pyarrow assumes unicode for strings anyway
+        flags = flags & ~re.UNICODE
+        return pat, case, flags
+
     def _str_contains(
         self,
         pat,
@@ -360,10 +372,44 @@ def _str_contains(
         if flags:
             return super()._str_contains(pat, case, flags, na, regex)
         if isinstance(pat, re.Pattern):
-            pat = pat.pattern
+            pat, case, flags = self._preprocess_re_pattern(pat, case)
+            if flags:
+                return super()._str_contains(pat, case, flags, na, regex)
 
         return ArrowStringArrayMixin._str_contains(self, pat, case, flags, na, regex)
 
+    def _str_match(
+        self,
+        pat: str | re.Pattern,
+        case: bool = True,
+        flags: int = 0,
+        na: Scalar | lib.NoDefault = lib.no_default,
+    ):
+        if flags:
+            return super()._str_match(pat, case, flags, na)
+        if isinstance(pat, re.Pattern):
+            pat, case, flags = self._preprocess_re_pattern(pat, case)
+            if flags:
+                return super()._str_match(pat, case, flags, na)
+
+        return ArrowStringArrayMixin._str_match(self, pat, case, flags, na)
+
+    def _str_fullmatch(
+        self,
+        pat: str | re.Pattern,
+        case: bool = True,
+        flags: int = 0,
+        na: Scalar | lib.NoDefault = lib.no_default,
+    ):
+        if flags:
+            return super()._str_fullmatch(pat, case, flags, na)
+        if isinstance(pat, re.Pattern):
+            pat, case, flags = self._preprocess_re_pattern(pat, case)
+            if flags:
+                return super()._str_fullmatch(pat, case, flags, na)
+
+        return ArrowStringArrayMixin._str_fullmatch(self, pat, case, flags, na)
+
     def _str_replace(
         self,
         pat: str | re.Pattern,
diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
index 397fdcc5cac38..ba35542b7f112 100644
--- a/pandas/core/strings/object_array.py
+++ b/pandas/core/strings/object_array.py
@@ -262,8 +262,7 @@ def _str_match(
     ):
         if not case:
             flags |= re.IGNORECASE
-        if isinstance(pat, re.Pattern):
-            pat = pat.pattern
+
         regex = re.compile(pat, flags=flags)
 
         f = lambda x: regex.match(x) is not None
@@ -278,8 +277,7 @@ def _str_fullmatch(
     ):
         if not case:
             flags |= re.IGNORECASE
-        if isinstance(pat, re.Pattern):
-            pat = pat.pattern
+
         regex = re.compile(pat, flags=flags)
 
         f = lambda x: regex.fullmatch(x) is not None
diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
index cce96f38d216a..8a235dc9a8105 100644
--- a/pandas/tests/strings/test_find_replace.py
+++ b/pandas/tests/strings/test_find_replace.py
@@ -283,16 +283,39 @@ def test_contains_nan(any_string_dtype):
 
 def test_contains_compiled_regex(any_string_dtype):
     # GH#61942
-    ser = Series(["foo", "bar", "baz"], dtype=any_string_dtype)
-    pat = re.compile("ba.")
-    result = ser.str.contains(pat)
-
     expected_dtype = (
         np.bool_ if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
     )
+
+    ser = Series(["foo", "bar", "Baz"], dtype=any_string_dtype)
+
+    pat = re.compile("ba.")
+    result = ser.str.contains(pat)
+    expected = Series([False, True, False], dtype=expected_dtype)
+    tm.assert_series_equal(result, expected)
+
+    # TODO this currently works for pyarrow-backed dtypes but raises for python
+    if any_string_dtype == "string" and any_string_dtype.storage == "pyarrow":
+        result = ser.str.contains(pat, case=False)
+        expected = Series([False, True, True], dtype=expected_dtype)
+        tm.assert_series_equal(result, expected)
+    else:
+        with pytest.raises(
+            ValueError, match="cannot process flags argument with a compiled pattern"
+        ):
+            ser.str.contains(pat, case=False)
+
+    pat = re.compile("ba.", flags=re.IGNORECASE)
+    result = ser.str.contains(pat)
     expected = Series([False, True, True], dtype=expected_dtype)
     tm.assert_series_equal(result, expected)
 
+    # TODO should this be supported?
+    with pytest.raises(
+        ValueError, match="cannot process flags argument with a compiled pattern"
+    ):
+        ser.str.contains(pat, flags=re.IGNORECASE)
+
 
 # --------------------------------------------------------------------------------------
 # str.startswith
@@ -833,14 +856,36 @@ def test_match_case_kwarg(any_string_dtype):
 
 def test_match_compiled_regex(any_string_dtype):
     # GH#61952
-    values = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)
-    result = values.str.match(re.compile(r"ab"), case=False)
     expected_dtype = (
         np.bool_ if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
     )
+
+    values = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)
+
+    result = values.str.match(re.compile("ab"))
+    expected = Series([True, False, True, False], dtype=expected_dtype)
+    tm.assert_series_equal(result, expected)
+
+    # TODO this currently works for pyarrow-backed dtypes but raises for python
+    if any_string_dtype == "string" and any_string_dtype.storage == "pyarrow":
+        result = values.str.match(re.compile("ab"), case=False)
+        expected = Series([True, True, True, True], dtype=expected_dtype)
+        tm.assert_series_equal(result, expected)
+    else:
+        with pytest.raises(
+            ValueError, match="cannot process flags argument with a compiled pattern"
+        ):
+            values.str.match(re.compile("ab"), case=False)
+
+    result = values.str.match(re.compile("ab", flags=re.IGNORECASE))
     expected = Series([True, True, True, True], dtype=expected_dtype)
     tm.assert_series_equal(result, expected)
 
+    with pytest.raises(
+        ValueError, match="cannot process flags argument with a compiled pattern"
+    ):
+        values.str.match(re.compile("ab"), flags=re.IGNORECASE)
+
 
 # --------------------------------------------------------------------------------------
 # str.fullmatch
@@ -913,14 +958,36 @@ def test_fullmatch_case_kwarg(any_string_dtype):
 
 def test_fullmatch_compiled_regex(any_string_dtype):
     # GH#61952
-    values = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)
-    result = values.str.fullmatch(re.compile(r"ab"), case=False)
     expected_dtype = (
         np.bool_ if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
     )
+
+    values = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype)
+
+    result = values.str.fullmatch(re.compile("ab"))
+    expected = Series([True, False, False, False], dtype=expected_dtype)
+    tm.assert_series_equal(result, expected)
+
+    # TODO this currently works for pyarrow-backed dtypes but raises for python
+    if any_string_dtype == "string" and any_string_dtype.storage == "pyarrow":
+        result = values.str.fullmatch(re.compile("ab"), case=False)
+        expected = Series([True, True, False, False], dtype=expected_dtype)
+        tm.assert_series_equal(result, expected)
+    else:
+        with pytest.raises(
+            ValueError, match="cannot process flags argument with a compiled pattern"
+        ):
+            values.str.fullmatch(re.compile("ab"), case=False)
+
+    result = values.str.fullmatch(re.compile("ab", flags=re.IGNORECASE))
     expected = Series([True, True, False, False], dtype=expected_dtype)
     tm.assert_series_equal(result, expected)
 
+    with pytest.raises(
+        ValueError, match="cannot process flags argument with a compiled pattern"
+    ):
+        values.str.fullmatch(re.compile("ab"), flags=re.IGNORECASE)
+
 
 # --------------------------------------------------------------------------------------
 # str.findall

From e1100e2bd4156f86c1d42a4ebababbc058919f12 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 5 Sep 2025 09:48:15 +0200
Subject: [PATCH 2/5] add additional test for custom flags being respected

---
 pandas/core/arrays/string_arrow.py        | 22 ++++++++++++---------
 pandas/tests/strings/test_find_replace.py | 24 +++++++++++++++++++++++
 2 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index c59b22a135329..ecf65d8f31149 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -348,6 +348,15 @@ def astype(self, dtype, copy: bool = True):
     _str_len = ArrowStringArrayMixin._str_len
     _str_slice = ArrowStringArrayMixin._str_slice
 
+    @staticmethod
+    def _is_re_pattern_with_flags(pat: str | re.Pattern) -> bool:
+        # check if `pat` is a compiled regex pattern with flags that are not
+        # supported by pyarrow
+        return (
+            isinstance(pat, re.Pattern)
+            and (pat.flags & ~(re.IGNORECASE | re.UNICODE)) != 0
+        )
+
     @staticmethod
     def _preprocess_re_pattern(pat: re.Pattern, case: bool):
         flags = pat.flags
@@ -369,12 +378,11 @@ def _str_contains(
         na=lib.no_default,
         regex: bool = True,
     ):
-        if flags:
+        if flags or self._is_re_pattern_with_flags(pat):
             return super()._str_contains(pat, case, flags, na, regex)
         if isinstance(pat, re.Pattern):
+            # TODO flags passed separately by user are ignored
             pat, case, flags = self._preprocess_re_pattern(pat, case)
-            if flags:
-                return super()._str_contains(pat, case, flags, na, regex)
 
         return ArrowStringArrayMixin._str_contains(self, pat, case, flags, na, regex)
 
@@ -385,12 +393,10 @@ def _str_match(
         flags: int = 0,
         na: Scalar | lib.NoDefault = lib.no_default,
     ):
-        if flags:
+        if flags or self._is_re_pattern_with_flags(pat):
             return super()._str_match(pat, case, flags, na)
         if isinstance(pat, re.Pattern):
             pat, case, flags = self._preprocess_re_pattern(pat, case)
-            if flags:
-                return super()._str_match(pat, case, flags, na)
 
         return ArrowStringArrayMixin._str_match(self, pat, case, flags, na)
 
@@ -401,12 +407,10 @@ def _str_fullmatch(
         flags: int = 0,
         na: Scalar | lib.NoDefault = lib.no_default,
     ):
-        if flags:
+        if flags or self._is_re_pattern_with_flags(pat):
             return super()._str_fullmatch(pat, case, flags, na)
         if isinstance(pat, re.Pattern):
             pat, case, flags = self._preprocess_re_pattern(pat, case)
-            if flags:
-                return super()._str_fullmatch(pat, case, flags, na)
 
         return ArrowStringArrayMixin._str_fullmatch(self, pat, case, flags, na)
 
diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
index 8a235dc9a8105..093aa1aac27e2 100644
--- a/pandas/tests/strings/test_find_replace.py
+++ b/pandas/tests/strings/test_find_replace.py
@@ -317,6 +317,30 @@ def test_contains_compiled_regex(any_string_dtype):
         ser.str.contains(pat, flags=re.IGNORECASE)
 
 
+def test_contains_compiled_regex_flags(any_string_dtype):
+    # ensure that flags other than re.IGNORECASE are respected
+    expected_dtype = (
+        np.bool_ if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
+    )
+
+    ser = Series(["foobar", "foo\nbar", "Baz"], dtype=any_string_dtype)
+
+    pat = re.compile("^ba")
+    result = ser.str.contains(pat)
+    expected = Series([False, False, False], dtype=expected_dtype)
+    tm.assert_series_equal(result, expected)
+
+    pat = re.compile("^ba", flags=re.MULTILINE)
+    result = ser.str.contains(pat)
+    expected = Series([False, True, False], dtype=expected_dtype)
+    tm.assert_series_equal(result, expected)
+
+    pat = re.compile("^ba", flags=re.MULTILINE | re.IGNORECASE)
+    result = ser.str.contains(pat)
+    expected = Series([False, True, True], dtype=expected_dtype)
+    tm.assert_series_equal(result, expected)
+
+
 # --------------------------------------------------------------------------------------
 # str.startswith
 # --------------------------------------------------------------------------------------

From 5cb60e0b5b1661834f7e84fc08b1f9d0bdc4b51c Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 5 Sep 2025 10:03:38 +0200
Subject: [PATCH 3/5] update type annotations

---
 pandas/core/arrays/_arrow_string_mixins.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/_arrow_string_mixins.py b/pandas/core/arrays/_arrow_string_mixins.py
index d80b097066c27..ad118d6be6b18 100644
--- a/pandas/core/arrays/_arrow_string_mixins.py
+++ b/pandas/core/arrays/_arrow_string_mixins.py
@@ -311,7 +311,7 @@ def _str_contains(
 
     def _str_match(
         self,
-        pat: str | re.Pattern,
+        pat: str,
         case: bool = True,
         flags: int = 0,
         na: Scalar | lib.NoDefault = lib.no_default,
@@ -322,7 +322,7 @@ def _str_match(
 
     def _str_fullmatch(
         self,
-        pat: str | re.Pattern,
+        pat: str,
         case: bool = True,
         flags: int = 0,
         na: Scalar | lib.NoDefault = lib.no_default,

From e065d332c0735c169a26d142be42a0fe310253c4 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 5 Sep 2025 10:40:55 +0200
Subject: [PATCH 4/5] add whatsnew

---
 doc/source/whatsnew/v2.3.3.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.3.3.rst b/doc/source/whatsnew/v2.3.3.rst
index e31ae4a8a647b..cbde6f52d4472 100644
--- a/doc/source/whatsnew/v2.3.3.rst
+++ b/doc/source/whatsnew/v2.3.3.rst
@@ -22,7 +22,8 @@ become the default string dtype in pandas 3.0. See
 
 Bug fixes
 ^^^^^^^^^
--
+- Fix regression in :meth:`~Series.str.contains`, :meth:`~Series.str.match` and :meth:`~Series.str.fullmatch`
+  with a compiled regex and custom flags (:issue:`62240`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_233.contributors:

From 6a1445125d0f948c6208bad3b18562b4ef300768 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 5 Sep 2025 11:11:01 +0200
Subject: [PATCH 5/5] try fixing typing issues

---
 pandas/core/arrays/string_arrow.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index ecf65d8f31149..e396ce91a293a 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -358,9 +358,9 @@ def _is_re_pattern_with_flags(pat: str | re.Pattern) -> bool:
         )
 
     @staticmethod
-    def _preprocess_re_pattern(pat: re.Pattern, case: bool):
+    def _preprocess_re_pattern(pat: re.Pattern, case: bool) -> tuple[str, bool, int]:
+        pattern = pat.pattern
         flags = pat.flags
-        pat = pat.pattern
         # flags is not supported by pyarrow, but `case` is -> extract and remove
         if flags & re.IGNORECASE:
             case = False
@@ -368,7 +368,7 @@ def _preprocess_re_pattern(pat: re.Pattern, case: bool):
         # when creating a pattern with re.compile and a string, it automatically
         # gets a UNICODE flag, while pyarrow assumes unicode for strings anyway
         flags = flags & ~re.UNICODE
-        return pat, case, flags
+        return pattern, case, flags
 
     def _str_contains(
         self,