Skip to content

Commit d348852

Browse files
Backport PR #62410 on branch 2.3.x (BUG: fix bug in str.match for Arrow backend with optional groups) (#62412)
Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent 3ecc8f2 commit d348852

File tree

3 files changed, with 26 additions and 3 deletions.

doc/source/whatsnew/v2.3.3.rst

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ Bug fixes
3535
- Fix bug in :meth:`Series.str.replace` using named capture groups (e.g., ``\g<name>``) with the Arrow-backed dtype would raise an error (:issue:`57636`)
3636
- Fix regression in ``~Series.str.contains``, ``~Series.str.match`` and ``~Series.str.fullmatch``
3737
with a compiled regex and custom flags (:issue:`62240`)
38-
- Fix :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
38+
- Fix :meth:`Series.str.match` and :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
3939

4040

4141
Improvements and fixes for Copy-on-Write

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -317,7 +317,7 @@ def _str_match(
317317
na: Scalar | lib.NoDefault = lib.no_default,
318318
):
319319
if not pat.startswith("^"):
320-
pat = f"^{pat}"
320+
pat = f"^({pat})"
321321
return self._str_contains(pat, case, flags, na, regex=True)
322322

323323
def _str_fullmatch(

pandas/tests/strings/test_find_replace.py

Lines changed: 24 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -977,6 +977,30 @@ def test_match_compiled_regex(any_string_dtype):
977977
values.str.match(re.compile("ab"), flags=re.IGNORECASE)
978978

979979

980+
@pytest.mark.parametrize(
981+
"pat, case, exp",
982+
[
983+
["ab", False, [True, False]],
984+
["Ab", True, [False, False]],
985+
["bc", True, [False, False]],
986+
["a[a-z]{1}", False, [True, False]],
987+
["A[a-z]{1}", True, [False, False]],
988+
# https://github.com/pandas-dev/pandas/issues/61072
989+
["(bc)|(ab)", True, [True, False]],
990+
["((bc)|(ab))", True, [True, False]],
991+
],
992+
)
993+
def test_str_match_extra_cases(any_string_dtype, pat, case, exp):
994+
ser = Series(["abc", "Xab"], dtype=any_string_dtype)
995+
result = ser.str.match(pat, case=case)
996+
997+
expected_dtype = (
998+
np.bool_ if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
999+
)
1000+
expected = Series(exp, dtype=expected_dtype)
1001+
tm.assert_series_equal(result, expected)
1002+
1003+
9801004
# --------------------------------------------------------------------------------------
9811005
# str.fullmatch
9821006
# --------------------------------------------------------------------------------------
@@ -1112,7 +1136,6 @@ def test_str_fullmatch_extra_cases(any_string_dtype, pat, case, na, exp):
11121136
expected_dtype = (
11131137
"object" if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
11141138
)
1115-
expected = Series([True, False, np.nan, False], dtype=expected_dtype)
11161139
expected = Series(exp, dtype=expected_dtype)
11171140
tm.assert_series_equal(result, expected)
11181141

0 commit comments

Comments
 (0)