Skip to content
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.3.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Bug fixes
- Fix bug in :meth:`Series.str.replace` where using named capture groups (e.g., ``\g<name>``) with the Arrow-backed dtype would raise an error (:issue:`57636`)
- Fix regression in :meth:`~Series.str.contains`, :meth:`~Series.str.match` and :meth:`~Series.str.fullmatch`
with a compiled regex and custom flags (:issue:`62240`)
- Fix :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)

.. ---------------------------------------------------------------------------
.. _whatsnew_233.contributors:
Expand Down
8 changes: 6 additions & 2 deletions pandas/core/arrays/_arrow_string_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,8 +326,12 @@ def _str_fullmatch(
flags: int = 0,
na: Scalar | lib.NoDefault = lib.no_default,
):
if not pat.endswith("$") or pat.endswith("\\$"):
pat = f"{pat}$"
if (not pat.endswith("$") or pat.endswith("\\$")) and not pat.startswith("^"):
pat = f"^({pat})$"
elif not pat.endswith("$") or pat.endswith("\\$"):
pat = f"^({pat[1:]})$"
elif not pat.startswith("^"):
pat = f"^({pat[0:-1]})$"
return self._str_match(pat, case, flags, na)

def _str_find(self, sub: str, start: int = 0, end: int | None = None):
Expand Down
13 changes: 9 additions & 4 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1870,23 +1870,28 @@ def test_str_match(pat, case, na, exp):

@pytest.mark.parametrize(
    "pat, case, na, exp",
    # Note: keep cases in sync with
    # pandas/tests/strings/test_find_replace.py::test_str_fullmatch_extra_cases
    [
        # Plain patterns: only the element that matches in full may be True.
        ["abc", False, None, [True, False, False, None]],
        ["Abc", True, None, [False, False, False, None]],
        ["bc", True, None, [False, False, False, None]],
        ["ab", False, None, [False, False, False, None]],
        ["a[a-z]{2}", False, None, [True, False, False, None]],
        ["A[a-z]{1}", True, None, [False, False, False, None]],
        # GH Issue: #56652
        ["abc$", False, None, [True, False, False, None]],
        ["abc\\$", False, None, [False, True, False, None]],
        ["Abc$", True, None, [False, False, False, None]],
        ["Abc\\$", True, None, [False, False, False, None]],
        # https://github.com/pandas-dev/pandas/issues/61072
        ["(abc)|(abx)", True, None, [True, False, False, None]],
        ["((abc)|(abx))", True, None, [True, False, False, None]],
    ],
)
def test_str_fullmatch(pat, case, na, exp):
    """Check ``Series.str.fullmatch`` on an Arrow-backed string Series.

    Parameters
    ----------
    pat : str
        Regular expression passed to ``fullmatch``.
    case : bool
        Forwarded as the ``case`` argument.
    na : object
        Forwarded as the ``na`` argument.
    exp : list
        Expected element-wise result for ``["abc", "abc$", "$abc", None]``.
    """
    ser = pd.Series(["abc", "abc$", "$abc", None], dtype=ArrowDtype(pa.string()))
    # Exercise fullmatch itself; a preceding ``str.match`` call would only be
    # overwritten and would leave fullmatch semantics untested.
    result = ser.str.fullmatch(pat, case=case, na=na)
    expected = pd.Series(exp, dtype=ArrowDtype(pa.bool_()))
    tm.assert_series_equal(result, expected)

Expand Down
38 changes: 38 additions & 0 deletions pandas/tests/strings/test_find_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -1075,6 +1075,44 @@ def test_fullmatch_compiled_regex(any_string_dtype):
values.str.fullmatch(re.compile("ab"), flags=re.IGNORECASE)


@pytest.mark.parametrize(
    "pat, case, na, exp",
    # Note: keep cases in sync with
    # pandas/tests/extension/test_arrow.py::test_str_fullmatch
    [
        ["abc", False, None, [True, False, False, None]],
        ["Abc", True, None, [False, False, False, None]],
        ["bc", True, None, [False, False, False, None]],
        ["ab", False, None, [False, False, False, None]],
        ["a[a-z]{2}", False, None, [True, False, False, None]],
        ["A[a-z]{1}", True, None, [False, False, False, None]],
        # GH Issue: #56652
        ["abc$", False, None, [True, False, False, None]],
        ["abc\\$", False, None, [False, True, False, None]],
        ["Abc$", True, None, [False, False, False, None]],
        ["Abc\\$", True, None, [False, False, False, None]],
        # https://github.com/pandas-dev/pandas/issues/61072
        ["(abc)|(abx)", True, None, [True, False, False, None]],
        ["((abc)|(abx))", True, None, [True, False, False, None]],
    ],
)
def test_str_fullmatch_extra_cases(any_string_dtype, pat, case, na, exp):
    """Check ``Series.str.fullmatch`` for every string dtype in the fixture.

    Parameters
    ----------
    any_string_dtype : str or dtype
        Dtype used to build the input Series (supplied by a fixture).
    pat : str
        Regular expression passed to ``fullmatch``.
    case : bool
        Forwarded as the ``case`` argument.
    na : object
        Forwarded as the ``na`` argument.
    exp : list
        Expected element-wise result for ``["abc", "abc$", "$abc", None]``;
        the last entry corresponds to the missing value.
    """
    ser = Series(["abc", "abc$", "$abc", None], dtype=any_string_dtype)
    result = ser.str.fullmatch(pat, case=case, na=na)

    if any_string_dtype == "str":
        # NaN propagates as False
        # Build a new list rather than assigning ``exp[-1]``: ``exp`` is the
        # object from the parametrize table above and pytest passes it
        # uncopied to every dtype variant of this case, so an in-place edit
        # would leak into the other runs.
        exp = [*exp[:-1], False]
        expected_dtype = bool
    else:
        expected_dtype = (
            "object" if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
        )
    expected = Series(exp, dtype=expected_dtype)
    tm.assert_series_equal(result, expected)


# --------------------------------------------------------------------------------------
# str.findall
# --------------------------------------------------------------------------------------
Expand Down
Loading