From d593ffabaa23d716b8f2f2872d3dd78b62bdc97b Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 21 Sep 2025 09:13:33 -0400 Subject: [PATCH 1/2] CLN: Enforce deprecation of not validating argument --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/arrays/string_arrow.py | 15 ++------- pandas/core/strings/object_array.py | 33 +++--------------- pandas/tests/strings/test_find_replace.py | 41 +++++++---------------- pandas/util/_validators.py | 32 ++++++++++++++++++ 5 files changed, 52 insertions(+), 70 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 9210f1e0082f0..d8ceca892e8b0 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -744,6 +744,7 @@ Other Removals - Enforce deprecation in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` with object dtype and mismatched null-like values, which are now considered not-equal (:issue:`18463`) - Enforce banning of upcasting in in-place setitem-like operations (:issue:`59007`) (see `PDEP6 `_) - Enforced deprecation ``all`` and ``any`` reductions with ``datetime64``, :class:`DatetimeTZDtype`, and :class:`PeriodDtype` dtypes (:issue:`58029`) +- Enforced deprecation disallowing values other than ``True``, ``False``, or NA for ``na`` in :meth:`.str.contains`, :meth:`.str.startswith`, and :meth:`.str.endswith` (:issue:`59615`) - Enforced deprecation disallowing ``float`` "periods" in :func:`date_range`, :func:`period_range`, :func:`timedelta_range`, :func:`interval_range`, (:issue:`56036`) - Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`) - Enforced deprecation in :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype performing dtype inference on the ``.index`` of the result (:issue:`56161`) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 9046d83dcc09f..37fc608ebcebb 100644 --- 
a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -6,7 +6,6 @@ TYPE_CHECKING, Self, ) -import warnings import numpy as np @@ -19,7 +18,7 @@ PYARROW_MIN_VERSION, pa_version_under16p0, ) -from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_na_arg from pandas.core.dtypes.common import ( is_scalar, @@ -242,17 +241,7 @@ def insert(self, loc: int, item) -> ArrowStringArray: return super().insert(loc, item) def _convert_bool_result(self, values, na=lib.no_default, method_name=None): - if na is not lib.no_default and not isna(na) and not isinstance(na, bool): - # TODO: Enforce in 3.0 (#59615) - # GH#59561 - warnings.warn( - f"Allowing a non-bool 'na' in obj.str.{method_name} is deprecated " - "and will raise in a future version.", - FutureWarning, # pdlint: ignore[warning_class] - stacklevel=find_stack_level(), - ) - na = bool(na) - + validate_na_arg(na, name="na", allow_no_default=True, allow_bool=True) if self.dtype.na_value is np.nan: if na is lib.no_default or isna(na): # NaN propagates as False diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index ba35542b7f112..1a0b2ff140119 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -9,14 +9,13 @@ cast, ) import unicodedata -import warnings import numpy as np from pandas._libs import lib import pandas._libs.missing as libmissing import pandas._libs.ops as libops -from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_na_arg from pandas.core.dtypes.common import pandas_dtype from pandas.core.dtypes.missing import isna @@ -145,6 +144,7 @@ def _str_contains( na=lib.no_default, regex: bool = True, ): + validate_na_arg(na, name="na", allow_no_default=True, allow_bool=True) if regex: if not case: flags |= re.IGNORECASE @@ -158,41 +158,16 @@ def _str_contains( else: upper_pat = pat.upper() f = lambda x: upper_pat in x.upper() - if 
na is not lib.no_default and not isna(na) and not isinstance(na, bool): - # TODO: Enforce in 3.0 (#59615) - # GH#59561 - warnings.warn( - "Allowing a non-bool 'na' in obj.str.contains is deprecated " - "and will raise in a future version.", - FutureWarning, # pdlint: ignore[warning_class] - stacklevel=find_stack_level(), - ) return self._str_map(f, na, dtype=np.dtype("bool")) def _str_startswith(self, pat, na=lib.no_default): + validate_na_arg(na, name="na", allow_no_default=True, allow_bool=True) f = lambda x: x.startswith(pat) - if na is not lib.no_default and not isna(na) and not isinstance(na, bool): - # TODO: Enforce in 3.0 (#59615) - # GH#59561 - warnings.warn( - "Allowing a non-bool 'na' in obj.str.startswith is deprecated " - "and will raise in a future version.", - FutureWarning, # pdlint: ignore[warning_class] - stacklevel=find_stack_level(), - ) return self._str_map(f, na_value=na, dtype=np.dtype(bool)) def _str_endswith(self, pat, na=lib.no_default): + validate_na_arg(na, name="na", allow_no_default=True, allow_bool=True) f = lambda x: x.endswith(pat) - if na is not lib.no_default and not isna(na) and not isinstance(na, bool): - # TODO: Enforce in 3.0 (#59615) - # GH#59561 - warnings.warn( - "Allowing a non-bool 'na' in obj.str.endswith is deprecated " - "and will raise in a future version.", - FutureWarning, # pdlint: ignore[warning_class] - stacklevel=find_stack_level(), - ) return self._str_map(f, na_value=na, dtype=np.dtype(bool)) def _str_replace( diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 027db8f5e9ec0..b07017896b1e1 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -175,17 +175,14 @@ def test_contains_na_kwarg_for_nullable_string_dtype( values = Series(["a", "b", "c", "a", np.nan], dtype=nullable_string_dtype) - msg = ( - "Allowing a non-bool 'na' in obj.str.contains is deprecated and " - "will raise in a future version" - ) - 
warn = None - if not pd.isna(na) and not isinstance(na, bool): - warn = FutureWarning - with tm.assert_produces_warning(warn, match=msg): + if na in [0, 3] and na is not False: + msg = f"na must be a valid NA value; got {na}" + with pytest.raises(ValueError, match=msg): + values.str.contains("a", na=na, regex=regex) + else: result = values.str.contains("a", na=na, regex=regex) - expected = Series([True, False, False, True, expected], dtype="boolean") - tm.assert_series_equal(result, expected) + expected = Series([True, False, False, True, expected], dtype="boolean") + tm.assert_series_equal(result, expected) def test_contains_moar(any_string_dtype): @@ -255,19 +252,9 @@ def test_contains_nan(any_string_dtype): expected = Series([True, True, True], dtype=expected_dtype) tm.assert_series_equal(result, expected) - msg = ( - "Allowing a non-bool 'na' in obj.str.contains is deprecated and " - "will raise in a future version" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = s.str.contains("foo", na="foo") - if any_string_dtype == "object": - expected = Series(["foo", "foo", "foo"], dtype=np.object_) - elif any_string_dtype.na_value is np.nan: - expected = Series([True, True, True], dtype=np.bool_) - else: - expected = Series([True, True, True], dtype="boolean") - tm.assert_series_equal(result, expected) + msg = "na must be a valid NA value; got foo" + with pytest.raises(ValueError, match=msg): + s.str.contains("foo", na="foo") result = s.str.contains("foo") if any_string_dtype == "str": @@ -352,12 +339,10 @@ def test_startswith_endswith_validate_na(any_string_dtype): ["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"], dtype=any_string_dtype, ) - - msg = "Allowing a non-bool 'na' in obj.str.startswith is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + msg = "na must be a valid NA value; got baz" + with pytest.raises(ValueError, match=msg): ser.str.startswith("kapow", na="baz") - msg = "Allowing a non-bool 'na' 
in obj.str.endswith is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with pytest.raises(ValueError, match=msg): ser.str.endswith("bar", na="baz") diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 9aab19fe340ec..2f8939b714d63 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -22,6 +22,7 @@ is_bool, is_integer, ) +from pandas.core.dtypes.missing import isna BoolishT = TypeVar("BoolishT", bool, int) BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None) @@ -269,6 +270,37 @@ def validate_bool_kwarg( return value +def validate_na_arg( + value, name: str, allow_no_default: bool = False, allow_bool: bool = False +): + """ + Validate na arguments. + + Parameters + ---------- + value : object + Value to validate. + name : str + Name of the argument, used to raise an informative error message. + allow_no_default : bool, default False + Whether to allow ``value`` to be ``lib.no_default``. + allow_bool : bool, default False + Whether to allow ``value`` to be an instance of bool. + + Raises + ------ + ValueError + When ``value`` is determined to be invalid. + """ + if allow_no_default and value is lib.no_default: + return + if allow_bool and isinstance(value, bool): + return + if isna(value): + return + raise ValueError(f"{name} must be a valid NA value; got {value}") + + def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True): """ Validate the keyword arguments to 'fillna'. 
From 1e8555d9b3d717b550c76cf0f72f3d6b51da2156 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Tue, 30 Sep 2025 17:35:43 -0400 Subject: [PATCH 2/2] Finish up --- pandas/core/arrays/string_arrow.py | 2 +- pandas/core/strings/object_array.py | 6 +++--- pandas/tests/strings/test_find_replace.py | 6 +++--- pandas/util/_validators.py | 16 +++------------- 4 files changed, 10 insertions(+), 20 deletions(-) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 37fc608ebcebb..4545d35ba5dae 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -241,7 +241,7 @@ def insert(self, loc: int, item) -> ArrowStringArray: return super().insert(loc, item) def _convert_bool_result(self, values, na=lib.no_default, method_name=None): - validate_na_arg(na, name="na", allow_no_default=True, allow_bool=True) + validate_na_arg(na, name="na") if self.dtype.na_value is np.nan: if na is lib.no_default or isna(na): # NaN propagates as False diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 1a0b2ff140119..21975cc1e0b5e 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -144,7 +144,7 @@ def _str_contains( na=lib.no_default, regex: bool = True, ): - validate_na_arg(na, name="na", allow_no_default=True, allow_bool=True) + validate_na_arg(na, name="na") if regex: if not case: flags |= re.IGNORECASE @@ -161,12 +161,12 @@ def _str_contains( return self._str_map(f, na, dtype=np.dtype("bool")) def _str_startswith(self, pat, na=lib.no_default): - validate_na_arg(na, name="na", allow_no_default=True, allow_bool=True) + validate_na_arg(na, name="na") f = lambda x: x.startswith(pat) return self._str_map(f, na_value=na, dtype=np.dtype(bool)) def _str_endswith(self, pat, na=lib.no_default): - validate_na_arg(na, name="na", allow_no_default=True, allow_bool=True) + validate_na_arg(na, name="na") f = lambda x: x.endswith(pat) return 
self._str_map(f, na_value=na, dtype=np.dtype(bool)) diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 914561513c1d4..f75492a84042c 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -176,7 +176,7 @@ def test_contains_na_kwarg_for_nullable_string_dtype( values = Series(["a", "b", "c", "a", np.nan], dtype=nullable_string_dtype) if na in [0, 3] and na is not False: - msg = f"na must be a valid NA value; got {na}" + msg = f"na must be an NA value, True, or False; got {na}" with pytest.raises(ValueError, match=msg): values.str.contains("a", na=na, regex=regex) else: @@ -252,7 +252,7 @@ def test_contains_nan(any_string_dtype): expected = Series([True, True, True], dtype=expected_dtype) tm.assert_series_equal(result, expected) - msg = "na must be a valid NA value; got foo" + msg = "na must be an NA value, True, or False; got foo" with pytest.raises(ValueError, match=msg): s.str.contains("foo", na="foo") @@ -339,7 +339,7 @@ def test_startswith_endswith_validate_na(any_string_dtype): ["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"], dtype=any_string_dtype, ) - msg = "na must be a valid NA value; got baz" + msg = "na must be an NA value, True, or False; got baz" with pytest.raises(ValueError, match=msg): ser.str.startswith("kapow", na="baz") with pytest.raises(ValueError, match=msg): diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 2f8939b714d63..6e543bbcf1e45 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -270,9 +270,7 @@ def validate_bool_kwarg( return value -def validate_na_arg( - value, name: str, allow_no_default: bool = False, allow_bool: bool = False -): +def validate_na_arg(value, name: str): """ Validate na arguments. @@ -282,23 +280,15 @@ def validate_na_arg( Value to validate. name : str Name of the argument, used to raise an informative error message. 
- allow_no_default : bool, default False - Whether to allow ``value`` to be ``lib.no_default``. - allow_bool : bool, default False - Whether to allow ``value`` to be an instance of bool. Raises ------ ValueError When ``value`` is determined to be invalid. """ - if allow_no_default and value is lib.no_default: + if value is lib.no_default or isinstance(value, bool) or isna(value): return - if allow_bool and isinstance(value, bool): - return - if isna(value): - return - raise ValueError(f"{name} must be a valid NA value; got {value}") + raise ValueError(f"{name} must be an NA value, True, or False; got {value}") def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):