diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 2817945c55a86..78910fa843a0a 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -518,6 +518,7 @@ Other Deprecations - Deprecated ``axis=1`` in :meth:`DataFrame.ewm`, :meth:`DataFrame.rolling`, :meth:`DataFrame.expanding`, transpose before calling the method instead (:issue:`51778`) - Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`) - Deprecated ``broadcast_axis`` keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`) +- Deprecated passing ``numeric_only=None`` to reduction APIs (DataFrame/Series/GroupBy/Window). During the 2.x cycle pandas will emit a ``pandas.errors.PandasFutureWarning`` when ``numeric_only=None`` is used; in a future release this will raise a ``ValueError``. Pass ``True`` or ``False`` explicitly instead (:issue:`53098`) - Deprecated ``downcast`` keyword in :meth:`Index.fillna` (:issue:`53956`) - Deprecated ``fill_method`` and ``limit`` keywords in :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`.DataFrameGroupBy.pct_change`, and :meth:`.SeriesGroupBy.pct_change`, explicitly call e.g.
:meth:`DataFrame.ffill` or :meth:`DataFrame.bfill` before calling ``pct_change`` instead (:issue:`53491`) - Deprecated ``method``, ``limit``, and ``fill_axis`` keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call :meth:`DataFrame.fillna` or :meth:`Series.fillna` on the alignment results instead (:issue:`51856`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 643974db5f2bf..9760877c318c9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11926,6 +11926,18 @@ def _reduce( if axis is not None: axis = self._get_axis_number(axis) + # numeric_only must be a bool; None is still accepted here, ints are not. + # Deprecate passing None to numeric_only: warn now, error in a future + # release. See GH#53098. + from pandas.util._validators import deprecate_numeric_only_none + + deprecate_numeric_only_none(numeric_only, "numeric_only") + # During the 2.x cycle we warn if numeric_only is None but continue to + # accept None; later releases should set none_allowed=False to raise. + validate_bool_kwarg( + numeric_only, "numeric_only", none_allowed=True, int_allowed=False + ) + def func(values: np.ndarray): # We only use this in the case that operates on self.values return op(values, axis=axis, skipna=skipna, **kwds) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index fe7bf5bbc4c2c..0a35722e9a885 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -70,6 +70,7 @@ class providing the base-class of operations. doc, ) from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( coerce_indexer_dtype, @@ -1756,6 +1757,16 @@ def _cython_agg_general( # Note: we never get here with how="ohlc" for DataFrameGroupBy; # that goes through SeriesGroupBy + # numeric_only must be a bool; None is still accepted here, ints are not.
+ # Deprecate passing None to numeric_only: warn now, error in a future + # release. See GH#53098. + from pandas.util._validators import deprecate_numeric_only_none + + deprecate_numeric_only_none(numeric_only, "numeric_only") + validate_bool_kwarg( + numeric_only, "numeric_only", none_allowed=True, int_allowed=False + ) + data = self._get_data_to_aggregate(numeric_only=numeric_only, name=how) def array_func(values: ArrayLike) -> ArrayLike: diff --git a/pandas/core/series.py b/pandas/core/series.py index a5c3bb8d51e8a..2b6a139b1f018 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -6664,6 +6664,16 @@ def _reduce( if axis is not None: self._get_axis_number(axis) + # numeric_only must be a bool; None is still accepted here, ints are not. + # Deprecate passing None to numeric_only: warn now, error in a future + # release. See GH#53098. + from pandas.util._validators import deprecate_numeric_only_none + + deprecate_numeric_only_none(numeric_only, "numeric_only") + validate_bool_kwarg( + numeric_only, "numeric_only", none_allowed=True, int_allowed=False + ) + if isinstance(delegate, ExtensionArray): # dispatch to ExtensionArray interface return delegate._reduce(name, skipna=skipna, **kwds) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 5e16e02e8db6d..13cfbbc2814df 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -35,6 +35,7 @@ Substitution, doc, ) +from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.common import ( ensure_float64, @@ -254,6 +255,16 @@ def _validate_numeric_only(self, name: str, numeric_only: bool) -> None: numeric_only : bool Value passed by user. """ + # validate numeric_only argument type + # Deprecate passing None to numeric_only: warn now, error in a future + # release. See GH#53098.
+ from pandas.util._validators import deprecate_numeric_only_none + + deprecate_numeric_only_none(numeric_only, "numeric_only") + validate_bool_kwarg( + numeric_only, "numeric_only", none_allowed=True, int_allowed=False + ) + if ( self._selected_obj.ndim == 1 and numeric_only diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 7c4ce4c67f13d..b43f96a03aa96 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -850,6 +850,41 @@ def test_axis_1_sum_na(self, string_dtype_no_object, skipna, min_count): def test_sum_prod_nanops(self, method, unit, numeric_only): idx = ["a", "b", "c"] df = DataFrame({"a": [unit, unit], "b": [unit, np.nan], "c": [np.nan, np.nan]}) + # New behavior: numeric_only=None is deprecated; emit a warning but + # continue to accept it during the deprecation period. + if numeric_only is None: + from pandas import errors + + with tm.assert_produces_warning(errors.PandasFutureWarning): + # run the same checks as below while asserting we warned + result = getattr(df, method)(numeric_only=numeric_only) + expected = Series([unit, unit, unit], index=idx, dtype="float64") + tm.assert_series_equal(result, expected) + + result = getattr(df, method)(numeric_only=numeric_only, min_count=1) + expected = Series([unit, unit, np.nan], index=idx) + tm.assert_series_equal(result, expected) + + result = getattr(df, method)(numeric_only=numeric_only, min_count=0) + expected = Series([unit, unit, unit], index=idx, dtype="float64") + tm.assert_series_equal(result, expected) + + result = getattr(df.iloc[1:], method)( + numeric_only=numeric_only, min_count=1 + ) + expected = Series([unit, np.nan, np.nan], index=idx) + tm.assert_series_equal(result, expected) + + # min_count > 1 cases + df2 = DataFrame({"A": [unit] * 10, "B": [unit] * 5 + [np.nan] * 5}) + result = getattr(df2, method)(numeric_only=numeric_only, min_count=5) + expected = Series([unit, unit], index=["A", "B"], dtype="float64") +
tm.assert_series_equal(result, expected) + + result = getattr(df2, method)(numeric_only=numeric_only, min_count=6) + expected = Series([unit, np.nan], index=["A", "B"]) + tm.assert_series_equal(result, expected) + return # The default result = getattr(df, method)(numeric_only=numeric_only) expected = Series([unit, unit, unit], index=idx, dtype="float64") @@ -1757,8 +1792,14 @@ def test_any_all_categorical_dtype_nuisance_column(self, all_boolean_reductions) with pytest.raises(TypeError, match="does not support operation"): getattr(df, all_boolean_reductions)(bool_only=False) - with pytest.raises(TypeError, match="does not support operation"): - getattr(df, all_boolean_reductions)(bool_only=None) + # With the deprecation in place, passing None should emit a + # PandasFutureWarning and then the operation should raise the + # original TypeError. Capture both. + from pandas import errors + + with tm.assert_produces_warning(errors.PandasFutureWarning): + with pytest.raises(TypeError, match="does not support operation"): + getattr(df, all_boolean_reductions)(bool_only=None) with pytest.raises(TypeError, match="does not support operation"): getattr(np, all_boolean_reductions)(df, axis=0) @@ -1995,6 +2036,20 @@ def test_minmax_extensionarray(method, numeric_only): int64_info = np.iinfo("int64") ser = Series([int64_info.max, None, int64_info.min], dtype=pd.Int64Dtype()) df = DataFrame({"Int64": ser}) + # New behavior: numeric_only=None is deprecated; emit a warning but + # continue to accept it during the deprecation period.
+ if numeric_only is None: + from pandas import errors + + with tm.assert_produces_warning(errors.PandasFutureWarning): + result = getattr(df, method)(numeric_only=numeric_only) + expected = Series( + [getattr(int64_info, method)], + dtype="Int64", + index=Index(["Int64"]), + ) + tm.assert_series_equal(result, expected) + return result = getattr(df, method)(numeric_only=numeric_only) expected = Series( [getattr(int64_info, method)], diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index f7e878b0633d3..30fd56e7303e3 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -269,6 +269,9 @@ def validate_bool_kwarg( return value +# deprecate_numeric_only_none defined later in file + + def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True): """ Validate the keyword arguments to 'fillna'. @@ -341,6 +344,33 @@ def validate_percentile(q: float | Iterable[float]) -> np.ndarray: return q_arr +def deprecate_numeric_only_none(value: BoolishNoneT, arg_name: str) -> BoolishNoneT: + """ + Deprecation helper for the "numeric_only" argument when value is None. + + If ``value`` is ``None``, emit a PandasFutureWarning indicating that + passing ``None`` for ``numeric_only`` is deprecated and will be an error + in a future version. Return the input value unchanged. + + This helper allows a warn-first / error-later migration strategy during + the 2.x release cycle: callers can call this to warn users for now, and + later releases should enforce strict bool-only semantics. + """ + import warnings + + from pandas import errors + from pandas.util._exceptions import find_stack_level + + if value is None: + msg = ( + f'Passing None for "{arg_name}" is deprecated and will raise a ' + "ValueError in a future version; please pass True or False." + ) + # Attribute the warning to the user's call site, not a pandas-internal frame. + warnings.warn(msg, errors.PandasFutureWarning, stacklevel=find_stack_level()) + return value + + @overload def validate_ascending(ascending: BoolishT) -> BoolishT: ...