-
-
Notifications
You must be signed in to change notification settings - Fork 19.4k
Fix/53098 numeric only disallow non bools #62632
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
ecf23e1
7c43c70
af1f222
11de335
4efdd18
1569dba
0e71323
a520e54
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -70,6 +70,7 @@ class providing the base-class of operations. | |
| doc, | ||
| ) | ||
| from pandas.util._exceptions import find_stack_level | ||
| from pandas.util._validators import validate_bool_kwarg | ||
|
|
||
| from pandas.core.dtypes.cast import ( | ||
| coerce_indexer_dtype, | ||
|
|
@@ -1756,6 +1757,16 @@ def _cython_agg_general( | |
| # Note: we never get here with how="ohlc" for DataFrameGroupBy; | ||
| # that goes through SeriesGroupBy | ||
|
|
||
| # validate numeric_only is strictly a bool (disallow None, ints, etc.) | ||
| # Deprecate passing None to numeric_only: warn now, error in a future | ||
| # release. See GH#53098. | ||
| from pandas.util._validators import deprecate_numeric_only_none | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do this import at the top of the file |
||
|
|
||
| deprecate_numeric_only_none(numeric_only, "numeric_only") | ||
| validate_bool_kwarg( | ||
| numeric_only, "numeric_only", none_allowed=True, int_allowed=False | ||
| ) | ||
|
|
||
| data = self._get_data_to_aggregate(numeric_only=numeric_only, name=how) | ||
|
|
||
| def array_func(values: ArrayLike) -> ArrayLike: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -850,6 +850,41 @@ def test_axis_1_sum_na(self, string_dtype_no_object, skipna, min_count): | |
| def test_sum_prod_nanops(self, method, unit, numeric_only): | ||
| idx = ["a", "b", "c"] | ||
| df = DataFrame({"a": [unit, unit], "b": [unit, np.nan], "c": [np.nan, np.nan]}) | ||
| # New behavior: numeric_only=None is deprecated; emit a warning but | ||
| # continue to accept it during the deprecation period. | ||
| if numeric_only is None: | ||
| from pandas import errors | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. top of the file, should probably be Pandas4Warning? |
||
|
|
||
| with tm.assert_produces_warning(errors.PandasFutureWarning): | ||
| # run the same checks as below while asserting we warned | ||
| result = getattr(df, method)(numeric_only=numeric_only) | ||
| expected = Series([unit, unit, unit], index=idx, dtype="float64") | ||
| tm.assert_series_equal(result, expected) | ||
|
|
||
| result = getattr(df, method)(numeric_only=numeric_only, min_count=1) | ||
| expected = Series([unit, unit, np.nan], index=idx) | ||
| tm.assert_series_equal(result, expected) | ||
|
|
||
| result = getattr(df, method)(numeric_only=numeric_only, min_count=0) | ||
| expected = Series([unit, unit, unit], index=idx, dtype="float64") | ||
| tm.assert_series_equal(result, expected) | ||
|
|
||
| result = getattr(df.iloc[1:], method)( | ||
| numeric_only=numeric_only, min_count=1 | ||
| ) | ||
| expected = Series([unit, np.nan, np.nan], index=idx) | ||
| tm.assert_series_equal(result, expected) | ||
|
|
||
| # min_count > 1 cases | ||
| df2 = DataFrame({"A": [unit] * 10, "B": [unit] * 5 + [np.nan] * 5}) | ||
| result = getattr(df2, method)(numeric_only=numeric_only, min_count=5) | ||
| expected = Series(result, index=["A", "B"]) | ||
| tm.assert_series_equal(result, expected) | ||
|
|
||
| result = getattr(df2, method)(numeric_only=numeric_only, min_count=6) | ||
| expected = Series(result, index=["A", "B"]) | ||
| tm.assert_series_equal(result, expected) | ||
| return | ||
| # The default | ||
| result = getattr(df, method)(numeric_only=numeric_only) | ||
| expected = Series([unit, unit, unit], index=idx, dtype="float64") | ||
|
|
@@ -1757,8 +1792,14 @@ def test_any_all_categorical_dtype_nuisance_column(self, all_boolean_reductions) | |
| with pytest.raises(TypeError, match="does not support operation"): | ||
| getattr(df, all_boolean_reductions)(bool_only=False) | ||
|
|
||
| with pytest.raises(TypeError, match="does not support operation"): | ||
| getattr(df, all_boolean_reductions)(bool_only=None) | ||
| # With the deprecation in place, passing None should emit a | ||
| # PandasFutureWarning and then the operation should raise the | ||
| # original TypeError. Capture both. | ||
| from pandas import errors | ||
|
|
||
| with tm.assert_produces_warning(errors.PandasFutureWarning): | ||
| with pytest.raises(TypeError, match="does not support operation"): | ||
| getattr(df, all_boolean_reductions)(bool_only=None) | ||
|
|
||
| with pytest.raises(TypeError, match="does not support operation"): | ||
| getattr(np, all_boolean_reductions)(df, axis=0) | ||
|
|
@@ -1995,6 +2036,20 @@ def test_minmax_extensionarray(method, numeric_only): | |
| int64_info = np.iinfo("int64") | ||
| ser = Series([int64_info.max, None, int64_info.min], dtype=pd.Int64Dtype()) | ||
| df = DataFrame({"Int64": ser}) | ||
| # New behavior: numeric_only=None is deprecated; emit a warning but | ||
| # continue to accept it during the deprecation period. | ||
| if numeric_only is None: | ||
| from pandas import errors | ||
|
|
||
| with tm.assert_produces_warning(errors.PandasFutureWarning): | ||
| result = getattr(df, method)(numeric_only=numeric_only) | ||
| expected = Series( | ||
| [getattr(int64_info, method)], | ||
| dtype="Int64", | ||
| index=Index(["Int64"]), | ||
| ) | ||
| tm.assert_series_equal(result, expected) | ||
| return | ||
| result = getattr(df, method)(numeric_only=numeric_only) | ||
| expected = Series( | ||
| [getattr(int64_info, method)], | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -269,6 +269,9 @@ def validate_bool_kwarg( | |
| return value | ||
|
|
||
|
|
||
| # deprecate_numeric_only_none defined later in file | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not needed |
||
|
|
||
|
|
||
| def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True): | ||
| """ | ||
| Validate the keyword arguments to 'fillna'. | ||
|
|
@@ -341,6 +344,31 @@ def validate_percentile(q: float | Iterable[float]) -> np.ndarray: | |
| return q_arr | ||
|
|
||
|
|
||
| def deprecate_numeric_only_none(value: BoolishNoneT, arg_name: str) -> BoolishNoneT: | ||
| """ | ||
| Deprecation helper for the "numeric_only" argument when value is None. | ||
|
|
||
| If ``value`` is ``None``, emit a PandasFutureWarning indicating that | ||
| passing ``None`` for ``numeric_only`` is deprecated and will be an error | ||
| in a future version. Return the input value unchanged. | ||
|
|
||
| This helper allows a warn-first / error-later migration strategy during | ||
| the 2.x release cycle: callers can call this to warn users for now, and | ||
| later releases should enforce strict bool-only semantics. | ||
| """ | ||
| import warnings | ||
|
|
||
| from pandas import errors | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. top of the file |
||
|
|
||
| if value is None: | ||
| msg = ( | ||
| f'Passing None for "{arg_name}" is deprecated and will raise a ' | ||
| "ValueError in a future version; please pass True or False." | ||
| ) | ||
| warnings.warn(msg, errors.PandasFutureWarning, stacklevel=2) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use stacklevel=find_stack_level() |
||
| return value | ||
|
|
||
|
|
||
| @overload | ||
| def validate_ascending(ascending: BoolishT) -> BoolishT: ... | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this comment can go in the implementation of deprecate_numeric_only_none, is not needed at every place where it is called