From ecf23e1b6a4b34ff7f1795c9676965146bdd87b1 Mon Sep 17 00:00:00 2001 From: divya1974 Date: Wed, 8 Oct 2025 12:48:38 -0400 Subject: [PATCH 1/6] performance improved approach --- pandas/core/algorithms.py | 29 ++++++++++++++++++++++++ pandas/tests/series/methods/test_isin.py | 10 ++++++++ 2 files changed, 39 insertions(+) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index bbca78459ca75..502363bdfad03 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -579,6 +579,35 @@ def f(c, v): f = lambda a, b: np.isin(a, b).ravel() else: + # Fast-path for integer mixes: if both sides are integer-kind and + # have different dtypes, avoid upcasting to float64 (which loses + # precision for large 64-bit integers). When possible, perform the + # comparison in unsigned 64-bit space which preserves exact integer + # equality and uses the integer hashtable for performance. + if ( + values.dtype.kind in "iu" + and comps_array.dtype.kind in "iu" + and not is_dtype_equal(values.dtype, comps_array.dtype) + ): + try: + # only proceed when both arrays are non-empty + if values.size > 0 and comps_array.size > 0: + signed_negative = False + if values.dtype.kind == "i": + # using min is vectorized and fast + signed_negative = values.min() < 0 + if comps_array.dtype.kind == "i": + signed_negative = signed_negative or (comps_array.min() < 0) + + if not signed_negative: + # safe to cast both to uint64 for exact comparison + values_u = values.astype("uint64", copy=False) + comps_u = comps_array.astype("uint64", copy=False) + return htable.ismember(comps_u, values_u) + except Exception: + # fall back to generic behavior on any error + pass + common = np_find_common_type(values.dtype, comps_array.dtype) values = values.astype(common, copy=False) comps_array = comps_array.astype(common, copy=False) diff --git a/pandas/tests/series/methods/test_isin.py b/pandas/tests/series/methods/test_isin.py index 4f8484252ba8f..4e60dd592f5b9 100644 --- 
a/pandas/tests/series/methods/test_isin.py +++ b/pandas/tests/series/methods/test_isin.py @@ -267,3 +267,13 @@ def test_isin_filtering_on_iterable(data, isin): expected_result = Series([True, True, False]) tm.assert_series_equal(result, expected_result) + + +def test_isin_int64_vs_uint64_mismatch(): + # Regression test for mixing signed int64 Series with uint64 values + ser = Series([1378774140726870442], dtype=np.int64) + vals = [np.uint64(1378774140726870528)] + + res = ser.isin(vals) + expected = Series([False]) + tm.assert_series_equal(res, expected) From 7c43c70a18bfce1f606a234458c17fd8c7fb1654 Mon Sep 17 00:00:00 2001 From: divya1974 Date: Wed, 8 Oct 2025 12:56:11 -0400 Subject: [PATCH 2/6] Revert "performance improved approach" This reverts commit ecf23e1b6a4b34ff7f1795c9676965146bdd87b1. --- pandas/core/algorithms.py | 29 ------------------------ pandas/tests/series/methods/test_isin.py | 10 -------- 2 files changed, 39 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 502363bdfad03..bbca78459ca75 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -579,35 +579,6 @@ def f(c, v): f = lambda a, b: np.isin(a, b).ravel() else: - # Fast-path for integer mixes: if both sides are integer-kind and - # have different dtypes, avoid upcasting to float64 (which loses - # precision for large 64-bit integers). When possible, perform the - # comparison in unsigned 64-bit space which preserves exact integer - # equality and uses the integer hashtable for performance. 
- if ( - values.dtype.kind in "iu" - and comps_array.dtype.kind in "iu" - and not is_dtype_equal(values.dtype, comps_array.dtype) - ): - try: - # only proceed when both arrays are non-empty - if values.size > 0 and comps_array.size > 0: - signed_negative = False - if values.dtype.kind == "i": - # using min is vectorized and fast - signed_negative = values.min() < 0 - if comps_array.dtype.kind == "i": - signed_negative = signed_negative or (comps_array.min() < 0) - - if not signed_negative: - # safe to cast both to uint64 for exact comparison - values_u = values.astype("uint64", copy=False) - comps_u = comps_array.astype("uint64", copy=False) - return htable.ismember(comps_u, values_u) - except Exception: - # fall back to generic behavior on any error - pass - common = np_find_common_type(values.dtype, comps_array.dtype) values = values.astype(common, copy=False) comps_array = comps_array.astype(common, copy=False) diff --git a/pandas/tests/series/methods/test_isin.py b/pandas/tests/series/methods/test_isin.py index 4e60dd592f5b9..4f8484252ba8f 100644 --- a/pandas/tests/series/methods/test_isin.py +++ b/pandas/tests/series/methods/test_isin.py @@ -267,13 +267,3 @@ def test_isin_filtering_on_iterable(data, isin): expected_result = Series([True, True, False]) tm.assert_series_equal(result, expected_result) - - -def test_isin_int64_vs_uint64_mismatch(): - # Regression test for mixing signed int64 Series with uint64 values - ser = Series([1378774140726870442], dtype=np.int64) - vals = [np.uint64(1378774140726870528)] - - res = ser.isin(vals) - expected = Series([False]) - tm.assert_series_equal(res, expected) From af1f2228348be9c0bbce65f797aa3fe64b1d0d01 Mon Sep 17 00:00:00 2001 From: divya1974 Date: Wed, 8 Oct 2025 19:43:35 -0400 Subject: [PATCH 3/6] DEPR: add deprecate_numeric_only_none helper (warn on numeric_only=None during 2.x)

Adds a centralized helper that emits a PandasFutureWarning for numeric_only=None to support a warn-first migration (GH#53098).
--- pandas/util/_validators.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index f7e878b0633d3..30fd56e7303e3 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -269,6 +269,9 @@ def validate_bool_kwarg( return value +# deprecate_numeric_only_none defined later in file + + def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True): """ Validate the keyword arguments to 'fillna'. @@ -341,6 +344,31 @@ def validate_percentile(q: float | Iterable[float]) -> np.ndarray: return q_arr +def deprecate_numeric_only_none(value: BoolishNoneT, arg_name: str) -> BoolishNoneT: + """ + Deprecation helper for the "numeric_only" argument when value is None. + + If ``value`` is ``None``, emit a PandasFutureWarning indicating that + passing ``None`` for ``numeric_only`` is deprecated and will be an error + in a future version. Return the input value unchanged. + + This helper allows a warn-first / error-later migration strategy during + the 2.x release cycle: callers can call this to warn users for now, and + later releases should enforce strict bool-only semantics. + """ + import warnings + + from pandas import errors + + if value is None: + msg = ( + f'Passing None for "{arg_name}" is deprecated and will raise a ' + "ValueError in a future version; please pass True or False." + ) + warnings.warn(msg, errors.PandasFutureWarning, stacklevel=2) + return value + + @overload def validate_ascending(ascending: BoolishT) -> BoolishT: ... From 11de335e33de2aec988107109a9ffb9cb3519bbe Mon Sep 17 00:00:00 2001 From: divya1974 Date: Wed, 8 Oct 2025 19:43:55 -0400 Subject: [PATCH 4/6] ENH: deprecate numeric_only=None in reductions during 2.x

Call deprecate_numeric_only_none and validate_bool_kwarg at central reduction entry points (DataFrame._reduce, Series._reduce, GroupBy._cython_agg_general, Rolling._validate_numeric_only).
Emit PandasFutureWarning for numeric_only=None (GH#53098). --- pandas/core/frame.py | 12 ++++++++++++ pandas/core/groupby/groupby.py | 11 +++++++++++ pandas/core/series.py | 10 ++++++++++ pandas/core/window/rolling.py | 11 +++++++++++ 4 files changed, 44 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 643974db5f2bf..9760877c318c9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11926,6 +11926,18 @@ def _reduce( if axis is not None: axis = self._get_axis_number(axis) + # Validate that numeric_only is a bool or None (ints etc. are rejected). + # Deprecate passing None to numeric_only: warn now, error in a future + # release. See GH#53098. + from pandas.util._validators import deprecate_numeric_only_none + + deprecate_numeric_only_none(numeric_only, "numeric_only") + # During the 2.x cycle we warn if numeric_only is None but continue to + # accept None; later releases should set none_allowed=False to raise. + validate_bool_kwarg( + numeric_only, "numeric_only", none_allowed=True, int_allowed=False + ) + def func(values: np.ndarray): # We only use this in the case that operates on self.values return op(values, axis=axis, skipna=skipna, **kwds) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index fe7bf5bbc4c2c..0a35722e9a885 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -70,6 +70,7 @@ class providing the base-class of operations. doc, ) from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( coerce_indexer_dtype, @@ -1756,6 +1757,16 @@ def _cython_agg_general( # Note: we never get here with how="ohlc" for DataFrameGroupBy; # that goes through SeriesGroupBy + # Validate that numeric_only is a bool or None (ints etc. are rejected). + # Deprecate passing None to numeric_only: warn now, error in a future + # release. See GH#53098.
+ from pandas.util._validators import deprecate_numeric_only_none + + deprecate_numeric_only_none(numeric_only, "numeric_only") + validate_bool_kwarg( + numeric_only, "numeric_only", none_allowed=True, int_allowed=False + ) + data = self._get_data_to_aggregate(numeric_only=numeric_only, name=how) def array_func(values: ArrayLike) -> ArrayLike: diff --git a/pandas/core/series.py b/pandas/core/series.py index a5c3bb8d51e8a..2b6a139b1f018 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -6664,6 +6664,16 @@ def _reduce( if axis is not None: self._get_axis_number(axis) + # Validate that numeric_only is a bool or None (ints etc. are rejected). + # Deprecate passing None to numeric_only: warn now, error in a future + # release. See GH#53098. + from pandas.util._validators import deprecate_numeric_only_none + + deprecate_numeric_only_none(numeric_only, "numeric_only") + validate_bool_kwarg( + numeric_only, "numeric_only", none_allowed=True, int_allowed=False + ) + if isinstance(delegate, ExtensionArray): # dispatch to ExtensionArray interface return delegate._reduce(name, skipna=skipna, **kwds) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 5e16e02e8db6d..13cfbbc2814df 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -35,6 +35,7 @@ Substitution, doc, ) +from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.common import ( ensure_float64, @@ -254,6 +255,16 @@ def _validate_numeric_only(self, name: str, numeric_only: bool) -> None: numeric_only : bool Value passed by user. """ + # validate numeric_only argument type + # Deprecate passing None to numeric_only: warn now, error in a future + # release. See GH#53098.
+ from pandas.util._validators import deprecate_numeric_only_none + + deprecate_numeric_only_none(numeric_only, "numeric_only") + validate_bool_kwarg( + numeric_only, "numeric_only", none_allowed=True, int_allowed=False + ) + if ( self._selected_obj.ndim == 1 and numeric_only From 4efdd18ad7b4e368582e55edc7626c24d538ea84 Mon Sep 17 00:00:00 2001 From: divya1974 Date: Wed, 8 Oct 2025 19:45:11 -0400 Subject: [PATCH 5/6] TST: update frame reductions tests to assert PandasFutureWarning for numeric_only=None using tm.assert_produces_warning --- pandas/tests/frame/test_reductions.py | 59 ++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 7c4ce4c67f13d..b43f96a03aa96 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -850,6 +850,41 @@ def test_axis_1_sum_na(self, string_dtype_no_object, skipna, min_count): def test_sum_prod_nanops(self, method, unit, numeric_only): idx = ["a", "b", "c"] df = DataFrame({"a": [unit, unit], "b": [unit, np.nan], "c": [np.nan, np.nan]}) + # New behavior: numeric_only=None is deprecated; emit a warning but + # continue to accept it during the deprecation period. 
+ if numeric_only is None: + from pandas import errors + + with tm.assert_produces_warning(errors.PandasFutureWarning): + # run the same checks as below while asserting we warned + result = getattr(df, method)(numeric_only=numeric_only) + expected = Series([unit, unit, unit], index=idx, dtype="float64") + tm.assert_series_equal(result, expected) + + result = getattr(df, method)(numeric_only=numeric_only, min_count=1) + expected = Series([unit, unit, np.nan], index=idx) + tm.assert_series_equal(result, expected) + + result = getattr(df, method)(numeric_only=numeric_only, min_count=0) + expected = Series([unit, unit, unit], index=idx, dtype="float64") + tm.assert_series_equal(result, expected) + + result = getattr(df.iloc[1:], method)( + numeric_only=numeric_only, min_count=1 + ) + expected = Series([unit, np.nan, np.nan], index=idx) + tm.assert_series_equal(result, expected) + + # min_count > 1 cases + df2 = DataFrame({"A": [unit] * 10, "B": [unit] * 5 + [np.nan] * 5}) + result = getattr(df2, method)(numeric_only=numeric_only, min_count=5) + expected = Series(result, index=["A", "B"]) + tm.assert_series_equal(result, expected) + + result = getattr(df2, method)(numeric_only=numeric_only, min_count=6) + expected = Series(result, index=["A", "B"]) + tm.assert_series_equal(result, expected) + return # The default result = getattr(df, method)(numeric_only=numeric_only) expected = Series([unit, unit, unit], index=idx, dtype="float64") @@ -1757,8 +1792,14 @@ def test_any_all_categorical_dtype_nuisance_column(self, all_boolean_reductions) with pytest.raises(TypeError, match="does not support operation"): getattr(df, all_boolean_reductions)(bool_only=False) - with pytest.raises(TypeError, match="does not support operation"): - getattr(df, all_boolean_reductions)(bool_only=None) + # With the deprecation in place, passing None should emit a + # PandasFutureWarning and then the operation should raise the + # original TypeError. Capture both. 
+ from pandas import errors + + with tm.assert_produces_warning(errors.PandasFutureWarning): + with pytest.raises(TypeError, match="does not support operation"): + getattr(df, all_boolean_reductions)(bool_only=None) with pytest.raises(TypeError, match="does not support operation"): getattr(np, all_boolean_reductions)(df, axis=0) @@ -1995,6 +2036,20 @@ def test_minmax_extensionarray(method, numeric_only): int64_info = np.iinfo("int64") ser = Series([int64_info.max, None, int64_info.min], dtype=pd.Int64Dtype()) df = DataFrame({"Int64": ser}) + # New behavior: numeric_only=None is deprecated; emit a warning but + # continue to accept it during the deprecation period. + if numeric_only is None: + from pandas import errors + + with tm.assert_produces_warning(errors.PandasFutureWarning): + result = getattr(df, method)(numeric_only=numeric_only) + expected = Series( + [getattr(int64_info, method)], + dtype="Int64", + index=Index(["Int64"]), + ) + tm.assert_series_equal(result, expected) + return result = getattr(df, method)(numeric_only=numeric_only) expected = Series( [getattr(int64_info, method)], From 1569dbafcf0d613e8bf48d7a4f246b1e7bac1c8f Mon Sep 17 00:00:00 2001 From: divya1974 Date: Wed, 8 Oct 2025 19:45:18 -0400 Subject: [PATCH 6/6] =?UTF-8?q?DOC:=20whatsnew=20note=20=E2=80=94=20deprec?= =?UTF-8?q?ate=20numeric=5Fonly=3DNone=20in=20reductions=20during=202.x=20?= =?UTF-8?q?(GH#53098)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 2817945c55a86..78910fa843a0a 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -518,6 +518,7 @@ Other Deprecations - Deprecated ``axis=1`` in :meth:`DataFrame.ewm`, :meth:`DataFrame.rolling`, :meth:`DataFrame.expanding`, transpose before calling the method instead (:issue:`51778`) - Deprecated ``axis=1`` in 
:meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`) - Deprecated ``broadcast_axis`` keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`) + - Deprecated passing ``numeric_only=None`` to reduction APIs (DataFrame/Series/GroupBy/Window). During the 2.x cycle pandas will emit a ``pandas.errors.PandasFutureWarning`` when ``numeric_only=None`` is used; in a future release this will raise a ``ValueError``. Please pass ``True`` or ``False`` explicitly (:issue:`53098`). - Deprecated ``downcast`` keyword in :meth:`Index.fillna` (:issue:`53956`) - Deprecated ``fill_method`` and ``limit`` keywords in :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`.DataFrameGroupBy.pct_change`, and :meth:`.SeriesGroupBy.pct_change`, explicitly call e.g. :meth:`DataFrame.ffill` or :meth:`DataFrame.bfill` before calling ``pct_change`` instead (:issue:`53491`) - Deprecated ``method``, ``limit``, and ``fill_axis`` keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call :meth:`DataFrame.fillna` or :meth:`Series.fillna` on the alignment results instead (:issue:`51856`)