From 50ef24a44e82fac0d1a13d6f3890a861984760a5 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Mon, 25 Aug 2025 01:48:19 +0530 Subject: [PATCH] DOC: Clarify Series.eq behaviour with missing values --- pandas/core/series.py | 53 ++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index ce5b2e5ed8de5..6c37548d2b130 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -20,6 +20,7 @@ TYPE_CHECKING, Any, Literal, + Self, cast, overload, ) @@ -40,6 +41,7 @@ from pandas.errors import ( ChainedAssignmentError, InvalidIndexError, + Pandas4Warning, ) from pandas.errors.cow import ( _chained_assignment_method_msg, @@ -191,7 +193,6 @@ ReindexMethod, Renamer, Scalar, - Self, SortKind, StorageOptions, Suffixes, @@ -764,11 +765,13 @@ def values(self): array([1, 2, 3]) >>> pd.Series(list("aabc")).values - array(['a', 'a', 'b', 'c'], dtype=object) + + ['a', 'a', 'b', 'c'] + Length: 4, dtype: str >>> pd.Series(list("aabc")).astype("category").values ['a', 'a', 'b', 'c'] - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] Timezone aware datetime data is converted to UTC: @@ -1471,7 +1474,7 @@ def to_string( ) -> None: ... 
@deprecate_nonkeyword_arguments( - version="4.0", allowed_args=["self", "buf"], name="to_string" + Pandas4Warning, allowed_args=["self", "buf"], name="to_string" ) def to_string( self, @@ -1629,7 +1632,7 @@ def to_markdown( ), ) @deprecate_nonkeyword_arguments( - version="4.0", allowed_args=["self", "buf"], name="to_markdown" + Pandas4Warning, allowed_args=["self", "buf"], name="to_markdown" ) def to_markdown( self, @@ -1970,7 +1973,7 @@ def groupby( as_index: bool = True, sort: bool = True, group_keys: bool = True, - observed: bool = False, + observed: bool = True, dropna: bool = True, ) -> SeriesGroupBy: from pandas.core.groupby.generic import SeriesGroupBy @@ -2144,12 +2147,12 @@ def unique(self) -> ArrayLike: >>> pd.Series(pd.Categorical(list("baabc"))).unique() ['b', 'a', 'c'] - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] >>> pd.Series( ... pd.Categorical(list("baabc"), categories=list("abc"), ordered=True) ... ).unique() ['b', 'a', 'c'] - Categories (3, object): ['a' < 'b' < 'c'] + Categories (3, str): ['a' < 'b' < 'c'] """ return super().unique() @@ -3109,7 +3112,8 @@ def combine( Function that takes two scalars as inputs and returns an element. fill_value : scalar, optional The value to assume when an index is missing from - one Series or the other. The default specifies to use the + one Series or the other. Scalars are any value that is not a numpy.ndarray, + list, tuple or Series. The default specifies to use the appropriate NaN value for the underlying dtype of the Series. Returns @@ -6070,6 +6074,11 @@ def eq( Equivalent to ``series == other``, but with support to substitute a fill_value for missing data in either one of the inputs. + By default, comparisons with missing values (e.g. ``np.nan``) return ``False`` + at those positions, even when a missing value is compared to itself; nullable + dtypes yield ``pd.NA`` there instead. If ``fill_value`` is specified, missing + values are replaced before comparison. 
+ Parameters ---------- other : Series or scalar value @@ -6683,7 +6692,7 @@ def any( # type: ignore[override] filter_type="bool", ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="all") + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="all") @Appender(make_doc("all", ndim=1)) def all( self, @@ -6703,7 +6712,7 @@ def all( filter_type="bool", ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="min") + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="min") def min( self, axis: Axis | None = 0, @@ -6774,7 +6783,7 @@ def min( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="max") + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="max") def max( self, axis: Axis | None = 0, @@ -6845,7 +6854,7 @@ def max( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="sum") + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="sum") def sum( self, axis: Axis | None = None, @@ -6946,7 +6955,7 @@ def sum( **kwargs, ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="prod") + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="prod") @doc(make_doc("prod", ndim=1)) def prod( self, @@ -6965,7 +6974,7 @@ def prod( **kwargs, ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="mean") + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="mean") def mean( self, axis: Axis | None = 0, @@ -7019,7 +7028,9 @@ def mean( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="median") + @deprecate_nonkeyword_arguments( + Pandas4Warning, 
allowed_args=["self"], name="median" + ) def median( self, axis: Axis | None = 0, @@ -7100,7 +7111,7 @@ def median( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="sem") + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="sem") @doc(make_doc("sem", ndim=1)) def sem( self, @@ -7119,7 +7130,7 @@ def sem( **kwargs, ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="var") + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="var") def var( self, axis: Axis | None = None, @@ -7206,7 +7217,7 @@ def var( **kwargs, ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="std") + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="std") @doc(make_doc("std", ndim=1)) def std( self, @@ -7225,7 +7236,7 @@ def std( **kwargs, ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="skew") + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="skew") @doc(make_doc("skew", ndim=1)) def skew( self, @@ -7238,7 +7249,7 @@ def skew( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - @deprecate_nonkeyword_arguments(version="4.0", allowed_args=["self"], name="kurt") + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="kurt") def kurt( self, axis: Axis | None = 0,