From da1288753fa9c5d32504d02577d425725a806bfb Mon Sep 17 00:00:00 2001 From: richard Date: Tue, 4 Nov 2025 17:42:37 -0500 Subject: [PATCH 1/8] DOC: Run all doctests --- .github/workflows/code-checks.yml | 2 ++ pandas/_libs/interval.pyx | 1 - pandas/_libs/missing.pyx | 1 - pandas/_libs/tslibs/nattype.pyx | 1 - pandas/_libs/tslibs/np_datetime.pyx | 4 +--- pandas/_libs/tslibs/offsets.pyx | 4 ++-- pandas/_libs/tslibs/period.pyx | 2 +- pandas/core/apply.py | 8 +++++--- pandas/core/arrays/arrow/array.py | 8 +++++--- pandas/core/arrays/base.py | 7 +++---- pandas/core/arrays/boolean.py | 3 +-- pandas/core/arrays/categorical.py | 4 ++-- pandas/core/arrays/datetimes.py | 4 ++-- pandas/core/arrays/floating.py | 3 +-- pandas/core/arrays/integer.py | 3 +-- pandas/core/arrays/interval.py | 4 ++-- pandas/core/arrays/numpy_.py | 3 ++- pandas/core/arrays/period.py | 4 ++-- pandas/core/arrays/sparse/array.py | 8 +++++--- pandas/core/arrays/string_.py | 3 +-- pandas/core/arrays/string_arrow.py | 4 ++-- pandas/core/arrays/timedeltas.py | 4 ++-- pandas/core/col.py | 3 +-- pandas/core/dtypes/base.py | 3 +-- pandas/core/flags.py | 5 +++-- pandas/core/groupby/grouper.py | 10 +++++++--- pandas/core/indexers/objects.py | 10 ++++------ pandas/core/indexes/frozen.py | 5 +++-- pandas/core/indexes/range.py | 2 -- pandas/core/interchange/dataframe_protocol.py | 5 +++-- pandas/core/resample.py | 16 ++++++---------- pandas/core/window/ewm.py | 11 ++++++----- pandas/core/window/expanding.py | 8 ++++---- pandas/core/window/rolling.py | 9 ++++----- pandas/io/json/_json.py | 3 +-- pandas/io/sas/sasreader.py | 3 +-- pandas/io/stata.py | 2 +- pandas/plotting/_core.py | 3 +-- pandas/util/_decorators.py | 14 ++++++++++---- pandas/util/version/__init__.py | 3 ++- 40 files changed, 100 insertions(+), 100 deletions(-) diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index 82b1ef586e5dc..083c80b7c291b 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -57,6 +57,8 @@ jobs: run: sudo apt-get update && sudo apt-get install -y libegl1 libopengl0 - name: Run doctests + env: + PANDAS_SET_MODULE_DUNDER: 0 run: cd ci && ./code_checks.sh doctests if: ${{ steps.build.outcome == 'success' && always() }} diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index a1cd4c9d15447..ad32adf5d19f9 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -445,7 +445,6 @@ cdef class Interval(IntervalMixin): >>> interval.closed 'left' """ - __module__ = "pandas" def __init__(self, left, right, str closed="right"): # note: it is faster to just do these checks than to use a special diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index a67c533d03e0e..b24dd9d61589d 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -394,7 +394,6 @@ class NAType(C_NAType): True """ __module__ = "pandas.api.typing" - _instance = None def __new__(cls, *args, **kwargs): diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index a0265297fe873..fef104b9760af 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -371,7 +371,6 @@ class NaTType(_NaT): 0 2023-01-01 1 NaT """ - __module__ = "pandas.api.typing" def __new__(cls): diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 0fc7a6945d2e0..102ae21d55a9b 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -193,7 +193,6 @@ class 
OutOfBoundsDatetime(ValueError): at position 0 """ __module__ = "pandas.errors" - pass class OutOfBoundsTimedelta(ValueError): @@ -213,9 +212,8 @@ class OutOfBoundsTimedelta(ValueError): OutOfBoundsTimedelta: Cannot cast 139999 days 00:00:00 to unit='ns' without overflow. """ - __module__ = "pandas.errors" # Timedelta analogue to OutOfBoundsDatetime - pass + __module__ = "pandas.errors" cdef get_implementation_bounds( diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index be86118a2b9e2..547a39d39a3b9 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -47,6 +47,7 @@ from pandas._libs.tslibs.ccalendar import ( int_to_weekday, weekday_to_int, ) +from pandas.util._decorators import set_module from pandas.util._exceptions import find_stack_level from pandas._libs.tslibs.ccalendar cimport ( @@ -1695,6 +1696,7 @@ class OffsetMeta(type): # TODO: figure out a way to use a metaclass with a cdef class +@set_module("pandas") class DateOffset(RelativeDeltaOffset, metaclass=OffsetMeta): """ Standard kind of date increment used for a date range. @@ -1822,8 +1824,6 @@ class DateOffset(RelativeDeltaOffset, metaclass=OffsetMeta): >>> ts + pd.DateOffset(hour=8) Timestamp('2017-01-01 08:10:11') """ - __module__ = "pandas" - def __setattr__(self, name, value): raise AttributeError("DateOffset objects are immutable.") diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index facf430060e73..fd905248c4558 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1626,12 +1626,12 @@ DIFFERENT_FREQ = ("Input has different freq={other_freq} " "from {cls}(freq={own_freq})") +@set_module("pandas.errors") class IncompatibleFrequency(TypeError): """ Raised when trying to compare or operate between Periods with different frequencies. """ - __module__ = "pandas.errors" pass diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 1098ceb4c3929..c1889dc212697 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -29,7 +29,10 @@ ) from pandas.compat._optional import import_optional_dependency from pandas.errors import SpecificationError -from pandas.util._decorators import cache_readonly +from pandas.util._decorators import ( + cache_readonly, + set_module, +) from pandas.core.dtypes.cast import is_nested_object from pandas.core.dtypes.common import ( @@ -75,6 +78,7 @@ ResType: TypeAlias = dict[int, Any] +@set_module("pandas.api.executors") class BaseExecutionEngine(abc.ABC): """ Base class for execution engines for map and apply methods. @@ -88,8 +92,6 @@ class BaseExecutionEngine(abc.ABC): simply runs the code with the Python interpreter and pandas. 
""" - __module__ = "pandas.api.executors" - @staticmethod @abc.abstractmethod def map( diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 53c938faf9257..b5d01572deeac 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -36,7 +36,10 @@ PYARROW_MIN_VERSION, ) from pandas.errors import Pandas4Warning -from pandas.util._decorators import doc +from pandas.util._decorators import ( + doc, + set_module, +) from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import ( @@ -241,6 +244,7 @@ def to_pyarrow_type( return None +@set_module("pandas.arrays") class ArrowExtensionArray( OpsMixin, ExtensionArraySupportsAnyAll, @@ -296,8 +300,6 @@ class ArrowExtensionArray( Length: 3, dtype: int64[pyarrow] """ # noqa: E501 (http link too long) - __module__ = "pandas.arrays" - _pa_array: pa.ChunkedArray _dtype: ArrowDtype diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e091ecf18668d..a0fca279d5e3c 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -31,6 +31,7 @@ from pandas.errors import AbstractMethodError from pandas.util._decorators import ( cache_readonly, + set_module, ) from pandas.util._validators import ( validate_bool_kwarg, @@ -105,6 +106,7 @@ _extension_array_shared_docs: dict[str, str] = {} +@set_module("pandas.api.extensions") class ExtensionArray: """ Abstract base class for custom 1-D array types. @@ -256,8 +258,6 @@ class ExtensionArray: https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/list/array.py """ - __module__ = "pandas.api.extensions" - # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray. # Don't override this. _typ = "extension" @@ -2788,6 +2788,7 @@ def _add_logical_ops(cls) -> None: setattr(cls, "__rxor__", cls._create_logical_method(roperator.rxor)) +@set_module("pandas.api.extensions") class ExtensionScalarOpsMixin(ExtensionOpsMixin): """ A mixin for defining ops on an ExtensionArray. @@ -2814,8 +2815,6 @@ class ExtensionScalarOpsMixin(ExtensionOpsMixin): with NumPy arrays. """ - __module__ = "pandas.api.extensions" - @classmethod def _create_method(cls, op, coerce_to_dtype: bool = True, result_dtype=None): """ diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 2d7bae7833f29..15e59060898f2 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -262,6 +262,7 @@ def coerce_to_array( return values, mask +@set_module("pandas.arrays") class BooleanArray(BaseMaskedArray): """ Array of boolean (True/False) data with missing values. 
@@ -321,8 +322,6 @@ class BooleanArray(BaseMaskedArray): Length: 3, dtype: boolean """ - __module__ = "pandas.arrays" - _TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"} _FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"} diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 41e5c6f65dbb9..7080b8d797bbe 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -26,6 +26,7 @@ from pandas._libs.arrays import NDArrayBacked from pandas.compat.numpy import function as nv from pandas.errors import Pandas4Warning +from pandas.util._decorators import set_module from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_bool_kwarg @@ -245,6 +246,7 @@ def contains(cat, key, container) -> bool: return any(loc_ in container for loc_ in loc) +@set_module("pandas") class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMixin): """ Represent a categorical variable in classic R / S-plus fashion. @@ -361,8 +363,6 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi 'c' """ - __module__ = "pandas" - # For comparisons, so that numpy uses our implementation if the compare # ops, which raise __array_priority__ = 1000 diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 4cf5f4b13890e..942856847f57b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -46,6 +46,7 @@ ) from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit from pandas.errors import PerformanceWarning +from pandas.util._decorators import set_module from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_inclusive @@ -171,6 +172,7 @@ def f(self): return property(f) +@set_module("pandas.arrays") class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): """ Pandas ExtensionArray for tz-naive or tz-aware datetime data. @@ -223,8 +225,6 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): Length: 2, dtype: datetime64[s] """ - __module__ = "pandas.arrays" - _typ = "datetimearray" _internal_fill_value = np.datetime64("NaT", "ns") _recognized_scalars = (datetime, np.datetime64) diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index e547c3fe76089..602b0c225999e 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -63,6 +63,7 @@ def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarr return values.astype(dtype, copy=copy) +@set_module("pandas.arrays") class FloatingArray(NumericArray): """ Array of floating (optional missing) values. @@ -129,8 +130,6 @@ class FloatingArray(NumericArray): Length: 3, dtype: Float32 """ - __module__ = "pandas.arrays" - _dtype_cls = FloatingDtype diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 7a8ca85a83db5..3cf950b6594a1 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -71,6 +71,7 @@ def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarr ) from err +@set_module("pandas.arrays") class IntegerArray(NumericArray): """ Array of integer (optional missing) values. 
@@ -142,8 +143,6 @@ class IntegerArray(NumericArray): Length: 3, dtype: UInt16 """ - __module__ = "pandas.arrays" - _dtype_cls = IntegerDtype diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 3e724b176b76d..e9dd687ca5184 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -39,6 +39,7 @@ ) from pandas.compat.numpy import function as nv from pandas.errors import IntCastingNaNError +from pandas.util._decorators import set_module from pandas.core.dtypes.cast import ( LossySetitemError, @@ -176,6 +177,7 @@ """ +@set_module("pandas.arrays") class IntervalArray(IntervalMixin, ExtensionArray): """ Pandas array for interval data that are closed on the same side. @@ -243,8 +245,6 @@ class IntervalArray(IntervalMixin, ExtensionArray): :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`. """ - __module__ = "pandas.arrays" - can_hold_na = True _na_value = _fill_value = np.nan diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index eca47d3c9657f..dfe99e50f1f31 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -13,6 +13,7 @@ from pandas._libs import lib from pandas._libs.tslibs import is_supported_dtype from pandas.compat.numpy import function as nv +from pandas.util._decorators import set_module from pandas.core.dtypes.astype import astype_array from pandas.core.dtypes.cast import ( @@ -52,12 +53,12 @@ from pandas.arrays import StringArray +@set_module("pandas.arrays") class NumpyExtensionArray( OpsMixin, NDArrayBackedExtensionArray, ObjectStringArrayMixin, ): - __module__ = "pandas.arrays" """ A pandas ExtensionArray for NumPy data. diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 18e4ff31164ac..fe5163d8a77a2 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -54,6 +54,7 @@ from pandas.util._decorators import ( cache_readonly, doc, + set_module, ) from pandas.core.dtypes.common import ( @@ -120,6 +121,7 @@ def f(self): return property(f) +@set_module("pandas.arrays") # error: Definition of "_concat_same_type" in base class "NDArrayBacked" is # incompatible with definition in base class "ExtensionArray" class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin): # type: ignore[misc] @@ -177,8 +179,6 @@ class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin): # type: ignore[misc] Length: 2, dtype: period[D] """ - __module__ = "pandas.arrays" - # array priority higher than numpy scalars __array_priority__ = 1000 _typ = "periodarray" # ABCPeriodArray diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 83e16f5d4b8db..b1585ad28e671 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -31,7 +31,10 @@ from pandas._libs.tslibs import NaT from pandas.compat.numpy import function as nv from pandas.errors import PerformanceWarning -from pandas.util._decorators import doc +from pandas.util._decorators import ( + doc, + set_module, +) from pandas.util._exceptions import find_stack_level from pandas.util._validators import ( validate_bool_kwarg, @@ -289,6 +292,7 @@ def _wrap_result( ) +@set_module("pandas.arrays") class SparseArray(OpsMixin, PandasObject, ExtensionArray): """ An ExtensionArray for storing sparse data. 
@@ -370,8 +374,6 @@ class SparseArray(OpsMixin, PandasObject, ExtensionArray): Indices: array([2, 3], dtype=int32) """ - __module__ = "pandas.arrays" - _subtyp = "sparse_array" # register ABCSparseArray _hidden_attrs = PandasObject._hidden_attrs | frozenset([]) _sparse_index: SparseIndex diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index ec591d7711fa9..6cb75ba44a3fd 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -548,6 +548,7 @@ def view(self, dtype: Dtype | None = None) -> Self: return super().view() +@set_module("pandas.arrays") # error: Definition of "_concat_same_type" in base class "NDArrayBacked" is # incompatible with definition in base class "ExtensionArray" class StringArray(BaseStringArray, NumpyExtensionArray): # type: ignore[misc] @@ -633,8 +634,6 @@ class StringArray(BaseStringArray, NumpyExtensionArray): # type: ignore[misc] Length: 3, dtype: boolean """ - __module__ = "pandas.arrays" - # undo the NumpyExtensionArray hack _typ = "extension" diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 7dd41cc0e9960..58385e84b8b4d 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -18,6 +18,7 @@ PYARROW_MIN_VERSION, pa_version_under16p0, ) +from pandas.util._decorators import set_module from pandas.util._validators import validate_na_arg from pandas.core.dtypes.common import ( @@ -80,6 +81,7 @@ def _is_string_view(typ): # fallback for the ones that pyarrow doesn't yet support +@set_module("pandas.arrays") class ArrowStringArray(ObjectStringArrayMixin, ArrowExtensionArray, BaseStringArray): """ Extension array for string data in a ``pyarrow.ChunkedArray``. @@ -124,8 +126,6 @@ class ArrowStringArray(ObjectStringArrayMixin, ArrowExtensionArray, BaseStringAr Length: 4, dtype: string """ - __module__ = "pandas.arrays" - # error: Incompatible types in assignment (expression has type "StringDtype", # base class "ArrowExtensionArray" defined the type as "ArrowDtype") _dtype: StringDtype # type: ignore[assignment] diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index c081d6190204e..5b69787c3b2bc 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -40,6 +40,7 @@ truediv_object_array, ) from pandas.compat.numpy import function as nv +from pandas.util._decorators import set_module from pandas.util._validators import validate_endpoints from pandas.core.dtypes.common import ( @@ -103,6 +104,7 @@ def f(self) -> np.ndarray: return property(f) +@set_module("pandas.arrays") class TimedeltaArray(dtl.TimelikeOps): """ Pandas ExtensionArray for timedelta data. @@ -147,8 +149,6 @@ class TimedeltaArray(dtl.TimelikeOps): Length: 2, dtype: timedelta64[ns] """ - __module__ = "pandas.arrays" - _typ = "timedeltaarray" _internal_fill_value = np.timedelta64("NaT", "ns") _recognized_scalars = (timedelta, np.timedelta64, Tick) diff --git a/pandas/core/col.py b/pandas/core/col.py index 416a34aaa6c4a..39c4a7fd016c2 100644 --- a/pandas/core/col.py +++ b/pandas/core/col.py @@ -71,6 +71,7 @@ def _pretty_print_args_kwargs(*args: Any, **kwargs: Any) -> str: return ", ".join(all_args) +@set_module("pandas.api.typing") class Expression: """ Class representing a deferred column. @@ -78,8 +79,6 @@ class Expression: This is not meant to be instantiated directly. Instead, use :meth:`pandas.col`. 
""" - __module__ = "pandas.api.typing" - def __init__(self, func: Callable[[DataFrame], Any], repr_str: str) -> None: self._func = func self._repr_str = repr_str diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 59f360650ff8c..08a0c8a921c71 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -42,6 +42,7 @@ ExtensionDtypeT = TypeVar("ExtensionDtypeT", bound="ExtensionDtype") +@set_module("pandas.api.extensions") class ExtensionDtype: """ A custom data type, to be paired with an ExtensionArray. @@ -112,8 +113,6 @@ class property**. provided for registering virtual subclasses. """ - __module__ = "pandas.api.extensions" - _metadata: tuple[str, ...] = () def __str__(self) -> str: diff --git a/pandas/core/flags.py b/pandas/core/flags.py index a98380e9f7d16..f6088e3f40b1b 100644 --- a/pandas/core/flags.py +++ b/pandas/core/flags.py @@ -3,10 +3,13 @@ from typing import TYPE_CHECKING import weakref +from pandas.util._decorators import set_module + if TYPE_CHECKING: from pandas.core.generic import NDFrame +@set_module("pandas") class Flags: """ Flags that apply to pandas objects. @@ -55,8 +58,6 @@ class Flags: """ - __module__ = "pandas" - _keys: set[str] = {"allows_duplicate_labels"} def __init__(self, obj: NDFrame, *, allows_duplicate_labels: bool) -> None: diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index a45ce1f385e4d..311726b9aa01e 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -17,7 +17,10 @@ ) from pandas._libs.tslibs import OutOfBoundsDatetime from pandas.errors import InvalidIndexError -from pandas.util._decorators import cache_readonly +from pandas.util._decorators import ( + cache_readonly, + set_module, +) from pandas.core.dtypes.common import ( ensure_int64, @@ -63,6 +66,9 @@ from pandas.core.generic import NDFrame +set_module("pandas") + + class Grouper: """ A Grouper allows the user to specify a groupby instruction for an object. @@ -253,8 +259,6 @@ class Grouper: Freq: 17min, dtype: int64 """ - __module__ = "pandas" - sort: bool dropna: bool _grouper: Index | None diff --git a/pandas/core/indexers/objects.py b/pandas/core/indexers/objects.py index 2c2413c74f2fa..42fc4971f1613 100644 --- a/pandas/core/indexers/objects.py +++ b/pandas/core/indexers/objects.py @@ -8,6 +8,7 @@ from pandas._libs.tslibs import BaseOffset from pandas._libs.window.indexers import calculate_variable_window_bounds +from pandas.util._decorators import set_module from pandas.core.dtypes.common import ensure_platform_int @@ -16,6 +17,7 @@ from pandas.tseries.offsets import Nano +@set_module("pandas.api.indexers") class BaseIndexer: """ Base class for window bounds calculations. @@ -58,8 +60,6 @@ class BaseIndexer: 4 4.0 """ - __module__ = "pandas.api.indexers" - def __init__( self, index_array: np.ndarray | None = None, window_size: int = 0, **kwargs ) -> None: @@ -212,6 +212,7 @@ def get_window_bounds( ) +@set_module("pandas.api.indexers") class VariableOffsetWindowIndexer(BaseIndexer): """ Calculate window boundaries based on a non-fixed offset such as a BusinessDay. @@ -273,8 +274,6 @@ class VariableOffsetWindowIndexer(BaseIndexer): 2020-01-10 9.0 """ - __module__ = "pandas.api.indexers" - def __init__( self, index_array: np.ndarray | None = None, @@ -437,6 +436,7 @@ def get_window_bounds( ) +@set_module("pandas.api.indexers") class FixedForwardWindowIndexer(BaseIndexer): """ Creates window boundaries for fixed-length windows that include the current row. 
@@ -482,8 +482,6 @@ class FixedForwardWindowIndexer(BaseIndexer): 4 4.0 """ - __module__ = "pandas.api.indexers" - def get_window_bounds( self, num_values: int = 0, diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index fe06e235e0fd9..59800c3b9dc5e 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -14,11 +14,14 @@ Self, ) +from pandas.util._decorators import set_module + from pandas.core.base import PandasObject from pandas.io.formats.printing import pprint_thing +@set_module("pandas.api.typing") class FrozenList(PandasObject, list): """ Container that doesn't allow setting item *but* @@ -26,8 +29,6 @@ class FrozenList(PandasObject, list): for lookups, appropriately, etc. """ - __module__ = "pandas.api.typing" - # Side note: This has to be of type list. Otherwise, # it messes up PyTables type checks. diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 7a025cdd5fb68..c5dcb53d3c81c 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -136,8 +136,6 @@ class RangeIndex(Index): [] """ - __module__ = "pandas" - _typ = "rangeindex" _dtype_validation_metadata = (is_signed_integer_dtype, "signed integer") _range: range diff --git a/pandas/core/interchange/dataframe_protocol.py b/pandas/core/interchange/dataframe_protocol.py index 15bd323d5fade..28aa13202a712 100644 --- a/pandas/core/interchange/dataframe_protocol.py +++ b/pandas/core/interchange/dataframe_protocol.py @@ -15,6 +15,8 @@ TypedDict, ) +from pandas.util._decorators import set_module + if TYPE_CHECKING: from collections.abc import ( Iterable, @@ -362,6 +364,7 @@ def get_buffers(self) -> ColumnBuffers: # pass +@set_module("pandas.api.interchange") class DataFrame(ABC): """ A data frame class, with only the methods required by the interchange @@ -377,8 +380,6 @@ class DataFrame(ABC): to the dataframe interchange protocol specification. """ - __module__ = "pandas.api.interchange" - version = 0 # version of the protocol @abstractmethod diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 2694b581a6707..1fdcbbec837a0 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -30,6 +30,7 @@ AbstractMethodError, Pandas4Warning, ) +from pandas.util._decorators import set_module from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.dtypes import ( @@ -115,6 +116,7 @@ _shared_docs_kwargs: dict[str, str] = {} +@set_module("pandas.api.typing") class Resampler(BaseGroupBy, PandasObject): """ Class for resampling datetimelike data, a groupby-like operation. @@ -136,8 +138,6 @@ class Resampler(BaseGroupBy, PandasObject): After resampling, see aggregate, apply, and transform functions. 
""" - __module__ = "pandas.api.typing" - _grouper: BinGrouper _timegrouper: TimeGrouper binner: DatetimeIndex | TimedeltaIndex | PeriodIndex # depends on subclass @@ -2171,6 +2171,7 @@ def _wrap_result(self, result): return result +@set_module("pandas.api.typing") # error: Definition of "ax" in base class "_GroupByMixin" is incompatible # with definition in base class "DatetimeIndexResampler" class DatetimeIndexResamplerGroupby( # type: ignore[misc] @@ -2180,8 +2181,6 @@ class DatetimeIndexResamplerGroupby( # type: ignore[misc] Provides a resample of a groupby implementation """ - __module__ = "pandas.api.typing" - @property def _resampler_cls(self): return DatetimeIndexResampler @@ -2274,6 +2273,7 @@ def _upsample(self, method, limit: int | None = None, fill_value=None): return self._wrap_result(new_obj) +@set_module("pandas.api.typing") # error: Definition of "ax" in base class "_GroupByMixin" is incompatible with # definition in base class "PeriodIndexResampler" class PeriodIndexResamplerGroupby( # type: ignore[misc] @@ -2283,8 +2283,6 @@ class PeriodIndexResamplerGroupby( # type: ignore[misc] Provides a resample of a groupby implementation. """ - __module__ = "pandas.api.typing" - @property def _resampler_cls(self): return PeriodIndexResampler @@ -2312,6 +2310,7 @@ def _adjust_binner_for_upsample(self, binner): return binner +@set_module("pandas.api.typing") # error: Definition of "ax" in base class "_GroupByMixin" is incompatible with # definition in base class "DatetimeIndexResampler" class TimedeltaIndexResamplerGroupby( # type: ignore[misc] @@ -2321,8 +2320,6 @@ class TimedeltaIndexResamplerGroupby( # type: ignore[misc] Provides a resample of a groupby implementation. """ - __module__ = "pandas.api.typing" - @property def _resampler_cls(self): return TimedeltaIndexResampler @@ -2357,6 +2354,7 @@ def get_resampler_for_grouping( return resampler._get_resampler_for_grouping(groupby=groupby, key=tg.key) +@set_module("pandas.api.typing") class TimeGrouper(Grouper): """ Custom groupby class for time-interval grouping. @@ -2370,8 +2368,6 @@ class TimeGrouper(Grouper): If axis is PeriodIndex """ - __module__ = "pandas.api.typing" - _attributes = Grouper._attributes + ( "closed", "label", diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 1ea05e24d0db5..0a5cfad105f26 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -9,7 +9,10 @@ from pandas._libs.tslibs import Timedelta import pandas._libs.window.aggregations as window_aggregations -from pandas.util._decorators import doc +from pandas.util._decorators import ( + doc, + set_module, +) from pandas.core.dtypes.common import ( is_datetime64_dtype, @@ -129,6 +132,7 @@ def _calculate_deltas( return np.diff(_times) / _halflife +@set_module("pandas.api.typing") class ExponentialMovingWindow(BaseWindow): r""" Provide exponentially weighted (EW) calculations. @@ -316,8 +320,6 @@ class ExponentialMovingWindow(BaseWindow): 4 3.233686 """ - __module__ = "pandas.api.typing" - _attributes = [ "com", "span", @@ -904,13 +906,12 @@ def _cov(X, Y): ) +@set_module("pandas.api.typing") class ExponentialMovingWindowGroupby(BaseWindowGroupby, ExponentialMovingWindow): """ Provide an exponential moving window groupby implementation. 
""" - __module__ = "pandas.api.typing" - _attributes = ExponentialMovingWindow._attributes + BaseWindowGroupby._attributes def __init__(self, obj, *args, _grouper=None, **kwargs) -> None: diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 2527a5dd508d8..ad49cc0d5ef63 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -10,6 +10,8 @@ overload, ) +from pandas.util._decorators import set_module + from pandas.core.indexers.objects import ( BaseIndexer, ExpandingIndexer, @@ -37,6 +39,7 @@ from pandas.core.generic import NDFrame +@set_module("pandas.api.typing") class Expanding(RollingAndExpandingMixin): """ Provide expanding window calculations. @@ -106,8 +109,6 @@ class Expanding(RollingAndExpandingMixin): 4 7.0 """ - __module__ = "pandas.api.typing" - _attributes: list[str] = ["min_periods", "method"] def __init__( @@ -1452,13 +1453,12 @@ def corr( ) +@set_module("pandas.api.typing") class ExpandingGroupby(BaseWindowGroupby, Expanding): """ Provide an expanding groupby implementation. """ - __module__ = "pandas.api.typing" - _attributes = Expanding._attributes + BaseWindowGroupby._attributes def _get_window_indexer(self) -> GroupbyIndexer: diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index e6f84941f6b1a..8d5b8ba508e9e 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -29,6 +29,7 @@ import pandas._libs.window.aggregations as window_aggregations from pandas.compat._optional import import_optional_dependency from pandas.errors import DataError +from pandas.util._decorators import set_module from pandas.core.dtypes.common import ( ensure_float64, @@ -855,6 +856,7 @@ def _gotitem(self, key, ndim, subset=None): return super()._gotitem(key, ndim, subset=subset) +@set_module("pandas.api.typing") class Window(BaseWindow): """ Provide rolling window calculations. @@ -1111,8 +1113,6 @@ class Window(BaseWindow): 2020-01-03 2020-01-02 6.0 """ - __module__ = "pandas.api.typing" - _attributes = [ "window", "min_periods", @@ -1969,8 +1969,8 @@ def corr_func(x, y): ) +@set_module("pandas.api.typing") class Rolling(RollingAndExpandingMixin): - __module__ = "pandas.api.typing" _attributes: list[str] = [ "window", "min_periods", @@ -3532,13 +3532,12 @@ def corr( Rolling.__doc__ = Window.__doc__ +@set_module("pandas.api.typing") class RollingGroupby(BaseWindowGroupby, Rolling): """ Provide a rolling groupby implementation. """ - __module__ = "pandas.api.typing" - _attributes = Rolling._attributes + BaseWindowGroupby._attributes def _get_window_indexer(self) -> GroupbyIndexer: diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index bfa61253c9c1f..d52513918b6e2 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -820,6 +820,7 @@ def read_json( return json_reader.read() +@set_module("pandas.api.typing") class JsonReader(abc.Iterator, Generic[FrameSeriesStrT]): """ JsonReader provides an interface for reading in a JSON file. @@ -829,8 +830,6 @@ class JsonReader(abc.Iterator, Generic[FrameSeriesStrT]): whole document. """ - __module__ = "pandas.api.typing" - def __init__( self, filepath_or_buffer, diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 936cc4868daf2..ffb0b668c2dc3 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -37,13 +37,12 @@ from pandas import DataFrame +@set_module("pandas.api.typing") class SASReader(Iterator["DataFrame"], ABC): """ Abstract class for XportReader and SAS7BDATReader. 
""" - __module__ = "pandas.api.typing" - @abstractmethod def read(self, nrows: int | None = None) -> DataFrame: ... diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 1f953650365ef..0d5b0213de1ce 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1018,8 +1018,8 @@ def __init__(self) -> None: } +@set_module("pandas.api.typing") class StataReader(StataParser, abc.Iterator): - __module__ = "pandas.api.typing" __doc__ = _stata_reader_doc _path_or_buf: IO[bytes] # Will be assigned by `_open_file`. diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index c9dd179fac6e0..a456098f68cfc 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -769,6 +769,7 @@ def boxplot_frame_groupby( ) +@set_module("pandas.plotting") class PlotAccessor(PandasObject): """ Make plots of Series or DataFrame. @@ -987,8 +988,6 @@ class PlotAccessor(PandasObject): >>> plot = df.groupby("col2").plot(kind="bar", title="DataFrameGroupBy Plot") """ - __module__ = "pandas.plotting" - _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box") _series_kinds = ("pie",) _dataframe_kinds = ("scatter", "hexbin") diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index dd2ed6c00e48c..eeb3fd07ddf0b 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -2,6 +2,7 @@ from functools import wraps import inspect +import os from textwrap import dedent from typing import ( TYPE_CHECKING, @@ -523,10 +524,15 @@ def example(): assert example.__module__ == "pandas" """ + if os.environ.get("PANDAS_SET_MODULE_DUNDER", "1") == "0": - def decorator(func: F) -> F: - if module is not None: - func.__module__ = module - return func + def decorator(func: F) -> F: + return func + else: + + def decorator(func: F) -> F: + if module is not None: + func.__module__ = module + return func return decorator diff --git a/pandas/util/version/__init__.py b/pandas/util/version/__init__.py index 412a606bb023e..0a88a165cb6d3 100644 --- a/pandas/util/version/__init__.py +++ b/pandas/util/version/__init__.py @@ -111,7 +111,6 @@ def parse(version: str) -> Version: # The docstring is from an older version of the packaging library to avoid # errors in the docstring validation. class InvalidVersion(ValueError): - __module__ = "pandas.errors" """ An invalid version was found, users should refer to PEP 440. @@ -130,6 +129,8 @@ class InvalidVersion(ValueError): InvalidVersion: Invalid version: '1.' """ + __module__ = "pandas.errors" + class _BaseVersion: _key: tuple[Any, ...] 
From 477cc4fc25e7671d43481d25d234d29771b60ec2 Mon Sep 17 00:00:00 2001 From: richard Date: Tue, 4 Nov 2025 13:36:33 -0500 Subject: [PATCH 2/8] ENH: Add future.python_scalars --- pandas/_config/__init__.py | 5 ++ pandas/conftest.py | 5 ++ pandas/core/arraylike.py | 5 +- pandas/core/arrays/masked.py | 19 ++++++-- pandas/core/config_init.py | 7 +++ pandas/core/dtypes/cast.py | 11 +++++ pandas/core/indexes/base.py | 11 +++-- pandas/core/interchange/column.py | 7 ++- pandas/core/series.py | 10 ++-- pandas/tests/arrays/boolean/test_reduction.py | 46 +++++++++++-------- pandas/tests/arrays/floating/test_function.py | 7 ++- pandas/tests/arrays/integer/test_dtypes.py | 4 +- pandas/tests/frame/test_reductions.py | 40 +++++++++++----- pandas/tests/groupby/test_categorical.py | 6 ++- pandas/tests/indexes/test_numpy_compat.py | 2 +- pandas/tests/reductions/test_reductions.py | 8 +++- .../tests/reductions/test_stat_reductions.py | 14 ++++-- pandas/tests/series/test_ufunc.py | 8 +++- pandas/tests/test_nanops.py | 20 +++++--- .../moments/test_moments_consistency_ewm.py | 25 ++++++++-- .../test_moments_consistency_expanding.py | 35 +++++++++++--- .../test_moments_consistency_rolling.py | 37 ++++++++++++--- 22 files changed, 247 insertions(+), 85 deletions(-) diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py index ee709eff2eeae..dc380e3777f4e 100644 --- a/pandas/_config/__init__.py +++ b/pandas/_config/__init__.py @@ -35,6 +35,11 @@ def using_string_dtype() -> bool: return _mode_options["infer_string"] +def using_python_scalars() -> bool: + _mode_options = _global_config["future"] + return _mode_options["python_scalars"] + + def is_nan_na() -> bool: _mode_options = _global_config["mode"] return _mode_options["nan_is_na"] diff --git a/pandas/conftest.py b/pandas/conftest.py index 7fe4ec7a5ee4f..ba4d44dbb1b8a 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -2097,6 +2097,11 @@ def using_infer_string() -> bool: return pd.options.future.infer_string is True +@pytest.fixture +def using_python_scalars() -> bool: + return pd.options.future.python_scalars is True + + _warsaws: list[Any] = ["Europe/Warsaw", "dateutil/Europe/Warsaw"] if pytz is not None: _warsaws.append(pytz.timezone("Europe/Warsaw")) diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index eeef8016db07f..1e90cf1949a7a 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -15,6 +15,7 @@ from pandas._libs import lib from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op +from pandas.core.dtypes.cast import maybe_unbox_numpy_scalar from pandas.core.dtypes.generic import ABCNDFrame from pandas.core import roperator @@ -529,4 +530,6 @@ def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwar # By default, numpy's reductions do not skip NaNs, so we have to # pass skipna=False - return getattr(self, method_name)(skipna=False, **kwargs) + result = getattr(self, method_name)(skipna=False, **kwargs) + result = maybe_unbox_numpy_scalar(result) + return result diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 6085b577f4392..8c8fbef11e82a 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -12,7 +12,10 @@ import numpy as np -from pandas._config import is_nan_na +from pandas._config import ( + is_nan_na, + using_python_scalars, +) from pandas._libs import ( algos as libalgos, @@ -27,7 +30,10 @@ from pandas.errors import AbstractMethodError from pandas.core.dtypes.base import ExtensionDtype -from 
pandas.core.dtypes.cast import maybe_downcast_to_dtype +from pandas.core.dtypes.cast import ( + maybe_downcast_to_dtype, + maybe_unbox_numpy_scalar, +) from pandas.core.dtypes.common import ( is_bool, is_integer_dtype, @@ -1518,7 +1524,10 @@ def _reduce( if isna(result): return self._wrap_na_result(name=name, axis=0, mask_size=(1,)) else: - result = result.reshape(1) + if using_python_scalars(): + result = np.array([result]) + else: + result = result.reshape(1) mask = np.zeros(1, dtype=bool) return self._maybe_mask_result(result, mask) @@ -1742,7 +1751,7 @@ def any( values = self._data.copy() np.putmask(values, self._mask, self.dtype._falsey_value) - result = values.any() + result = maybe_unbox_numpy_scalar(values.any()) if skipna: return result else: @@ -1828,7 +1837,7 @@ def all( values = self._data.copy() np.putmask(values, self._mask, self.dtype._truthy_value) - result = values.all(axis=axis) + result = maybe_unbox_numpy_scalar(values.all(axis=axis)) if skipna: return result # type: ignore[return-value] diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 83015f4007793..60a20010d4daa 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -900,5 +900,12 @@ def register_converter_cb(key: str) -> None: validator=is_one_of_factory([True, False]), ) + cf.register_option( + "python_scalars", + False if os.environ.get("PANDAS_FUTURE_PYTHON_SCALARS", "0") == "0" else True, + "Whether to return Python scalars instead of NumPy or PyArrow scalars", + validator=is_one_of_factory([True, False]), + ) + # GH#59502 cf.deprecate_option("future.no_silent_downcasting", Pandas4Warning) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3b615c70ebea2..d86ae281b66a9 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -20,6 +20,7 @@ from pandas._config import ( is_nan_na, + using_python_scalars, using_string_dtype, ) @@ -1434,6 +1435,16 @@ def construct_1d_arraylike_from_scalar( return subarr +def maybe_unbox_numpy_scalar(value): + result = value + if using_python_scalars() and isinstance(value, np.generic): + if isinstance(result, np.longdouble): + result = float(result) + else: + result = value.item() + return result + + def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj): # Caller is responsible for checking dtype.kind in "mM" diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4186a13926c6f..ccffd03bc646e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -92,6 +92,7 @@ common_dtype_categorical_compat, find_result_type, infer_dtype_from, + maybe_unbox_numpy_scalar, np_can_hold_element, ) from pandas.core.dtypes.common import ( @@ -7532,7 +7533,7 @@ def min(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs) # quick check first = self[0] if not isna(first): - return first + return maybe_unbox_numpy_scalar(first) if not self._is_multi and self.hasnans: # Take advantage of cache @@ -7543,7 +7544,7 @@ def min(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs) if not self._is_multi and not isinstance(self._values, np.ndarray): return self._values._reduce(name="min", skipna=skipna) - return nanops.nanmin(self._values, skipna=skipna) + return maybe_unbox_numpy_scalar(nanops.nanmin(self._values, skipna=skipna)) def max(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs): """ @@ -7596,18 +7597,18 @@ def max(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs) # quick check 
last = self[-1] if not isna(last): - return last + return maybe_unbox_numpy_scalar(last) if not self._is_multi and self.hasnans: # Take advantage of cache mask = self._isnan if not skipna or mask.all(): - return self._na_value + return maybe_unbox_numpy_scalar(self._na_value) if not self._is_multi and not isinstance(self._values, np.ndarray): return self._values._reduce(name="max", skipna=skipna) - return nanops.nanmax(self._values, skipna=skipna) + return maybe_unbox_numpy_scalar(nanops.nanmax(self._values, skipna=skipna)) # -------------------------------------------------------------------- diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py index 1fbffe09278fc..00a8e742c6fa9 100644 --- a/pandas/core/interchange/column.py +++ b/pandas/core/interchange/column.py @@ -7,6 +7,8 @@ import numpy as np +from pandas._config import using_python_scalars + from pandas._libs.lib import infer_dtype from pandas._libs.tslibs import iNaT from pandas.errors import NoBufferPresent @@ -232,7 +234,10 @@ def null_count(self) -> int: """ Number of null elements. Should always be known. """ - return self._col.isna().sum().item() + result = self._col.isna().sum() + if not using_python_scalars(): + result = result.item() + return result @property def metadata(self) -> dict[str, pd.Index]: diff --git a/pandas/core/series.py b/pandas/core/series.py index 1bdbbd6c41f34..e62943b3fb312 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -73,6 +73,7 @@ find_common_type, infer_dtype_from, maybe_box_native, + maybe_unbox_numpy_scalar, ) from pandas.core.dtypes.common import ( is_dict_like, @@ -2014,7 +2015,7 @@ def count(self) -> int: >>> s.count() 2 """ - return notna(self._values).sum().astype("int64") + return maybe_unbox_numpy_scalar(notna(self._values).sum().astype("int64")) def mode(self, dropna: bool = True) -> Series: """ @@ -7356,7 +7357,7 @@ def _reduce( if isinstance(delegate, ExtensionArray): # dispatch to ExtensionArray interface - return delegate._reduce(name, skipna=skipna, **kwds) + result = delegate._reduce(name, skipna=skipna, **kwds) else: # dispatch to numpy arrays @@ -7370,7 +7371,10 @@ def _reduce( f"Series.{name} does not allow {kwd_name}={numeric_only} " "with non-numeric dtypes." 
) - return op(delegate, skipna=skipna, **kwds) + result = op(delegate, skipna=skipna, **kwds) + + result = maybe_unbox_numpy_scalar(result) + return result @Appender(make_doc("any", ndim=1)) # error: Signature of "any" incompatible with supertype "NDFrame" diff --git a/pandas/tests/arrays/boolean/test_reduction.py b/pandas/tests/arrays/boolean/test_reduction.py index 696ae1df4c9fd..6770bf4e5b446 100644 --- a/pandas/tests/arrays/boolean/test_reduction.py +++ b/pandas/tests/arrays/boolean/test_reduction.py @@ -25,12 +25,15 @@ def data(): ([False, False], False, False, False, False), ], ) -def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip): +def test_any_all( + values, exp_any, exp_all, exp_any_noskip, exp_all_noskip, using_python_scalars +): # the methods return numpy scalars - exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any) - exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all) - exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip) - exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip) + if not using_python_scalars: + exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any) + exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all) + exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip) + exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip) for con in [pd.array, pd.Series]: a = con(values, dtype="boolean") @@ -39,23 +42,30 @@ def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip): assert a.any(skipna=False) is exp_any_noskip assert a.all(skipna=False) is exp_all_noskip - assert np.any(a.any()) is exp_any - assert np.all(a.all()) is exp_all - -def test_reductions_return_types(dropna, data, all_numeric_reductions): +def test_reductions_return_types( + dropna, data, all_numeric_reductions, using_python_scalars +): op = all_numeric_reductions s = pd.Series(data) if dropna: s = s.dropna() - if op in ("sum", "prod"): - assert isinstance(getattr(s, op)(), np.int_) - elif op == "count": - # Oddly on the 32 bit build (but not Windows), this is intc (!= intp) - assert isinstance(getattr(s, op)(), np.integer) - elif op in ("min", "max"): - assert isinstance(getattr(s, op)(), np.bool_) + if using_python_scalars: + expected = { + "sum": int, + "prod": int, + "count": int, + "min": bool, + "max": bool, + }.get(op, float) else: - # "mean", "std", "var", "median", "kurt", "skew" - assert isinstance(getattr(s, op)(), np.float64) + expected = { + "sum": np.int_, + "prod": np.int_, + "count": np.integer, + "min": np.bool_, + "max": np.bool_, + }.get(op, np.float64) + result = getattr(s, op)() + assert isinstance(result, expected), f"{type(result)} vs {expected}" diff --git a/pandas/tests/arrays/floating/test_function.py b/pandas/tests/arrays/floating/test_function.py index e03e8f30197b9..9d4dc81847188 100644 --- a/pandas/tests/arrays/floating/test_function.py +++ b/pandas/tests/arrays/floating/test_function.py @@ -171,7 +171,7 @@ def test_floating_array_numpy_sum(values, expected): @pytest.mark.parametrize("op", ["sum", "min", "max", "prod"]) -def test_preserve_dtypes(op): +def test_preserve_dtypes(op, using_python_scalars): df = pd.DataFrame( { "A": ["a", "b", "b"], @@ -182,7 +182,10 @@ def test_preserve_dtypes(op): # op result = getattr(df.C, op)() - assert isinstance(result, np.float64) + if using_python_scalars: + assert isinstance(result, float) + else: + assert isinstance(result, np.float64) # groupby result = getattr(df.groupby("A"), 
op)() diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index 1b4f070d47e4e..8b832b8dd151e 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -22,7 +22,7 @@ def test_dtypes(dtype): @pytest.mark.parametrize("op", ["sum", "min", "max", "prod"]) -def test_preserve_dtypes(op): +def test_preserve_dtypes(op, using_python_scalars): # for ops that enable (mean would actually work here # but generally it is a float return value) df = pd.DataFrame( @@ -35,7 +35,7 @@ def test_preserve_dtypes(op): # op result = getattr(df.C, op)() - if op in {"sum", "prod", "min", "max"}: + if op in {"sum", "prod", "min", "max"} and not using_python_scalars: assert isinstance(result, np.int64) else: assert isinstance(result, int) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 7c4ce4c67f13d..7ee84a3768043 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1298,7 +1298,7 @@ def wrapper(x): assert r0.all() assert r1.all() - def test_any_all_extra(self): + def test_any_all_extra(self, using_python_scalars): df = DataFrame( { "A": [True, False, False], @@ -1322,13 +1322,19 @@ def test_any_all_extra(self): tm.assert_series_equal(result, expected) # Axis is None - result = df.all(axis=None).item() + result = df.all(axis=None) + if not using_python_scalars: + result = result.item() assert result is False - result = df.any(axis=None).item() + result = df.any(axis=None) + if not using_python_scalars: + result = result.item() assert result is True - result = df[["C"]].all(axis=None).item() + result = df[["C"]].all(axis=None) + if not using_python_scalars: + result = result.item() assert result is True @pytest.mark.parametrize("axis", [0, 1]) @@ -1436,7 +1442,7 @@ def test_any_all_bool_only(self): ), ], ) - def test_any_all_np_func(self, func, data, expected): + def test_any_all_np_func(self, func, data, expected, using_python_scalars): # GH 19976 data = DataFrame(data) @@ -1463,20 +1469,30 @@ def test_any_all_np_func(self, func, data, expected): elif data.dtypes.apply(lambda x: x != "category").any(): result = func(data) - assert isinstance(result, np.bool_) - assert result.item() is expected + if using_python_scalars: + assert result is expected + else: + assert isinstance(result, np.bool_) + assert result.item() is expected # method version result = getattr(DataFrame(data), func.__name__)(axis=None) - assert isinstance(result, np.bool_) - assert result.item() is expected + if using_python_scalars: + assert result is expected + else: + assert isinstance(result, np.bool_) + assert result.item() is expected - def test_any_all_object(self): + def test_any_all_object(self, using_python_scalars): # GH 19976 - result = np.all(DataFrame(columns=["a", "b"])).item() + result = np.all(DataFrame(columns=["a", "b"])) + if not using_python_scalars: + result = result.item() assert result is True - result = np.any(DataFrame(columns=["a", "b"])).item() + result = np.any(DataFrame(columns=["a", "b"])) + if not using_python_scalars: + result = result.item() assert result is False def test_any_all_object_bool_only(self): diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 529f9564ff83e..0abf5f90abcc2 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1511,7 +1511,7 @@ def 
test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_fun @pytest.mark.parametrize("observed", [False, None]) def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( - reduction_func, observed + reduction_func, observed, using_python_scalars ): # GH 23865 # GH 27075 @@ -1553,7 +1553,9 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( expected = _results_for_groupbys_with_missing_categories[reduction_func] - if expected is np.nan: + if using_python_scalars and reduction_func == "size": + assert (res.loc[unobserved_cats] == expected).all() is True + elif expected is np.nan: assert res.loc[unobserved_cats].isnull().all().all() else: assert (res.loc[unobserved_cats] == expected).all().all() diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index 86d0ca1280596..8004e97698b67 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -151,7 +151,7 @@ def test_numpy_ufuncs_other(index, func): @pytest.mark.parametrize("func", [np.maximum, np.minimum]) -def test_numpy_ufuncs_reductions(index, func, request): +def test_numpy_ufuncs_reductions(index, func): # TODO: overlap with tests.series.test_ufunc.test_reductions if len(index) == 0: pytest.skip("Test doesn't make sense for empty index.") diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index db27572b9da26..6cfeade5a255b 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -1268,11 +1268,15 @@ def test_sum_uint64(self): expected = np.uint64(10000000000000000000) tm.assert_almost_equal(result, expected) - def test_signedness_preserved_after_sum(self): + def test_signedness_preserved_after_sum(self, using_python_scalars): # GH 37491 ser = Series([1, 2, 3, 4]) - assert ser.astype("uint8").sum().dtype == "uint64" + result = ser.astype("uint8").sum() + if using_python_scalars: + assert isinstance(result, int) + else: + assert result.dtype == "uint64" class TestDatetime64SeriesReductions: diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py index 4af1ca1d4800a..6aeaae51b107b 100644 --- a/pandas/tests/reductions/test_stat_reductions.py +++ b/pandas/tests/reductions/test_stat_reductions.py @@ -228,7 +228,7 @@ def test_sem(self): result = s.sem(ddof=1) assert pd.isna(result) - def test_skew(self): + def test_skew(self, using_python_scalars): sp_stats = pytest.importorskip("scipy.stats") string_series = Series(range(20), dtype=np.float64, name="series") @@ -247,7 +247,10 @@ def test_skew(self): assert np.isnan(df.skew()).all() else: assert 0 == s.skew() - assert isinstance(s.skew(), np.float64) # GH53482 + if using_python_scalars: + assert isinstance(s.skew(), float) + else: + assert isinstance(s.skew(), np.float64) # GH53482 assert (df.skew() == 0).all() def test_kurt(self): @@ -258,7 +261,7 @@ def test_kurt(self): alt = lambda x: sp_stats.kurtosis(x, bias=False) self._check_stat_op("kurt", alt, string_series) - def test_kurt_corner(self): + def test_kurt_corner(self, using_python_scalars): # test corner cases, kurt() returns NaN unless there's at least 4 # values min_N = 4 @@ -270,5 +273,8 @@ def test_kurt_corner(self): assert np.isnan(df.kurt()).all() else: assert 0 == s.kurt() - assert isinstance(s.kurt(), np.float64) # GH53482 + if using_python_scalars: + assert isinstance(s.kurt(), float) + else: + assert isinstance(s.kurt(), np.float64) # 
GH53482 assert (df.kurt() == 0).all() diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 5faacbb5559a9..7615510268a7a 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -330,7 +330,7 @@ def test_add(self, values_for_np_reduce, box_with_array): with pytest.raises(TypeError, match=msg): np.add.reduce(obj) - def test_max(self, values_for_np_reduce, box_with_array): + def test_max(self, values_for_np_reduce, box_with_array, using_python_scalars): box = box_with_array values = values_for_np_reduce @@ -349,12 +349,14 @@ def test_max(self, values_for_np_reduce, box_with_array): tm.assert_series_equal(result, expected) else: expected = values[1] + if using_python_scalars and values.dtype.kind in ["i", "f"]: + expected = expected.item() assert result == expected if same_type: # check we have e.g. Timestamp instead of dt64 assert type(result) == type(expected) - def test_min(self, values_for_np_reduce, box_with_array): + def test_min(self, values_for_np_reduce, box_with_array, using_python_scalars): box = box_with_array values = values_for_np_reduce @@ -372,6 +374,8 @@ def test_min(self, values_for_np_reduce, box_with_array): tm.assert_series_equal(result, expected) else: expected = values[0] + if using_python_scalars and values.dtype.kind in ["i", "f"]: + expected = expected.item() assert result == expected if same_type: # check we have e.g. Timestamp instead of dt64 diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 6788f2056bb9a..1b84a3e578ead 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1274,17 +1274,20 @@ def test_check_bottleneck_disallow(any_real_numpy_dtype, func): @pytest.mark.parametrize("val", [2**55, -(2**55), 20150515061816532]) -def test_nanmean_overflow(disable_bottleneck, val): +def test_nanmean_overflow(disable_bottleneck, val, using_python_scalars): # GH 10155 # In the previous implementation mean can overflow for int dtypes, it # is now consistent with numpy ser = Series(val, index=range(500), dtype=np.int64) result = ser.mean() - np_result = ser.values.mean() assert result == val - assert result == np_result - assert result.dtype == np.float64 + if using_python_scalars: + assert isinstance(result, float) + else: + np_result = ser.values.mean() + assert result == np_result + assert result.dtype == np.float64 @pytest.mark.parametrize( @@ -1299,13 +1302,18 @@ def test_nanmean_overflow(disable_bottleneck, val): ], ) @pytest.mark.parametrize("method", ["mean", "std", "var", "skew", "kurt", "min", "max"]) -def test_returned_dtype(disable_bottleneck, dtype, method): +def test_returned_dtype(disable_bottleneck, dtype, method, using_python_scalars): if dtype is None: pytest.skip("np.float128 not available") ser = Series(range(10), dtype=dtype) result = getattr(ser, method)() - if is_integer_dtype(dtype) and method not in ["min", "max"]: + if using_python_scalars: + if is_integer_dtype(dtype) and method in ["min", "max"]: + assert isinstance(result, int) + else: + assert isinstance(result, float) + elif is_integer_dtype(dtype) and method not in ["min", "max"]: assert result.dtype == np.float64 else: assert result.dtype == dtype diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py index 49dee50954f4f..b24e1f822e998 100644 --- a/pandas/tests/window/moments/test_moments_consistency_ewm.py +++ b/pandas/tests/window/moments/test_moments_consistency_ewm.py @@ -133,7 +133,10 @@ def 
test_moments_consistency_var(all_data, adjust, ignore_na, min_periods, bias) var_x = all_data.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na ).var(bias=bias) - assert not (var_x < 0).any().any() + if isinstance(all_data, Series): + assert not (var_x < 0).any() + else: + assert not (var_x < 0).any().any() if bias: # check that biased var(x) == mean(x^2) - mean(x)^2 @@ -156,7 +159,10 @@ def test_moments_consistency_var_constant( ).var(bias=bias) # check that variance of constant series is identically 0 - assert not (var_x > 0).any().any() + if isinstance(consistent_data, Series): + assert not (var_x > 0).any() + else: + assert not (var_x > 0).any().any() expected = consistent_data * np.nan expected[count_x >= max(min_periods, 1)] = 0.0 if not bias: @@ -170,12 +176,18 @@ def test_ewm_consistency_std(all_data, adjust, ignore_na, min_periods, bias): var_x = all_data.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na ).var(bias=bias) - assert not (var_x < 0).any().any() + if isinstance(all_data, Series): + assert not (var_x < 0).any() + else: + assert not (var_x < 0).any().any() std_x = all_data.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na ).std(bias=bias) - assert not (std_x < 0).any().any() + if isinstance(all_data, Series): + assert not (std_x < 0).any() + else: + assert not (std_x < 0).any().any() # check that var(x) == std(x)^2 tm.assert_equal(var_x, std_x * std_x) @@ -183,7 +195,10 @@ def test_ewm_consistency_std(all_data, adjust, ignore_na, min_periods, bias): cov_x_x = all_data.ewm( com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na ).cov(all_data, bias=bias) - assert not (cov_x_x < 0).any().any() + if isinstance(all_data, Series): + assert not (cov_x_x < 0).any() + else: + assert not (cov_x_x < 0).any().any() # check that var(x) == cov(x, x) tm.assert_equal(var_x, cov_x_x) diff --git a/pandas/tests/window/moments/test_moments_consistency_expanding.py b/pandas/tests/window/moments/test_moments_consistency_expanding.py index 7d2fa1ad5d211..894220d831300 100644 --- a/pandas/tests/window/moments/test_moments_consistency_expanding.py +++ b/pandas/tests/window/moments/test_moments_consistency_expanding.py @@ -6,11 +6,17 @@ def no_nans(x): - return x.notna().all().all() + if isinstance(x, Series): + return x.notna().all() + else: + return x.notna().all().all() def all_na(x): - return x.isnull().all().all() + if isinstance(x, Series): + return x.isnull().all() + else: + return x.isnull().all().all() @pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum]) @@ -32,7 +38,10 @@ def test_expanding_apply_consistency_sum_nans(request, all_data, min_periods, f) @pytest.mark.parametrize("ddof", [0, 1]) def test_moments_consistency_var(all_data, min_periods, ddof): var_x = all_data.expanding(min_periods=min_periods).var(ddof=ddof) - assert not (var_x < 0).any().any() + if isinstance(all_data, Series): + assert not (var_x < 0).any() + else: + assert not (var_x < 0).any().any() if ddof == 0: # check that biased var(x) == mean(x^2) - mean(x)^2 @@ -47,7 +56,10 @@ def test_moments_consistency_var_constant(consistent_data, min_periods, ddof): var_x = consistent_data.expanding(min_periods=min_periods).var(ddof=ddof) # check that variance of constant series is identically 0 - assert not (var_x > 0).any().any() + if isinstance(consistent_data, Series): + assert not (var_x > 0).any() + else: + assert not (var_x > 0).any().any() expected = consistent_data * np.nan expected[count_x >= 
max(min_periods, 1)] = 0.0 if ddof == 1: @@ -58,16 +70,25 @@ def test_moments_consistency_var_constant(consistent_data, min_periods, ddof): @pytest.mark.parametrize("ddof", [0, 1]) def test_expanding_consistency_var_std_cov(all_data, min_periods, ddof): var_x = all_data.expanding(min_periods=min_periods).var(ddof=ddof) - assert not (var_x < 0).any().any() + if isinstance(all_data, Series): + assert not (var_x < 0).any() + else: + assert not (var_x < 0).any().any() std_x = all_data.expanding(min_periods=min_periods).std(ddof=ddof) - assert not (std_x < 0).any().any() + if isinstance(all_data, Series): + assert not (std_x < 0).any() + else: + assert not (std_x < 0).any().any() # check that var(x) == std(x)^2 tm.assert_equal(var_x, std_x * std_x) cov_x_x = all_data.expanding(min_periods=min_periods).cov(all_data, ddof=ddof) - assert not (cov_x_x < 0).any().any() + if isinstance(all_data, Series): + assert not (cov_x_x < 0).any() + else: + assert not (cov_x_x < 0).any().any() # check that var(x) == cov(x, x) tm.assert_equal(var_x, cov_x_x) diff --git a/pandas/tests/window/moments/test_moments_consistency_rolling.py b/pandas/tests/window/moments/test_moments_consistency_rolling.py index be22338c00cb2..80d02eb51199d 100644 --- a/pandas/tests/window/moments/test_moments_consistency_rolling.py +++ b/pandas/tests/window/moments/test_moments_consistency_rolling.py @@ -1,16 +1,24 @@ import numpy as np import pytest +from pandas._config import using_python_scalars + from pandas import Series import pandas._testing as tm def no_nans(x): - return x.notna().all().all() + if using_python_scalars() and isinstance(x, Series): + return x.notna().all() + else: + return x.notna().all().all() def all_na(x): - return x.isnull().all().all() + if using_python_scalars() and isinstance(x, Series): + return x.isnull().all() + else: + return x.isnull().all().all() @pytest.fixture(params=[(1, 0), (5, 1)]) @@ -48,7 +56,10 @@ def test_moments_consistency_var(all_data, rolling_consistency_cases, center, dd var_x = all_data.rolling(window=window, min_periods=min_periods, center=center).var( ddof=ddof ) - assert not (var_x < 0).any().any() + if isinstance(all_data, Series): + assert not (var_x < 0).any() + else: + assert not (var_x < 0).any().any() if ddof == 0: # check that biased var(x) == mean(x^2) - mean(x)^2 @@ -77,7 +88,10 @@ def test_moments_consistency_var_constant( ).var(ddof=ddof) # check that variance of constant series is identically 0 - assert not (var_x > 0).any().any() + if isinstance(consistent_data, Series): + assert not (var_x > 0).any() + else: + assert not (var_x > 0).any().any() expected = consistent_data * np.nan expected[count_x >= max(min_periods, 1)] = 0.0 if ddof == 1: @@ -94,12 +108,18 @@ def test_rolling_consistency_var_std_cov( var_x = all_data.rolling(window=window, min_periods=min_periods, center=center).var( ddof=ddof ) - assert not (var_x < 0).any().any() + if isinstance(all_data, Series): + assert not (var_x < 0).any() + else: + assert not (var_x < 0).any().any() std_x = all_data.rolling(window=window, min_periods=min_periods, center=center).std( ddof=ddof ) - assert not (std_x < 0).any().any() + if isinstance(all_data, Series): + assert not (std_x < 0).any() + else: + assert not (std_x < 0).any().any() # check that var(x) == std(x)^2 tm.assert_equal(var_x, std_x * std_x) @@ -107,7 +127,10 @@ def test_rolling_consistency_var_std_cov( cov_x_x = all_data.rolling( window=window, min_periods=min_periods, center=center ).cov(all_data, ddof=ddof) - assert not (cov_x_x < 0).any().any() + if 
isinstance(all_data, Series):
+        assert not (cov_x_x < 0).any()
+    else:
+        assert not (cov_x_x < 0).any().any()
 
     # check that var(x) == cov(x, x)
     tm.assert_equal(var_x, cov_x_x)

From bd953a26009692c7e7a54988eefb483ae9b8340b Mon Sep 17 00:00:00 2001
From: richard
Date: Thu, 6 Nov 2025 17:07:03 -0500
Subject: [PATCH 3/8] Indicate config is experimental

---
 pandas/core/config_init.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 60a20010d4daa..5a1f24e43ca10 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -903,7 +903,8 @@ def register_converter_cb(key: str) -> None:
 cf.register_option(
     "python_scalars",
     False if os.environ.get("PANDAS_FUTURE_PYTHON_SCALARS", "0") == "0" else True,
-    "Whether to return Python scalars instead of NumPy or PyArrow scalars",
+    "Whether to return Python scalars instead of NumPy or PyArrow scalars. "
+    "Currently experimental; setting it to True is not recommended for end users.",
     validator=is_one_of_factory([True, False]),
 )
 

From 0896a2f6482074e0fa093b83b5396e25dec924e5 Mon Sep 17 00:00:00 2001
From: richard
Date: Thu, 6 Nov 2025 17:19:52 -0500
Subject: [PATCH 4/8] Add CI job

---
 .github/workflows/unit-tests.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index 6cf9381a70d5b..46d6c7b5400a8 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -31,6 +31,7 @@ jobs:
         # Prevent the include jobs from overriding other jobs
         pattern: [""]
         pandas_future_infer_string: ["1"]
+        pandas_future_python_scalars: ["0"]
         include:
           - name: "Downstream Compat"
             env_file: actions-313-downstream_compat.yaml
@@ -75,6 +76,10 @@ jobs:
             env_file: actions-313.yaml
             pandas_future_infer_string: "0"
             platform: ubuntu-24.04
+          - name: "PANDAS_FUTURE_PYTHON_SCALARS=1"
+            env_file: actions-313.yaml
+            pandas_future_python_scalars: "1"
+            platform: ubuntu-24.04
           - name: "Numpy Dev"
             env_file: actions-313-numpydev.yaml
             pattern: "not slow and not network and not single_cpu"
@@ -92,6 +97,7 @@ jobs:
       LC_ALL: ${{ matrix.lc_all || '' }}
       PANDAS_CI: '1'
       PANDAS_FUTURE_INFER_STRING: ${{ matrix.pandas_future_infer_string || '1' }}
+      PANDAS_FUTURE_PYTHON_SCALARS: ${{ matrix.pandas_future_python_scalars || '0' }}
       TEST_ARGS: ${{ matrix.test_args || '' }}
       PYTEST_WORKERS: 'auto'
       PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}

From 64de4aa5c812894f1158b182217cff9ad6ab1715 Mon Sep 17 00:00:00 2001
From: Richard Shadrach
Date: Thu, 13 Nov 2025 17:16:19 -0500
Subject: [PATCH 5/8] Type-hint fixes

---
 pandas/core/arrays/masked.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 8c8fbef11e82a..901a33c6e267b 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -1840,10 +1840,10 @@ def all(
         result = maybe_unbox_numpy_scalar(values.all(axis=axis))
 
         if skipna:
-            return result  # type: ignore[return-value]
+            return result
         else:
             if not result or len(self) == 0 or not self._mask.any():
-                return result  # type: ignore[return-value]
+                return result
             else:
                 return self.dtype.na_value
 

From 7ac83a50af87a410bdd81fb73bb4b42ffc9992fa Mon Sep 17 00:00:00 2001
From: Richard Shadrach
Date: Thu, 20 Nov 2025 15:10:21 -0500
Subject: [PATCH 6/8] Cleanup implementation

---
 .github/workflows/code-checks.yml |  2 --
 pandas/conftest.py                | 14 ++++++++++++++
 pandas/util/_decorators.py        | 14 ++++----------
 3 files changed, 18
insertions(+), 12 deletions(-) diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index c7bd7a97c9a35..656d0ed21ba05 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -57,8 +57,6 @@ jobs: run: sudo apt-get update && sudo apt-get install -y libegl1 libopengl0 - name: Run doctests - env: - PANDAS_SET_MODULE_DUNDER: 0 run: cd ci && ./code_checks.sh doctests if: ${{ steps.build.outcome == 'success' && always() }} diff --git a/pandas/conftest.py b/pandas/conftest.py index 41fc71190ab64..73a9f7e040c48 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -109,6 +109,20 @@ def pytest_addoption(parser) -> None: ) +def pytest_sessionstart(session): + import doctest + import inspect + + orig = doctest.DocTestFinder._from_module + + def _from_module(self, module, object): + if inspect.isfunction(object) and "." in object.__qualname__: + return True + return orig(self, module, object) + + doctest.DocTestFinder._from_module = _from_module + + def ignore_doctest_warning(item: pytest.Item, path: str, message: str) -> None: """Ignore doctest warning. diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index eeb3fd07ddf0b..dd2ed6c00e48c 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -2,7 +2,6 @@ from functools import wraps import inspect -import os from textwrap import dedent from typing import ( TYPE_CHECKING, @@ -524,15 +523,10 @@ def example(): assert example.__module__ == "pandas" """ - if os.environ.get("PANDAS_SET_MODULE_DUNDER", "1") == "0": - def decorator(func: F) -> F: - return func - else: - - def decorator(func: F) -> F: - if module is not None: - func.__module__ = module - return func + def decorator(func: F) -> F: + if module is not None: + func.__module__ = module + return func return decorator From 4debbdf0c7d7cc31fa795b3f9966f30861f717b3 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Thu, 20 Nov 2025 16:02:56 -0500 Subject: [PATCH 7/8] First pass at docstrings --- pandas/core/accessor.py | 4 +- pandas/core/arrays/datetimes.py | 4 +- pandas/core/arrays/masked.py | 32 +++--- pandas/core/arrays/sparse/array.py | 6 +- pandas/core/dtypes/dtypes.py | 2 +- pandas/core/frame.py | 67 ++++++------- pandas/core/generic.py | 2 +- pandas/core/groupby/generic.py | 32 +++--- pandas/core/indexes/base.py | 56 +++++------ pandas/core/indexes/category.py | 4 +- pandas/core/indexes/multi.py | 12 ++- pandas/core/reshape/merge.py | 2 +- pandas/core/series.py | 151 +++++++++++++++-------------- pandas/io/formats/info.py | 6 +- 14 files changed, 191 insertions(+), 189 deletions(-) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 01cdc417742bd..4163de0d2cf01 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -416,7 +416,7 @@ def register_dataframe_accessor(name: str) -> Callable[[TypeT], TypeT]: AttributeError: The series must contain integer data only. >>> df = pd.Series([1, 2, 3]) >>> df.int_accessor.sum() -np.int64(6)""" +6""" @set_module("pandas.api.extensions") @@ -481,7 +481,7 @@ def register_series_accessor(name: str) -> Callable[[TypeT], TypeT]: AttributeError: The series must contain integer data only. 
>>> df = pd.Series([1, 2, 3]) >>> df.int_accessor.sum() - np.int64(6) + 6 """ from pandas import Series diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index f68b9d458751e..6bc50c994caf1 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1046,7 +1046,7 @@ def tz_localize( 4 2018-10-28 02:30:00+01:00 5 2018-10-28 03:00:00+01:00 6 2018-10-28 03:30:00+01:00 - dtype: datetime64[s, CET] + dtype: datetime64[us, CET] In some cases, inferring the DST is impossible. In such cases, you can pass an ndarray to the ambiguous parameter to set the DST explicitly @@ -1058,7 +1058,7 @@ def tz_localize( 0 2018-10-28 01:20:00+02:00 1 2018-10-28 02:36:00+02:00 2 2018-10-28 03:46:00+01:00 - dtype: datetime64[s, CET] + dtype: datetime64[us, CET] If the DST transition causes nonexistent times, you can shift these dates forward or backwards with a timedelta object or `'shift_forward'` diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 112ad2a2e4c3b..e73db1d499a7d 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1739,25 +1739,25 @@ def any( skips NAs): >>> pd.array([True, False, True]).any() - np.True_ + True >>> pd.array([True, False, pd.NA]).any() - np.True_ + True >>> pd.array([False, False, pd.NA]).any() - np.False_ + False >>> pd.array([], dtype="boolean").any() - np.False_ + False >>> pd.array([pd.NA], dtype="boolean").any() - np.False_ + False >>> pd.array([pd.NA], dtype="Float64").any() - np.False_ + False With ``skipna=False``, the result can be NA if this is logically required (whether ``pd.NA`` is True or False influences the result): >>> pd.array([True, False, pd.NA]).any(skipna=False) - np.True_ + True >>> pd.array([1, 0, pd.NA]).any(skipna=False) - np.True_ + True >>> pd.array([False, False, pd.NA]).any(skipna=False) >>> pd.array([0, 0, pd.NA]).any(skipna=False) @@ -1825,17 +1825,17 @@ def all( skips NAs): >>> pd.array([True, True, pd.NA]).all() - np.True_ + True >>> pd.array([1, 1, pd.NA]).all() - np.True_ + True >>> pd.array([True, False, pd.NA]).all() - np.False_ + False >>> pd.array([], dtype="boolean").all() - np.True_ + True >>> pd.array([pd.NA], dtype="boolean").all() - np.True_ + True >>> pd.array([pd.NA], dtype="Float64").all() - np.True_ + True With ``skipna=False``, the result can be NA if this is logically required (whether ``pd.NA`` is True or False influences the result): @@ -1845,9 +1845,9 @@ def all( >>> pd.array([1, 1, pd.NA]).all(skipna=False) >>> pd.array([True, False, pd.NA]).all(skipna=False) - np.False_ + False >>> pd.array([1, 0, pd.NA]).all(skipna=False) - np.False_ + False """ nv.validate_all((), kwargs) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 7d2285fba9fd0..4c2ca1d54a813 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1288,7 +1288,7 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True): IntIndex Indices: array([2, 3], dtype=int32) - >>> arr.astype(SparseDtype(np.dtype("int32"))) + >>> arr.astype(pd.SparseDtype(np.dtype("int32"))) [0, 0, 1, 2] Fill: 0 IntIndex @@ -1297,7 +1297,7 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True): Using a NumPy dtype with a different kind (e.g. float) will coerce just ``self.sp_values``. - >>> arr.astype(SparseDtype(np.dtype("float64"))) + >>> arr.astype(pd.SparseDtype(np.dtype("float64"))) ... 
# doctest: +NORMALIZE_WHITESPACE [nan, nan, 1.0, 2.0] Fill: nan @@ -1306,7 +1306,7 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True): Using a SparseDtype, you can also change the fill value as well. - >>> arr.astype(SparseDtype("float64", fill_value=0.0)) + >>> arr.astype(pd.SparseDtype("float64", fill_value=0.0)) ... # doctest: +NORMALIZE_WHITESPACE [0.0, 0.0, 1.0, 2.0] Fill: 0.0 diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 2e3d73edcdf4f..5e1323739d888 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -2049,7 +2049,7 @@ def update_dtype(self, dtype) -> SparseDtype: Examples -------- >>> SparseDtype(int, 0).update_dtype(float) - Sparse[float64, 0.0] + Sparse[float64, np.float64(0.0)] >>> SparseDtype(int, 1).update_dtype(SparseDtype(float, np.nan)) Sparse[float64, nan] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c8c246434f6d8..6e927fc4e8e44 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -972,7 +972,7 @@ def __dataframe__( >>> df_not_necessarily_pandas = pd.DataFrame({"A": [1, 2], "B": [3, 4]}) >>> interchange_object = df_not_necessarily_pandas.__dataframe__() >>> interchange_object.column_names() - Index(['A', 'B'], dtype='object') + Index(['A', 'B'], dtype='str') >>> df_pandas = pd.api.interchange.from_dataframe( ... interchange_object.select_columns_by_name(["A"]) ... ) @@ -1484,7 +1484,7 @@ def style(self) -> Styler: panda bear polar bear koala marsupial - Name: species, dtype: object + Name: species, dtype: str label: population content: panda 1864 @@ -1536,7 +1536,7 @@ def items(self) -> Iterable[tuple[Hashable, Series]]: panda bear polar bear koala marsupial - Name: species, dtype: object + Name: species, dtype: str label: population content: panda 1864 @@ -3791,7 +3791,7 @@ def memory_usage(self, index: bool = True, deep: bool = False) -> Series: 4 1 1.0 1.0+0.0j 1 True >>> df.memory_usage() - Index 128 + Index 132 int64 40000 float64 40000 complex128 80000 @@ -3810,7 +3810,7 @@ def memory_usage(self, index: bool = True, deep: bool = False) -> Series: The memory footprint of `object` dtype columns is ignored by default: >>> df.memory_usage(deep=True) - Index 128 + Index 132 int64 40000 float64 40000 complex128 80000 @@ -3943,7 +3943,7 @@ def transpose( the `object` dtype: >>> df2.dtypes - name object + name str score float64 employed bool kids int64 @@ -4381,10 +4381,10 @@ def _setitem(self, key, value) -> None: >>> s = pd.Series([10, 20], index=[1, 3]) # Note: index 3 doesn't exist in df >>> df["B"] = s # Assigns by index label, not position >>> df - A B - 0 1 NaN - 1 2 10 - 2 3 NaN + A B + 0 1 NaN + 1 2 10.0 + 2 3 NaN Series assignment with partial index match: @@ -5109,6 +5109,7 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: 2 48 3 28 4 12 + dtype: int64 Local variables shall be explicitly referenced using ``@`` character in front of the name: @@ -5970,7 +5971,7 @@ def rename( >>> df.index RangeIndex(start=0, stop=3, step=1) >>> df.rename(index=str).index - Index(['0', '1', '2'], dtype='object') + Index(['0', '1', '2'], dtype='str') >>> df.rename(columns={"A": "a", "B": "b", "C": "c"}, errors="raise") Traceback (most recent call last): @@ -6043,7 +6044,7 @@ def pop(self, item: Hashable) -> Series: 1 bird 2 mammal 3 mammal - Name: class, dtype: object + Name: class, dtype: str >>> df name max_speed @@ -7797,14 +7798,14 @@ def value_counts( >>> df first_name middle_name 0 John Smith - 1 Anne - 2 John + 1 Anne NaN + 2 John NaN 3 
Beth Louise >>> df.value_counts() first_name middle_name - Beth Louise 1 John Smith 1 + Beth Louise 1 Name: count, dtype: int64 >>> df.value_counts(dropna=False) @@ -9088,10 +9089,10 @@ def combine( ... index=[1, 2], ... ) >>> df2.combine(df1, take_smaller) - A B C - 0 0.0 NaN NaN - 1 0.0 3.0 NaN - 2 NaN 3.0 NaN + B C A + 0 NaN NaN 0.0 + 1 3.0 NaN 0.0 + 2 3.0 NaN NaN >>> df2.combine(df1, take_smaller, overwrite=False) A B C @@ -12776,7 +12777,7 @@ def sem( Examples -------- >>> s = pd.Series([1, 2, 3]) - >>> s.sem().round(6) + >>> round(s.sem(), 6) 0.57735 With a DataFrame @@ -13439,10 +13440,8 @@ def idxmin( >>> df = pd.DataFrame( ... { - ... { - ... "consumption": [10.51, 103.11, 55.48], - ... "co2_emissions": [37.2, 19.66, 1712], - ... } + ... "consumption": [10.51, 103.11, 55.48], + ... "co2_emissions": [37.2, 19.66, 1712], ... }, ... index=["Pork", "Wheat Products", "Beef"], ... ) @@ -13458,7 +13457,7 @@ def idxmin( >>> df.idxmin() consumption Pork co2_emissions Wheat Products - dtype: object + dtype: str To return the index for the minimum value in each row, use ``axis="columns"``. @@ -13542,10 +13541,8 @@ def idxmax( >>> df = pd.DataFrame( ... { - ... { - ... "consumption": [10.51, 103.11, 55.48], - ... "co2_emissions": [37.2, 19.66, 1712], - ... } + ... "consumption": [10.51, 103.11, 55.48], + ... "co2_emissions": [37.2, 19.66, 1712], ... }, ... index=["Pork", "Wheat Products", "Beef"], ... ) @@ -13559,9 +13556,9 @@ def idxmax( By default, it returns the index for the maximum value in each column. >>> df.idxmax() - consumption Wheat Products - co2_emissions Beef - dtype: object + consumption Wheat Products + co2_emissions Beef + dtype: str To return the index for the maximum value in each row, use ``axis="columns"``. @@ -13569,7 +13566,7 @@ def idxmax( Pork co2_emissions Wheat Products consumption Beef co2_emissions - dtype: object + dtype: str """ axis = self._get_axis_number(axis) @@ -14067,7 +14064,7 @@ def to_period( >>> idx DatetimeIndex(['2001-03-31', '2002-05-31', '2003-08-31'], - dtype='datetime64[s]', freq=None) + dtype='datetime64[us]', freq=None) >>> idx.to_period("M") PeriodIndex(['2001-03', '2002-05', '2003-08'], dtype='period[M]') @@ -14296,7 +14293,7 @@ def isin_(x): 0 1 3 1 2 4 >>> df.columns - Index(['A', 'B'], dtype='object') + Index(['A', 'B'], dtype='str') """, ) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 940231233e308..bbfd18f8e42b4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -12998,7 +12998,7 @@ def make_doc(name: str, ndim: int) -> str: Examples -------- >>> s = pd.Series([1, 2, 3]) - >>> s.sem().round(6) + >>> round(s.sem(), 6) 0.57735 With a DataFrame diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 93e04fe61555e..eed1e46369575 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1226,7 +1226,7 @@ def take( 3 parrot 2 2 lion 1 monkey - Name: name, dtype: object + Name: name, dtype: str We may take elements using negative integers for positive indices, starting from the end of the object, just like with Python lists. @@ -1236,7 +1236,7 @@ def take( 4 falcon 2 0 rabbit 1 monkey - Name: name, dtype: object + Name: name, dtype: str """ result = self._op_via_apply("take", indices=indices, **kwargs) return result @@ -2668,19 +2668,19 @@ def idxmax( ... 
) >>> df - consumption co2_emissions - Pork 10.51 37.20 - Wheat Products 103.11 19.66 - Beef 55.48 1712.00 + consumption co2_emissions food_type + Pork 10.51 37.20 meat + Wheat Products 103.11 19.66 plant + Beef 55.48 1712.00 meat By default, it returns the index for the maximum value in each column according to the group. >>> df.groupby("food_type").idxmax() - consumption co2_emissions + consumption co2_emissions food_type - animal Beef Beef - plant Wheat Products Wheat Products + meat Beef Beef + plant Wheat Products Wheat Products """ return self._idxmax_idxmin("idxmax", numeric_only=numeric_only, skipna=skipna) @@ -2740,19 +2740,19 @@ def idxmin( ... ) >>> df - consumption co2_emissions - Pork 10.51 37.20 - Wheat Products 103.11 19.66 - Beef 55.48 1712.00 + consumption co2_emissions food_type + Pork 10.51 37.20 meat + Wheat Products 103.11 19.66 plant + Beef 55.48 1712.00 meat By default, it returns the index for the minimum value in each column according to the group. >>> df.groupby("food_type").idxmin() - consumption co2_emissions + consumption co2_emissions food_type - animal Pork Pork - plant Wheat Products Wheat Products + meat Pork Pork + plant Wheat Products Wheat Products """ return self._idxmax_idxmin("idxmin", numeric_only=numeric_only, skipna=skipna) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e8b30138423b9..2deed450c2072 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1019,7 +1019,7 @@ def ravel(self, order: str_t = "C") -> Self: -------- >>> s = pd.Series([1, 2, 3], index=["a", "b", "c"]) >>> s.index.ravel() - Index(['a', 'b', 'c'], dtype='object') + Index(['a', 'b', 'c'], dtype='str') """ return self[:] @@ -1349,11 +1349,11 @@ def repeat(self, repeats, axis: None = None) -> Self: -------- >>> idx = pd.Index(["a", "b", "c"]) >>> idx - Index(['a', 'b', 'c'], dtype='object') + Index(['a', 'b', 'c'], dtype='str') >>> idx.repeat(2) - Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object') + Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='str') >>> idx.repeat([1, 2, 3]) - Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object') + Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='str') """ repeats = ensure_platform_int(repeats) nv.validate_repeat((), {"axis": axis}) @@ -1674,7 +1674,7 @@ def to_series(self, index=None, name: Hashable | None = None) -> Series: Ant Ant Bear Bear Cow Cow - Name: animal, dtype: object + Name: animal, dtype: str To enforce a new index, specify new labels to ``index``: @@ -1682,7 +1682,7 @@ def to_series(self, index=None, name: Hashable | None = None) -> Series: 0 Ant 1 Bear 2 Cow - Name: animal, dtype: object + Name: animal, dtype: str To override the name of the resulting column, specify ``name``: @@ -1691,7 +1691,7 @@ def to_series(self, index=None, name: Hashable | None = None) -> Series: Ant Ant Bear Bear Cow Cow - Name: zoo, dtype: object + Name: zoo, dtype: str """ from pandas import Series @@ -2080,7 +2080,7 @@ def rename(self, name, *, inplace: bool = False) -> Self | None: -------- >>> idx = pd.Index(["A", "C", "A", "B"], name="score") >>> idx.rename("grade") - Index(['A', 'C', 'A', 'B'], dtype='object', name='grade') + Index(['A', 'C', 'A', 'B'], dtype='str', name='grade') >>> idx = pd.MultiIndex.from_product( ... 
[["python", "cobra"], [2018, 2019]], names=["kind", "year"] @@ -2222,12 +2222,12 @@ def _get_level_values(self, level) -> Index: -------- >>> idx = pd.Index(list("abc")) >>> idx - Index(['a', 'b', 'c'], dtype='object') + Index(['a', 'b', 'c'], dtype='str') Get level values by supplying `level` as integer: >>> idx.get_level_values(0) - Index(['a', 'b', 'c'], dtype='object') + Index(['a', 'b', 'c'], dtype='str') """ self._validate_index_level(level) return self @@ -2668,7 +2668,7 @@ def isna(self) -> npt.NDArray[np.bool_]: >>> idx = pd.Index(["black", "", "red", None]) >>> idx - Index(['black', '', 'red', None], dtype='object') + Index(['black', '', 'red', nan], dtype='str') >>> idx.isna() array([False, False, False, True]) @@ -2679,7 +2679,7 @@ def isna(self) -> npt.NDArray[np.bool_]: ... ) >>> idx DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'], - dtype='datetime64[s]', freq=None) + dtype='datetime64[us]', freq=None) >>> idx.isna() array([False, True, True, True]) """ @@ -2725,7 +2725,7 @@ def notna(self) -> npt.NDArray[np.bool_]: >>> idx = pd.Index(["black", "", "red", None]) >>> idx - Index(['black', '', 'red', None], dtype='object') + Index(['black', '', 'red', nan], dtype='str') >>> idx.notna() array([ True, True, True, False]) """ @@ -2878,18 +2878,18 @@ def drop_duplicates(self, *, keep: DropKeep = "first") -> Self: set of duplicated entries. The default value of keep is 'first'. >>> idx.drop_duplicates(keep="first") - Index(['llama', 'cow', 'beetle', 'hippo'], dtype='object') + Index(['llama', 'cow', 'beetle', 'hippo'], dtype='str') The value 'last' keeps the last occurrence for each set of duplicated entries. >>> idx.drop_duplicates(keep="last") - Index(['cow', 'beetle', 'llama', 'hippo'], dtype='object') + Index(['cow', 'beetle', 'llama', 'hippo'], dtype='str') The value ``False`` discards all sets of duplicated entries. >>> idx.drop_duplicates(keep=False) - Index(['cow', 'beetle', 'hippo'], dtype='object') + Index(['cow', 'beetle', 'hippo'], dtype='str') """ if self.is_unique: return self._view() @@ -4201,9 +4201,9 @@ def reindex( -------- >>> idx = pd.Index(["car", "bike", "train", "tractor"]) >>> idx - Index(['car', 'bike', 'train', 'tractor'], dtype='object') + Index(['car', 'bike', 'train', 'tractor'], dtype='str') >>> idx.reindex(["car", "bike"]) - (Index(['car', 'bike'], dtype='object'), array([0, 1])) + (Index(['car', 'bike'], dtype='str'), array([0, 1])) """ # GH6552: preserve names when reindexing to non-named target # (i.e. neither Index nor Series). 
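
The conftest hook added in [PATCH 6/8] above patches doctest.DocTestFinder._from_module without a comment explaining why. The underlying problem: doctest only collects an object's examples if it believes the object belongs to the module being scanned, and once set_module rewrites __module__, that check fails and the doctests are silently skipped. A stand-alone sketch of the failure mode being worked around (the function f and its docstring are illustrative, not part of the patch):

import doctest
import sys

import pandas  # must be importable so inspect.getmodule can resolve "pandas"


def f():
    """
    >>> 1 + 1
    2
    """


# Simulate what @set_module("pandas") does to a decorated object:
f.__module__ = "pandas"

this_module = sys.modules[__name__]
finder = doctest.DocTestFinder()
# _from_module asks whether f belongs to the scanned module; because
# inspect.getmodule(f) now resolves to pandas, the answer is False and
# the doctest above would not be collected.
print(finder._from_module(this_module, f))  # False

The hook sidesteps that check for functions whose __qualname__ contains a dot, i.e. functions defined inside classes, which is where the rewritten __module__ would otherwise hide doctests.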
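Most of the repr churn in [PATCH 7/8] is mechanical: with string inference enabled (PANDAS_FUTURE_INFER_STRING defaults to "1" in the CI matrix added earlier), homogeneous string data is stored in the dedicated "str" dtype rather than in generic object arrays, so doctest outputs change from dtype='object' to dtype='str'. A short sketch of the distinction these updates encode, assuming a build with string inference on:

import pandas as pd

# Homogeneous strings now get the dedicated dtype:
idx = pd.Index(["car", "bike", "train", "tractor"])
print(idx)  # Index(['car', 'bike', 'train', 'tractor'], dtype='str')

# Mixed values still fall back to object dtype, which is why a few
# doctests in these hunks keep (or gain) dtype='object':
mixed = pd.Index(["a", 1])
print(mixed)  # Index(['a', 1], dtype='object')
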
@@ -5161,9 +5161,9 @@ def where(self, cond, other=None) -> Index: -------- >>> idx = pd.Index(["car", "bike", "train", "tractor"]) >>> idx - Index(['car', 'bike', 'train', 'tractor'], dtype='object') + Index(['car', 'bike', 'train', 'tractor'], dtype='str') >>> idx.where(idx.isin(["car", "train"]), "other") - Index(['car', 'other', 'train', 'other'], dtype='object') + Index(['car', 'other', 'train', 'other'], dtype='str') """ if isinstance(self, ABCMultiIndex): raise NotImplementedError( @@ -5506,7 +5506,7 @@ def equals(self, other: Any) -> bool: >>> idx2 = pd.Index(["1", "2", "3"]) >>> idx2 - Index(['1', '2', '3'], dtype='object') + Index(['1', '2', '3'], dtype='str') >>> idx1.equals(idx2) False @@ -5961,14 +5961,14 @@ def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: -------- >>> idx = pd.Index(["b", "a", "d", "c"]) >>> idx - Index(['b', 'a', 'd', 'c'], dtype='object') + Index(['b', 'a', 'd', 'c'], dtype='str') >>> order = idx.argsort() >>> order array([1, 0, 3, 2]) >>> idx[order] - Index(['a', 'b', 'c', 'd'], dtype='object') + Index(['a', 'b', 'c', 'd'], dtype='str') """ # This works for either ndarray or EA, is overridden # by RangeIndex, MultIIndex @@ -6491,17 +6491,17 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None): -------- >>> idx = pd.Index([1, 2, 3]) >>> idx.map({1: "a", 2: "b", 3: "c"}) - Index(['a', 'b', 'c'], dtype='object') + Index(['a', 'b', 'c'], dtype='str') Using `map` with a function: >>> idx = pd.Index([1, 2, 3]) >>> idx.map("I am a {}".format) - Index(['I am a 1', 'I am a 2', 'I am a 3'], dtype='object') + Index(['I am a 1', 'I am a 2', 'I am a 3'], dtype='str') >>> idx = pd.Index(["a", "b", "c"]) >>> idx.map(lambda x: x.upper()) - Index(['A', 'B', 'C'], dtype='object') + Index(['A', 'B', 'C'], dtype='str') """ from pandas.core.indexes.multi import MultiIndex @@ -7137,7 +7137,7 @@ def drop( -------- >>> idx = pd.Index(["a", "b", "c"]) >>> idx.drop(["a"]) - Index(['b', 'c'], dtype='object') + Index(['b', 'c'], dtype='str') """ if not isinstance(labels, Index): # avoid materializing e.g. 
RangeIndex @@ -7175,7 +7175,7 @@ def infer_objects(self, copy: bool = True) -> Index: Examples -------- >>> pd.Index(["a", 1]).infer_objects() - Index(['a', '1'], dtype='object') + Index(['a', 1], dtype='object') >>> pd.Index([1, 2], dtype="object").infer_objects() Index([1, 2], dtype='int64') """ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index cbefaac77dd82..3e8fbce5ebada 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -508,13 +508,13 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None): If the mapping is not one-to-one an :class:`~pandas.Index` is returned: >>> idx.map({"a": "first", "b": "second", "c": "first"}) - Index(['first', 'second', 'first'], dtype='object') + Index(['first', 'second', 'first'], dtype='str') If a `dict` is used, all unmapped categories are mapped to `NaN` and the result is an :class:`~pandas.Index`: >>> idx.map({"a": "first", "b": "second"}) - Index(['first', 'second', nan], dtype='object') + Index(['first', 'second', nan], dtype='str') """ mapped = self._values.map(mapper, na_action=na_action) return Index(mapped, name=self.name) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 43e6469e078f0..4e8f74c1636d7 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1851,9 +1851,9 @@ def get_level_values(self, level) -> Index: Get level values by supplying level as either integer or name: >>> mi.get_level_values(0) - Index(['a', 'b', 'c'], dtype='object', name='level_1') + Index(['a', 'b', 'c'], dtype='str', name='level_1') >>> mi.get_level_values("level_2") - Index(['d', 'e', 'f'], dtype='object', name='level_2') + Index(['d', 'e', 'f'], dtype='str', name='level_2') If a level contains missing values, the return type of the level may be cast to ``float``. @@ -2481,7 +2481,9 @@ def argsort( -------- >>> midx = pd.MultiIndex.from_arrays([[3, 2], ["e", "c"]]) >>> midx - MultiIndex([(3, 'e'), (2, 'c')]) + MultiIndex([(3, 'e'), + (2, 'c')], + ) >>> order = midx.argsort() >>> order @@ -3418,10 +3420,10 @@ def get_loc_level(self, key, level: IndexLabel = 0, drop_level: bool = True): >>> mi = pd.MultiIndex.from_arrays([list("abb"), list("def")], names=["A", "B"]) >>> mi.get_loc_level("b") - (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B')) + (slice(1, 3, None), Index(['e', 'f'], dtype='str', name='B')) >>> mi.get_loc_level("e", level="B") - (array([False, True, False]), Index(['b'], dtype='object', name='A')) + (array([False, True, False]), Index(['b'], dtype='str', name='A')) >>> mi.get_loc_level(["b", "e"]) (1, None) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 0de300dcaf55f..73fbcc75ebd57 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -327,7 +327,7 @@ def merge( Traceback (most recent call last): ... 
ValueError: columns overlap but no suffix specified: - Index(['value'], dtype='object') + Index(['value'], dtype='str') >>> df1 = pd.DataFrame({"a": ["foo", "bar"], "b": [1, 2]}) >>> df2 = pd.DataFrame({"a": ["foo", "baz"], "c": [3, 4]}) diff --git a/pandas/core/series.py b/pandas/core/series.py index 235b9f5389556..011e477d232c3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1218,7 +1218,7 @@ def repeat(self, repeats: int | Sequence[int], axis: None = None) -> Series: 0 a 1 b 2 c - dtype: object + dtype: str >>> s.repeat(2) 0 a 0 a @@ -1226,7 +1226,7 @@ def repeat(self, repeats: int | Sequence[int], axis: None = None) -> Series: 1 b 2 c 2 c - dtype: object + dtype: str >>> s.repeat([1, 2, 3]) 0 a 1 b @@ -1234,7 +1234,7 @@ def repeat(self, repeats: int | Sequence[int], axis: None = None) -> Series: 2 c 2 c 2 c - dtype: object + dtype: str """ nv.validate_repeat((), {"axis": axis}) new_index = self.index.repeat(repeats) @@ -2227,7 +2227,7 @@ def drop_duplicates( 3 beetle 4 llama 5 hippo - Name: animal, dtype: object + Name: animal, dtype: str With the 'keep' parameter, the selection behavior of duplicated values can be changed. The value 'first' keeps the first occurrence for each @@ -2238,7 +2238,7 @@ def drop_duplicates( 1 cow 3 beetle 5 hippo - Name: animal, dtype: object + Name: animal, dtype: str The value 'last' for parameter 'keep' keeps the last occurrence for each set of duplicated entries. @@ -2248,7 +2248,7 @@ def drop_duplicates( 3 beetle 4 llama 5 hippo - Name: animal, dtype: object + Name: animal, dtype: str The value ``False`` for parameter 'keep' discards all sets of duplicated entries. @@ -2257,7 +2257,7 @@ def drop_duplicates( 1 cow 3 beetle 5 hippo - Name: animal, dtype: object + Name: animal, dtype: str """ inplace = validate_bool_kwarg(inplace, "inplace") result = super().drop_duplicates(keep=keep) @@ -2602,7 +2602,7 @@ def quantile( return self._constructor(result, index=idx, name=self.name) else: # scalar - return result.iloc[0] + return maybe_unbox_numpy_scalar(result.iloc[0]) def corr( self, @@ -2689,9 +2689,11 @@ def corr( other_values = other.to_numpy(dtype=float, na_value=np.nan, copy=False) if method in ["pearson", "spearman", "kendall"] or callable(method): - return nanops.nancorr( + result = nanops.nancorr( this_values, other_values, method=method, min_periods=min_periods ) + result = maybe_unbox_numpy_scalar(result) + return result raise ValueError( "method must be either 'pearson', " @@ -2743,9 +2745,11 @@ def cov( return np.nan this_values = this.to_numpy(dtype=float, na_value=np.nan, copy=False) other_values = other.to_numpy(dtype=float, na_value=np.nan, copy=False) - return nanops.nancov( + result = nanops.nancov( this_values, other_values, min_periods=min_periods, ddof=ddof ) + result = maybe_unbox_numpy_scalar(result) + return result @doc( klass="Series", @@ -2958,11 +2962,12 @@ def dot(self, other: AnyArrayLike | DataFrame) -> Series | np.ndarray: np.dot(lvals, rvals), index=other.columns, copy=False, dtype=common_type ).__finalize__(self, method="dot") elif isinstance(other, Series): - return np.dot(lvals, rvals) + result = np.dot(lvals, rvals) elif isinstance(rvals, np.ndarray): - return np.dot(lvals, rvals) + result = np.dot(lvals, rvals) else: # pragma: no cover raise TypeError(f"unsupported type: {type(other)}") + return maybe_unbox_numpy_scalar(result) def __matmul__(self, other): """ @@ -3066,7 +3071,7 @@ def compare( other a 3 self d other b - dtype: object + dtype: str Keep all original rows @@ -3312,7 +3317,7 @@ def 
update(self, other: Series | Sequence | Mapping) -> None: 0 d 1 b 2 e - dtype: object + dtype: str >>> s = pd.Series([1, 2, 3]) >>> s.update(pd.Series([4, 5, 6, 7, 8])) @@ -3511,7 +3516,7 @@ def sort_values( 2 d 3 a 4 c - dtype: object + dtype: str >>> s.sort_values() 3 a @@ -3519,7 +3524,7 @@ def sort_values( 4 c 2 d 0 z - dtype: object + dtype: str Sort using a key function. Your `key` function will be given the ``Series`` of values and should return an array-like. @@ -3531,14 +3536,14 @@ def sort_values( 0 a 2 c 4 e - dtype: object + dtype: str >>> s.sort_values(key=lambda x: x.str.lower()) 0 a 1 B 2 c 3 D 4 e - dtype: object + dtype: str NumPy ufuncs work well here. For example, we can sort by the ``sin`` of the value @@ -3718,7 +3723,7 @@ def sort_index( 2 b 3 a 4 d - dtype: object + dtype: str Sort Descending @@ -3727,7 +3732,7 @@ def sort_index( 3 a 2 b 1 c - dtype: object + dtype: str By default NaNs are put at the end, but use `na_position` to place them at the beginning @@ -3738,7 +3743,7 @@ def sort_index( 1.0 c 2.0 b 3.0 a - dtype: object + dtype: str Specify index level to sort @@ -4111,11 +4116,11 @@ def swaplevel( ... ], ... ) >>> s - Final exam History January A - Geography February B - Coursework History March A - Geography April C - dtype: object + Final exam History January A + Geography February B + Coursework History March A + Geography April C + dtype: str In the following example, we will swap the levels of the indices. Here, we will swap the levels column-wise, but levels can be swapped row-wise @@ -4124,11 +4129,11 @@ def swaplevel( last indices. >>> s.swaplevel() - Final exam January History A - February Geography B - Coursework March History A - April Geography C - dtype: object + Final exam January History A + February Geography B + Coursework March History A + April Geography C + dtype: str By supplying one argument, we can choose which index to swap the last index with. We can for example swap the first index with the last one as @@ -4139,7 +4144,7 @@ def swaplevel( February Geography Final exam B March History Coursework A April Geography Coursework C - dtype: object + dtype: str We can also define explicitly which indices we want to swap by supplying values for both i and j. Here, we for example swap the first and second indices. @@ -4149,7 +4154,7 @@ def swaplevel( Geography Final exam February B History Coursework March A Geography Coursework April C - dtype: object + dtype: str """ self._check_copy_deprecation(copy) assert isinstance(self.index, MultiIndex) @@ -4413,7 +4418,7 @@ def map( 1 dog 2 NaN 3 rabbit - dtype: object + dtype: str ``map`` accepts a ``dict`` or a ``Series``. Values that are not found in the ``dict`` are converted to ``NaN``, unless the dict has a default @@ -4424,7 +4429,7 @@ def map( 1 puppy 2 NaN 3 NaN - dtype: object + dtype: str It also accepts a function: @@ -4433,7 +4438,7 @@ def map( 1 I am a dog 2 I am a nan 3 I am a rabbit - dtype: object + dtype: str To avoid applying the function to missing values (and keep them as ``NaN``) ``na_action='ignore'`` can be used: @@ -4443,7 +4448,7 @@ def map( 1 I am a dog 2 NaN 3 I am a rabbit - dtype: object + dtype: str For categorical data, the function is only applied to the categories: @@ -4696,7 +4701,7 @@ def transform( Examples -------- - >>> df = pd.DataFrame({{"A": range(3), "B": range(1, 4)}}) + >>> df = pd.DataFrame({"A": range(3), "B": range(1, 4)}) >>> df A B 0 0 1 @@ -4727,19 +4732,17 @@ def transform( >>> df = pd.DataFrame( ... { - ... { - ... "Date": [ - ... 
"2015-05-08", - ... "2015-05-07", - ... "2015-05-06", - ... "2015-05-05", - ... "2015-05-08", - ... "2015-05-07", - ... "2015-05-06", - ... "2015-05-05", - ... ], - ... "Data": [5, 8, 6, 1, 50, 100, 60, 120], - ... } + ... "Date": [ + ... "2015-05-08", + ... "2015-05-07", + ... "2015-05-06", + ... "2015-05-05", + ... "2015-05-08", + ... "2015-05-07", + ... "2015-05-06", + ... "2015-05-05", + ... ], + ... "Data": [5, 8, 6, 1, 50, 100, 60, 120], ... } ... ) >>> df @@ -4765,10 +4768,8 @@ def transform( >>> df = pd.DataFrame( ... { - ... { - ... "c": [1, 1, 1, 2, 2, 2, 2], - ... "type": ["m", "n", "o", "m", "m", "n", "n"], - ... } + ... "c": [1, 1, 1, 2, 2, 2, 2], + ... "type": ["m", "n", "o", "m", "m", "n", "n"], ... } ... ) >>> df @@ -5288,7 +5289,7 @@ def reindex( # type: ignore[override] >>> date_index = pd.date_range("1/1/2010", periods=6, freq="D") >>> df2 = pd.DataFrame( - ... {{"prices": [100, 101, np.nan, 100, 89, 88]}}, index=date_index + ... {"prices": [100, 101, np.nan, 100, 89, 88]}, index=date_index ... ) >>> df2 prices @@ -5451,13 +5452,13 @@ def rename_axis( 0 dog 1 cat 2 monkey - dtype: object + dtype: str >>> s.rename_axis("animal") animal 0 dog 1 cat 2 monkey - dtype: object + dtype: str """ return super().rename_axis( mapper=mapper, @@ -5643,7 +5644,7 @@ def pop(self, item: Hashable) -> Any: 2 3 dtype: int64 """ - return super().pop(item=item) + return maybe_unbox_numpy_scalar(super().pop(item=item)) def info( self, @@ -5713,17 +5714,17 @@ def info( Series name: None Non-Null Count Dtype -------------- ----- - 5 non-null object - dtypes: object(1) - memory usage: 80.0+ bytes + 5 non-null str + dtypes: str(1) + memory usage: 106.0 bytes Prints a summary excluding information about its values: >>> s.info(verbose=False) Index: 5 entries, 1 to 5 - dtypes: object(1) - memory usage: 80.0+ bytes + dtypes: str(1) + memory usage: 106.0 bytes Pipe output of Series.info to buffer instead of sys.stdout, get buffer content and writes to a text file: @@ -5747,9 +5748,9 @@ def info( Series name: None Non-Null Count Dtype -------------- ----- - 1000000 non-null object - dtypes: object(1) - memory usage: 7.6+ MB + 1000000 non-null str + dtypes: str(1) + memory usage: 8.6 MB >>> s.info(memory_usage="deep") @@ -5757,9 +5758,9 @@ def info( Series name: None Non-Null Count Dtype -------------- ----- - 1000000 non-null object - dtypes: object(1) - memory usage: 55.3 MB + 1000000 non-null str + dtypes: str(1) + memory usage: 8.6 MB """ return SeriesInfo(self, memory_usage).render( buf=buf, @@ -5799,7 +5800,7 @@ def memory_usage(self, index: bool = True, deep: bool = False) -> int: -------- >>> s = pd.Series(range(3)) >>> s.memory_usage() - 152 + 156 Not including the index gives the size of the rest of the data, which is necessarily smaller: @@ -5811,11 +5812,13 @@ def memory_usage(self, index: bool = True, deep: bool = False) -> int: >>> s = pd.Series(["a", "b"]) >>> s.values - array(['a', 'b'], dtype=object) + + ['a', 'b'] + Length: 2, dtype: str >>> s.memory_usage() - 144 + 150 >>> s.memory_usage(deep=True) - 244 + 150 """ v = self._memory_usage(deep=deep) if index: diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index dac523898092a..f21e89322fcc8 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -78,10 +78,10 @@ # Column Non-Null Count Dtype --- ------ -------------- ----- 0 int_col 5 non-null int64 - 1 text_col 5 non-null object + 1 text_col 5 non-null str 2 float_col 5 non-null float64 - dtypes: float64(1), int64(1), object(1) - memory usage: 
248.0+ bytes + dtypes: float64(1), int64(1), str(1) + memory usage: 278.0 bytes Prints a summary of columns count and its dtypes but not per column information: From 82a5fbae9081f392b73363756eac333d6427d6f5 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Thu, 20 Nov 2025 16:45:13 -0500 Subject: [PATCH 8/8] Finish up --- .github/workflows/code-checks.yml | 2 +- pandas/conftest.py | 5 ++-- pandas/core/frame.py | 39 ++++++++++++++++--------------- pandas/core/generic.py | 4 ++-- pandas/core/indexes/base.py | 8 ++++--- pandas/core/indexes/interval.py | 3 ++- pandas/core/indexes/multi.py | 11 ++++++--- pandas/io/formats/info.py | 24 +++++++++---------- pandas/tests/series/test_ufunc.py | 7 ++++-- 9 files changed, 58 insertions(+), 45 deletions(-) diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index 656d0ed21ba05..2a26df5f5cde1 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -57,7 +57,7 @@ jobs: run: sudo apt-get update && sudo apt-get install -y libegl1 libopengl0 - name: Run doctests - run: cd ci && ./code_checks.sh doctests + run: cd ci && PANDAS_FUTURE_PYTHON_SCALARS="1" ./code_checks.sh doctests if: ${{ steps.build.outcome == 'success' && always() }} - name: Install pandas in editable mode diff --git a/pandas/conftest.py b/pandas/conftest.py index 73a9f7e040c48..e7d35dae038d3 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -149,14 +149,15 @@ def pytest_collection_modifyitems(items, config) -> None: # Warnings from doctests that can be ignored; place reason in comment above. # Each entry specifies (path, message) - see the ignore_doctest_warning function ignored_doctest_warnings = [ - ("api.interchange.from_dataframe", ".*Interchange Protocol is deprecated"), + ("api.interchange.from_dataframe", "The DataFrame Interchange Protocol"), ("is_int64_dtype", "is_int64_dtype is deprecated"), ("is_interval_dtype", "is_interval_dtype is deprecated"), ("is_period_dtype", "is_period_dtype is deprecated"), ("is_datetime64tz_dtype", "is_datetime64tz_dtype is deprecated"), ("is_categorical_dtype", "is_categorical_dtype is deprecated"), ("is_sparse", "is_sparse is deprecated"), - ("DataFrame.__dataframe__", "Interchange Protocol is deprecated"), + ("CategoricalDtype._from_values_or_dtype", "Constructing a Categorical"), + ("DataFrame.__dataframe__", "The DataFrame Interchange Protocol"), ("DataFrameGroupBy.fillna", "DataFrameGroupBy.fillna is deprecated"), ("DataFrameGroupBy.corrwith", "DataFrameGroupBy.corrwith is deprecated"), ("NDFrame.replace", "Series.replace without 'value'"), diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6e927fc4e8e44..a2b6e6435dcad 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -89,6 +89,7 @@ infer_dtype_from_scalar, invalidate_string_dtypes, maybe_downcast_to_dtype, + maybe_unbox_numpy_scalar, ) from pandas.core.dtypes.common import ( infer_dtype_from_object, @@ -3822,7 +3823,7 @@ def memory_usage(self, index: bool = True, deep: bool = False) -> Series: many repeated values. 
>>> df["object"].astype("category").memory_usage(deep=True) - 5136 + 5140 """ result = self._constructor_sliced( [c.memory_usage(index=False, deep=deep) for col, c in self.items()], @@ -4392,11 +4393,11 @@ def _setitem(self, key, value) -> None: >>> s = pd.Series([100, 200], index=["b", "d"]) >>> df["B"] = s >>> df - A B - a 1 NaN - b 2 100 - c 3 NaN - d 4 200 + A B + a 1 NaN + b 2 100.0 + c 3 NaN + d 4 200.0 Series index labels NOT in DataFrame, ignored: @@ -4408,7 +4409,6 @@ def _setitem(self, key, value) -> None: x 1 10 y 2 20 z 3 50 - # Values for 'a' and 'b' are completely ignored! """ key = com.apply_if_callable(key, self) @@ -5121,6 +5121,7 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: 2 6 3 8 4 10 + Name: A, dtype: int64 """ from pandas.core.computation.eval import eval as _eval @@ -7810,10 +7811,10 @@ def value_counts( >>> df.value_counts(dropna=False) first_name middle_name + John Smith 1 Anne NaN 1 + John NaN 1 Beth Louise 1 - John Smith 1 - NaN 1 Name: count, dtype: int64 >>> df.value_counts("first_name") @@ -9095,10 +9096,10 @@ def combine( 2 3.0 NaN NaN >>> df2.combine(df1, take_smaller, overwrite=False) - A B C - 0 0.0 NaN NaN - 1 0.0 3.0 1.0 - 2 NaN 3.0 1.0 + B C A + 0 NaN NaN 0.0 + 1 3.0 1.0 0.0 + 2 3.0 1.0 NaN """ other_idxlen = len(other.index) # save for compare other_columns = other.columns @@ -10954,8 +10955,8 @@ def apply( ``apply`` has type stability (variables in the function do not change their type during the execution). - >>> import bodo - >>> df.apply(lambda x: x.A + x.B, axis=1, engine=bodo.jit) + >>> import bodo # doctest: +SKIP + >>> df.apply(lambda x: x.A + x.B, axis=1, engine=bodo.jit) # doctest: +SKIP Note that JIT compilation is only recommended for functions that take a significant amount of time to run. Fast functions are unlikely to run faster @@ -12131,7 +12132,7 @@ def _get_data() -> DataFrame: df = df.astype(dtype) arr = concat_compat(list(df._iter_column_arrays())) return arr._reduce(name, skipna=skipna, keepdims=False, **kwds) - return func(df.values) + return maybe_unbox_numpy_scalar(func(df.values)) elif axis == 1: if len(df.index) == 0: # Taking a transpose would result in no columns, losing the dtype. 
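
The recurring one-line change in this patch wraps scalar-returning paths in maybe_unbox_numpy_scalar, imported from pandas.core.dtypes.cast in the hunks above. The helper's body is not part of this excerpt; a minimal sketch consistent with its call sites and with the using_python_scalars() checks in the tests might look like:

import numpy as np

# Option accessor used by the tests earlier in this series:
from pandas._config import using_python_scalars


def maybe_unbox_numpy_scalar(value):
    # Illustrative sketch only; the real implementation lives in
    # pandas.core.dtypes.cast. np.generic is the base class of all
    # NumPy scalars, and .item() converts e.g. np.int64(6) -> 6 and
    # np.True_ -> True. Arrays and non-NumPy values pass through, and
    # nothing changes unless the experimental option is enabled.
    if using_python_scalars() and isinstance(value, np.generic):
        return value.item()
    return value

Keeping the option check inside the helper is what lets every call site, like DataFrame._reduce above, stay a one-line wrap instead of branching on the option locally.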
@@ -13283,8 +13284,8 @@ def kurt( With axis=None - >>> df.kurt(axis=None).round(6) - -0.988693 + >>> df.kurt(axis=None) + -0.9886927196984727 Using axis=1 @@ -13465,7 +13466,7 @@ def idxmin( Pork consumption Wheat Products co2_emissions Beef consumption - dtype: object + dtype: str """ axis = self._get_axis_number(axis) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bbfd18f8e42b4..4072341c65bd1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -13116,8 +13116,8 @@ def make_doc(name: str, ndim: int) -> str: With axis=None - >>> df.kurt(axis=None).round(6) - -0.988693 + >>> df.kurt(axis=None) + -0.9886927196984727 Using axis=1 diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2deed450c2072..db8c2ae9e8930 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6792,7 +6792,7 @@ def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left" pos = self[::-1].searchsorted( label, side="right" if side == "left" else "left" ) - return len(self) - pos + return maybe_unbox_numpy_scalar(len(self) - pos) raise ValueError("index must be monotonic increasing or decreasing") @@ -6979,6 +6979,8 @@ def slice_locs( if start_slice == -1: start_slice -= len(self) + start_slice = maybe_unbox_numpy_scalar(start_slice) + end_slice = maybe_unbox_numpy_scalar(end_slice) return start_slice, end_slice def delete( @@ -7398,7 +7400,7 @@ def any(self, *args, **kwargs): # i.e. EA, call _reduce instead of "any" to get TypeError instead # of AttributeError return vals._reduce("any") - return np.any(vals) + return maybe_unbox_numpy_scalar(np.any(vals)) def all(self, *args, **kwargs): """ @@ -7446,7 +7448,7 @@ def all(self, *args, **kwargs): # i.e. EA, call _reduce instead of "all" to get TypeError instead # of AttributeError return vals._reduce("all") - return np.all(vals) + return maybe_unbox_numpy_scalar(np.all(vals)) @final def _maybe_disable_logical_methods(self, opname: str_t) -> None: diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 37b1838665ee9..8d26ff9bf9501 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -40,6 +40,7 @@ infer_dtype_from_scalar, maybe_box_datetimelike, maybe_downcast_numeric, + maybe_unbox_numpy_scalar, maybe_upcast_numeric_to_64bit, ) from pandas.core.dtypes.common import ( @@ -804,7 +805,7 @@ def get_loc(self, key) -> int | slice | np.ndarray: if matches == 0: raise KeyError(key) if matches == 1: - return mask.argmax() + return maybe_unbox_numpy_scalar(mask.argmax()) res = lib.maybe_booleans_to_slice(mask.view("u1")) if isinstance(res, slice) and res.stop is None: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 4e8f74c1636d7..c4312b726fb1e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -58,7 +58,10 @@ ) from pandas.util._exceptions import find_stack_level -from pandas.core.dtypes.cast import coerce_indexer_dtype +from pandas.core.dtypes.cast import ( + coerce_indexer_dtype, + maybe_unbox_numpy_scalar, +) from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, @@ -3115,7 +3118,9 @@ def get_slice_bound( """ if not isinstance(label, tuple): label = (label,) - return self._partial_tup_index(label, side=side) + result = self._partial_tup_index(label, side=side) + result = maybe_unbox_numpy_scalar(result) + return result def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]: """ @@ -3702,7 +3707,7 @@ def 
convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): if start == end: # The label is present in self.levels[level] but unused: raise KeyError(key) - return slice(start, end) + return slice(maybe_unbox_numpy_scalar(start), maybe_unbox_numpy_scalar(end)) def get_locs(self, seq) -> npt.NDArray[np.intp]: """ diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index f21e89322fcc8..7b6600e699f63 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -90,8 +90,8 @@ RangeIndex: 5 entries, 0 to 4 Columns: 3 entries, int_col to float_col - dtypes: float64(1), int64(1), object(1) - memory usage: 248.0+ bytes + dtypes: float64(1), int64(1), str(1) + memory usage: 278.0 bytes Pipe output of DataFrame.info to buffer instead of sys.stdout, get buffer content and writes to a text file: @@ -120,11 +120,11 @@ Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- - 0 column_1 1000000 non-null object - 1 column_2 1000000 non-null object - 2 column_3 1000000 non-null object - dtypes: object(3) - memory usage: 22.9+ MB + 0 column_1 1000000 non-null str + 1 column_2 1000000 non-null str + 2 column_3 1000000 non-null str + dtypes: str(3) + memory usage: 25.7 MB >>> df.info(memory_usage='deep') @@ -132,11 +132,11 @@ Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- - 0 column_1 1000000 non-null object - 1 column_2 1000000 non-null object - 2 column_3 1000000 non-null object - dtypes: object(3) - memory usage: 165.9 MB""" + 0 column_1 1000000 non-null str + 1 column_2 1000000 non-null str + 2 column_3 1000000 non-null str + dtypes: str(3) + memory usage: 25.7 MB""" ) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 348ea18d22907..0c071c173a298 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -433,10 +433,13 @@ def test_np_matmul(): @pytest.mark.parametrize("box", [pd.Index, pd.Series]) -def test_np_matmul_1D(box): +def test_np_matmul_1D(box, using_python_scalars): result = np.matmul(box([1, 2]), box([2, 3])) assert result == 8 - assert isinstance(result, np.int64) + if using_python_scalars: + assert isinstance(result, int) + else: + assert isinstance(result, np.int64) def test_array_ufuncs_for_many_arguments():
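
Taken together, the series keeps the experimental option off by default, wires PANDAS_FUTURE_PYTHON_SCALARS into a dedicated CI job, and updates tests and doctests to match. A usage sketch of the observable difference follows; note the environment variable must be set before the first pandas import, since config_init.py reads it when registering the option default:

import os

# Opt in the same way the new CI job does:
os.environ["PANDAS_FUTURE_PYTHON_SCALARS"] = "1"

import pandas as pd

s = pd.Series([1, 2, 3])

# Reductions now hand back built-in Python scalars instead of NumPy
# scalars, matching the updated expectations in test_returned_dtype
# and test_np_matmul_1D above:
print(type(s.max()))  # <class 'int'> rather than <class 'numpy.int64'>
assert isinstance(s @ s, int)  # 14 as a plain Python int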