pandas-dev
diff --git a/‎ci/code_checks.sh
Lines changed: 30 additions & 0 deletions b/‎ci/code_checks.sh
Lines changed: 30 additions & 0 deletions
diff --git a/‎doc/source/whatsnew/v3.0.0.rst
Lines changed: 64 additions & 1 deletion b/‎doc/source/whatsnew/v3.0.0.rst
Lines changed: 64 additions & 1 deletion
diff --git a/‎pandas/core/arrays/_arrow_string_mixins.py
Lines changed: 27 additions & 17 deletions b/‎pandas/core/arrays/_arrow_string_mixins.py
Lines changed: 27 additions & 17 deletions
diff --git a/‎pandas/core/arrays/arrow/array.py
Lines changed: 3 additions & 1 deletion b/‎pandas/core/arrays/arrow/array.py
Lines changed: 3 additions & 1 deletion
diff --git a/‎pandas/core/arrays/categorical.py
Lines changed: 16 additions & 4 deletions b/‎pandas/core/arrays/categorical.py
Lines changed: 16 additions & 4 deletions
diff --git a/‎pandas/core/arrays/string_.py
Lines changed: 20 additions & 11 deletions b/‎pandas/core/arrays/string_.py
Lines changed: 20 additions & 11 deletions
@@ -73,9 +73,30 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Period.freq GL08" \
         -i "pandas.Period.ordinal GL08" \
         -i "pandas.RangeIndex.from_range PR01,SA01" \
+        -i "pandas.Series.cat.add_categories PR01,PR02" \
+        -i "pandas.Series.cat.as_ordered PR01" \
+        -i "pandas.Series.cat.as_unordered PR01" \
+        -i "pandas.Series.cat.remove_categories PR01,PR02" \
+        -i "pandas.Series.cat.remove_unused_categories PR01" \
+        -i "pandas.Series.cat.rename_categories PR01,PR02" \
+        -i "pandas.Series.cat.reorder_categories PR01,PR02" \
+        -i "pandas.Series.cat.set_categories PR01,PR02" \
+        -i "pandas.Series.dt.as_unit PR01,PR02" \
+        -i "pandas.Series.dt.ceil PR01,PR02" \
+        -i "pandas.Series.dt.day_name PR01,PR02" \
+        -i "pandas.Series.dt.floor PR01,PR02" \
         -i "pandas.Series.dt.freq GL08" \
+        -i "pandas.Series.dt.month_name PR01,PR02" \
+        -i "pandas.Series.dt.normalize PR01" \
+        -i "pandas.Series.dt.round PR01,PR02" \
+        -i "pandas.Series.dt.strftime PR01,PR02" \
+        -i "pandas.Series.dt.to_period PR01,PR02" \
+        -i "pandas.Series.dt.total_seconds PR01" \
+        -i "pandas.Series.dt.tz_convert PR01,PR02" \
+        -i "pandas.Series.dt.tz_localize PR01,PR02" \
         -i "pandas.Series.dt.unit GL08" \
         -i "pandas.Series.pad PR01,SA01" \
+        -i "pandas.Series.sparse.from_coo PR07,SA01" \
         -i "pandas.Timedelta.max PR02" \
         -i "pandas.Timedelta.min PR02" \
         -i "pandas.Timedelta.resolution PR02" \
@@ -85,11 +106,13 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Timestamp.resolution PR02" \
         -i "pandas.Timestamp.tzinfo GL08" \
         -i "pandas.Timestamp.year GL08" \
+        -i "pandas.api.types.is_float PR01,SA01" \
         -i "pandas.api.types.is_integer PR01,SA01" \
         -i "pandas.api.types.is_iterator PR07,SA01" \
         -i "pandas.api.types.is_re_compilable PR07,SA01" \
         -i "pandas.api.types.pandas_dtype PR07,RT03,SA01" \
         -i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
+        -i "pandas.arrays.DatetimeArray SA01" \
         -i "pandas.arrays.IntegerArray SA01" \
         -i "pandas.arrays.IntervalArray.left SA01" \
         -i "pandas.arrays.IntervalArray.length SA01" \
@@ -140,6 +163,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.errors.DuplicateLabelError SA01" \
         -i "pandas.errors.IntCastingNaNError SA01" \
         -i "pandas.errors.InvalidIndexError SA01" \
+        -i "pandas.errors.InvalidVersion SA01" \
         -i "pandas.errors.NullFrequencyError SA01" \
         -i "pandas.errors.NumExprClobberingError SA01" \
         -i "pandas.errors.NumbaUtilError SA01" \
@@ -148,18 +172,24 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.errors.PerformanceWarning SA01" \
         -i "pandas.errors.PossibleDataLossError SA01" \
         -i "pandas.errors.PossiblePrecisionLoss SA01" \
+        -i "pandas.errors.SpecificationError SA01" \
         -i "pandas.errors.UndefinedVariableError PR01,SA01" \
         -i "pandas.errors.UnsortedIndexError SA01" \
         -i "pandas.errors.UnsupportedFunctionCall SA01" \
         -i "pandas.errors.ValueLabelTypeMismatch SA01" \
         -i "pandas.infer_freq SA01" \
         -i "pandas.io.json.build_table_schema PR07,RT03,SA01" \
+        -i "pandas.io.stata.StataReader.data_label SA01" \
+        -i "pandas.io.stata.StataReader.value_labels RT03,SA01" \
         -i "pandas.io.stata.StataReader.variable_labels RT03,SA01" \
         -i "pandas.io.stata.StataWriter.write_file SA01" \
         -i "pandas.json_normalize RT03,SA01" \
+        -i "pandas.period_range RT03,SA01" \
         -i "pandas.plotting.andrews_curves RT03,SA01" \
+        -i "pandas.plotting.lag_plot RT03,SA01" \
         -i "pandas.plotting.scatter_matrix PR07,SA01" \
         -i "pandas.set_eng_float_format RT03,SA01" \
+        -i "pandas.testing.assert_extension_array_equal SA01" \
         -i "pandas.tseries.offsets.BDay PR02,SA01" \
         -i "pandas.tseries.offsets.BQuarterBegin.is_on_offset GL08" \
         -i "pandas.tseries.offsets.BQuarterBegin.n GL08" \
 
@@ -203,6 +203,67 @@ In cases with mixed-resolution inputs, the highest resolution is used:
     In [2]: pd.to_datetime([pd.Timestamp("2024-03-22 11:43:01"), "2024-03-22 11:43:01.002"]).dtype
     Out[2]: dtype('<M8[ns]')
 
+.. _whatsnew_300.api_breaking.value_counts_sorting:
+
+Changed behavior in :meth:`DataFrame.value_counts` and :meth:`DataFrameGroupBy.value_counts` when ``sort=False``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In previous versions of pandas, :meth:`DataFrame.value_counts` with ``sort=False`` would sort the result by row labels (as was documented). This was nonintuitive and inconsistent with :meth:`Series.value_counts` which would maintain the order of the input. Now :meth:`DataFrame.value_counts` will maintain the order of the input.
+
+.. ipython:: python
+
+    df = pd.DataFrame(
+        {
+            "a": [2, 2, 2, 2, 1, 1, 1, 1],
+            "b": [2, 1, 3, 1, 2, 3, 1, 1],
+        }
+    )
+    df
+
+*Old behavior*
+
+.. code-block:: ipython
+
+    In [3]: df.value_counts(sort=False)
+    Out[3]:
+    a  b
+    1  1    2
+       2    1
+       3    1
+    2  1    2
+       2    1
+       3    1
+    Name: count, dtype: int64
+
+*New behavior*
+
+.. ipython:: python
+
+    df.value_counts(sort=False)
+
+This change also applies to :meth:`.DataFrameGroupBy.value_counts`. Here, there are two options for sorting: one ``sort`` passed to :meth:`DataFrame.groupby` and one passed directly to :meth:`.DataFrameGroupBy.value_counts`. The former will determine whether to sort the groups, the latter whether to sort the counts. All non-grouping columns will maintain the order of the input *within groups*.
+
+*Old behavior*
+
+.. code-block:: ipython
+
+    In [5]: df.groupby("a", sort=True).value_counts(sort=False)
+    Out[5]:
+    a  b
+    1  1    2
+       2    1
+       3    1
+    2  1    2
+       2    1
+       3    1
+    dtype: int64
+
+*New behavior*
+
+.. ipython:: python
+
+    df.groupby("a", sort=True).value_counts(sort=False)
+
 .. _whatsnew_300.api_breaking.deps:
 
 Increased minimum version for Python
@@ -544,7 +605,7 @@ Bug fixes
 
 Categorical
 ^^^^^^^^^^^
--
+- Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
 -
 
 Datetimelike
@@ -682,6 +743,7 @@ Sparse
 ^^^^^^
 - Bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`)
 - Bug in :meth:`DataFrame.sparse.from_spmatrix` which hard coded an invalid ``fill_value`` for certain subtypes. (:issue:`59063`)
+- Bug in :meth:`DataFrame.sparse.to_dense` which ignored subclassing and always returned an instance of :class:`DataFrame` (:issue:`59913`)
 
 ExtensionArray
 ^^^^^^^^^^^^^^
@@ -700,6 +762,7 @@ Other
 - Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`)
 - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
 - Bug in :func:`eval` with ``engine="numexpr"`` returning unexpected result for float division. (:issue:`59736`)
+- Bug in :func:`to_numeric` raising ``TypeError`` when ``arg`` is a :class:`Timedelta` or :class:`Timestamp` scalar. (:issue:`59944`)
 - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
 - Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
 
@@ -10,15 +10,14 @@
 
 import numpy as np
 
+from pandas._libs import lib
 from pandas.compat import (
     pa_version_under10p1,
     pa_version_under11p0,
     pa_version_under13p0,
     pa_version_under17p0,
 )
 
-from pandas.core.dtypes.missing import isna
-
 if not pa_version_under10p1:
     import pyarrow as pa
     import pyarrow.compute as pc
@@ -38,7 +37,7 @@ class ArrowStringArrayMixin:
     def __init__(self, *args, **kwargs) -> None:
         raise NotImplementedError
 
-    def _convert_bool_result(self, result):
+    def _convert_bool_result(self, result, na=lib.no_default, method_name=None):
         # Convert a bool-dtype result to the appropriate result type
         raise NotImplementedError
 
@@ -212,7 +211,9 @@ def _str_removesuffix(self, suffix: str):
         result = pc.if_else(ends_with, removed, self._pa_array)
         return type(self)(result)
 
-    def _str_startswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
+    def _str_startswith(
+        self, pat: str | tuple[str, ...], na: Scalar | lib.NoDefault = lib.no_default
+    ):
         if isinstance(pat, str):
             result = pc.starts_with(self._pa_array, pattern=pat)
         else:
@@ -225,11 +226,11 @@ def _str_startswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
 
                 for p in pat[1:]:
                     result = pc.or_(result, pc.starts_with(self._pa_array, pattern=p))
-        if not isna(na):  # pyright: ignore [reportGeneralTypeIssues]
-            result = result.fill_null(na)
-        return self._convert_bool_result(result)
+        return self._convert_bool_result(result, na=na, method_name="startswith")
 
-    def _str_endswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
+    def _str_endswith(
+        self, pat: str | tuple[str, ...], na: Scalar | lib.NoDefault = lib.no_default
+    ):
         if isinstance(pat, str):
             result = pc.ends_with(self._pa_array, pattern=pat)
         else:
@@ -242,9 +243,7 @@ def _str_endswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
 
                 for p in pat[1:]:
                     result = pc.or_(result, pc.ends_with(self._pa_array, pattern=p))
-        if not isna(na):  # pyright: ignore [reportGeneralTypeIssues]
-            result = result.fill_null(na)
-        return self._convert_bool_result(result)
+        return self._convert_bool_result(result, na=na, method_name="endswith")
 
     def _str_isalnum(self):
         result = pc.utf8_is_alnum(self._pa_array)
@@ -283,7 +282,12 @@ def _str_isupper(self):
         return self._convert_bool_result(result)
 
     def _str_contains(
-        self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True
+        self,
+        pat,
+        case: bool = True,
+        flags: int = 0,
+        na: Scalar | lib.NoDefault = lib.no_default,
+        regex: bool = True,
     ):
         if flags:
             raise NotImplementedError(f"contains not implemented with {flags=}")
@@ -293,19 +297,25 @@ def _str_contains(
         else:
             pa_contains = pc.match_substring
         result = pa_contains(self._pa_array, pat, ignore_case=not case)
-        if not isna(na):  # pyright: ignore [reportGeneralTypeIssues]
-            result = result.fill_null(na)
-        return self._convert_bool_result(result)
+        return self._convert_bool_result(result, na=na, method_name="contains")
 
     def _str_match(
-        self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
+        self,
+        pat: str,
+        case: bool = True,
+        flags: int = 0,
+        na: Scalar | lib.NoDefault = lib.no_default,
     ):
         if not pat.startswith("^"):
             pat = f"^{pat}"
         return self._str_contains(pat, case, flags, na, regex=True)
 
     def _str_fullmatch(
-        self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
+        self,
+        pat,
+        case: bool = True,
+        flags: int = 0,
+        na: Scalar | lib.NoDefault = lib.no_default,
     ):
         if not pat.endswith("$") or pat.endswith("\\$"):
             pat = f"{pat}$"
 
@@ -2318,7 +2318,9 @@ def _apply_elementwise(self, func: Callable) -> list[list[Any]]:
             for chunk in self._pa_array.iterchunks()
         ]
 
-    def _convert_bool_result(self, result):
+    def _convert_bool_result(self, result, na=lib.no_default, method_name=None):
+        if na is not lib.no_default and not isna(na):  # pyright: ignore [reportGeneralTypeIssues]
+            result = result.fill_null(na)
         return type(self)(result)
 
     def _convert_int_result(self, result):
 
@@ -2679,16 +2679,28 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
     # ------------------------------------------------------------------------
     # String methods interface
     def _str_map(
-        self, f, na_value=np.nan, dtype=np.dtype("object"), convert: bool = True
+        self, f, na_value=lib.no_default, dtype=np.dtype("object"), convert: bool = True
     ):
         # Optimization to apply the callable `f` to the categories once
         # and rebuild the result by `take`ing from the result with the codes.
         # Returns the same type as the object-dtype implementation though.
-        from pandas.core.arrays import NumpyExtensionArray
-
         categories = self.categories
         codes = self.codes
-        result = NumpyExtensionArray(categories.to_numpy())._str_map(f, na_value, dtype)
+        if categories.dtype == "string":
+            result = categories.array._str_map(f, na_value, dtype)  # type: ignore[attr-defined]
+            if (
+                categories.dtype.na_value is np.nan  # type: ignore[union-attr]
+                and is_bool_dtype(dtype)
+                and (na_value is lib.no_default or isna(na_value))
+            ):
+                # NaN propagates as False for functions with boolean return type
+                na_value = False
+        else:
+            from pandas.core.arrays import NumpyExtensionArray
+
+            result = NumpyExtensionArray(categories.to_numpy())._str_map(
+                f, na_value, dtype
+            )
         return take_nd(result, codes, fill_value=na_value)
 
     def _str_get_dummies(self, sep: str = "|", dtype: NpDtype | None = None):
 
@@ -381,7 +381,11 @@ def _from_scalars(cls, scalars, dtype: DtypeObj) -> Self:
         return cls._from_sequence(scalars, dtype=dtype)
 
     def _str_map(
-        self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
+        self,
+        f,
+        na_value=lib.no_default,
+        dtype: Dtype | None = None,
+        convert: bool = True,
     ):
         if self.dtype.na_value is np.nan:
             return self._str_map_nan_semantics(f, na_value=na_value, dtype=dtype)
@@ -390,7 +394,7 @@ def _str_map(
 
         if dtype is None:
             dtype = self.dtype
-        if na_value is None:
+        if na_value is lib.no_default:
             na_value = self.dtype.na_value
 
         mask = isna(self)
@@ -459,11 +463,17 @@ def _str_map_str_or_object(
             # -> We don't know the result type. E.g. `.get` can return anything.
             return lib.map_infer_mask(arr, f, mask.view("uint8"))
 
-    def _str_map_nan_semantics(self, f, na_value=None, dtype: Dtype | None = None):
+    def _str_map_nan_semantics(
+        self, f, na_value=lib.no_default, dtype: Dtype | None = None
+    ):
         if dtype is None:
             dtype = self.dtype
-        if na_value is None:
-            na_value = self.dtype.na_value
+        if na_value is lib.no_default:
+            if is_bool_dtype(dtype):
+                # NaN propagates as False
+                na_value = False
+            else:
+                na_value = self.dtype.na_value
 
         mask = isna(self)
         arr = np.asarray(self)
@@ -474,7 +484,8 @@ def _str_map_nan_semantics(self, f, na_value=None, dtype: Dtype | None = None):
                 if is_integer_dtype(dtype):
                     na_value = 0
                 else:
-                    na_value = True
+                    # NaN propagates as False
+                    na_value = False
 
             result = lib.map_infer_mask(
                 arr,
@@ -484,15 +495,13 @@ def _str_map_nan_semantics(self, f, na_value=None, dtype: Dtype | None = None):
                 na_value=na_value,
                 dtype=np.dtype(cast(type, dtype)),
             )
-            if na_value_is_na and mask.any():
+            if na_value_is_na and is_integer_dtype(dtype) and mask.any():
                 # TODO: we could alternatively do this check before map_infer_mask
                 #  and adjust the dtype/na_value we pass there. Which is more
                 #  performant?
-                if is_integer_dtype(dtype):
-                    result = result.astype("float64")
-                else:
-                    result = result.astype("object")
+                result = result.astype("float64")
                 result[mask] = np.nan
+
             return result
 
         else:
Original file line number	Diff line number	Diff line change
`@@ -2318,7 +2318,9 @@ def _apply_elementwise(self, func: Callable) -> list[list[Any]]:`
`2318`	`2318`	`for chunk in self._pa_array.iterchunks()`
`2319`	`2319`	`]`
`2320`	`2320`
`2321`		`- def _convert_bool_result(self, result):`
	`2321`	`+ def _convert_bool_result(self, result, na=lib.no_default, method_name=None):`
	`2322`	`+ if na is not lib.no_default and not isna(na): # pyright: ignore [reportGeneralTypeIssues]`
	`2323`	`+ result = result.fill_null(na)`
`2322`	`2324`	`return type(self)(result)`
`2323`	`2325`
`2324`	`2326`	`def _convert_int_result(self, result):`