Merge branch 'main' into bugfix-spss-kwargs

astronights · web-flow · commit ac2c2b56f5d0 · 2024-01-31T17:18:50.000+08:00
diff --git a/doc/source/whatsnew/v2.2.1.rst b/doc/source/whatsnew/v2.2.1.rst
@@ -18,6 +18,7 @@ Fixed regressions
 - Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`)
 - Fixed regression in :func:`wide_to_long` raising an ``AttributeError`` for string columns (:issue:`57066`)
 - Fixed regression in :meth:`DataFrame.loc` raising ``IndexError`` for non-unique, masked dtype indexes where result has more than 10,000 rows (:issue:`57027`)
+- Fixed regression in :meth:`DataFrame.to_dict` with ``orient='list'`` and datetime or timedelta types returning integers (:issue:`54824`)
 - Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
 - Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)
 - Fixed regression in :meth:`Index.join` raising ``TypeError`` when joining an empty index to a non-empty index containing mixed dtype values (:issue:`57048`)
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -96,6 +96,13 @@ Deprecations
 - Deprecated :meth:`Timestamp.utcnow`, use ``Timestamp.now("UTC")`` instead (:issue:`56680`)
 -
 
+.. ---------------------------------------------------------------------------
+.. _whatsnew_300.prior_deprecations:
+
+Removal of prior version deprecations/changes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+- Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`)
+
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.performance:
 
diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx
@@ -607,7 +607,8 @@ cdef ndarray[int64_t] _get_dst_hours(
         ndarray[uint8_t, cast=True] mismatch
         ndarray[int64_t] delta, dst_hours
         ndarray[intp_t] switch_idxs, trans_idx, grp, a_idx, b_idx, one_diff
-        list trans_grp
+        # TODO: Can uncomment when numpy >=2 is the minimum
+        # tuple trans_grp
         intp_t switch_idx
         int64_t left, right
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -89,7 +89,6 @@
     find_common_type,
     infer_dtype_from_scalar,
     invalidate_string_dtypes,
-    maybe_box_native,
     maybe_downcast_to_dtype,
 )
 from pandas.core.dtypes.common import (
@@ -1983,28 +1982,6 @@ def to_numpy(
 
         return result
 
-    def _create_data_for_split_and_tight_to_dict(
-        self, are_all_object_dtype_cols: bool, object_dtype_indices: list[int]
-    ) -> list:
-        """
-        Simple helper method to create data for to ``to_dict(orient="split")`` and
-        ``to_dict(orient="tight")`` to create the main output data
-        """
-        if are_all_object_dtype_cols:
-            data = [
-                list(map(maybe_box_native, t))
-                for t in self.itertuples(index=False, name=None)
-            ]
-        else:
-            data = [list(t) for t in self.itertuples(index=False, name=None)]
-            if object_dtype_indices:
-                # If we have object_dtype_cols, apply maybe_box_naive after list
-                # comprehension for perf
-                for row in data:
-                    for i in object_dtype_indices:
-                        row[i] = maybe_box_native(row[i])
-        return data
-
     @overload
     def to_dict(
         self,
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1049,27 +1049,18 @@ def pipe(
         return com.pipe(self, func, *args, **kwargs)
 
     @final
-    def get_group(self, name, obj=None) -> DataFrame | Series:
+    def get_group(self, name) -> DataFrame | Series:
         """
         Construct DataFrame from group with provided name.
 
         Parameters
         ----------
         name : object
             The name of the group to get as a DataFrame.
-        obj : DataFrame, default None
-            The DataFrame to take the DataFrame out of.  If
-            it is None, the object groupby was called on will
-            be used.
-
-            .. deprecated:: 2.1.0
-                The obj is deprecated and will be removed in a future version.
-                Do ``df.iloc[gb.indices.get(name)]``
-                instead of ``gb.get_group(name, obj=df)``.
 
         Returns
         -------
-        same type as obj
+        DataFrame or Series
 
         Examples
         --------
@@ -1142,18 +1133,8 @@ def get_group(self, name, obj=None) -> DataFrame | Series:
         if not len(inds):
             raise KeyError(name)
 
-        if obj is None:
-            indexer = inds if self.axis == 0 else (slice(None), inds)
-            return self._selected_obj.iloc[indexer]
-        else:
-            warnings.warn(
-                "obj is deprecated and will be removed in a future version. "
-                "Do ``df.iloc[gb.indices.get(name)]`` "
-                "instead of ``gb.get_group(name, obj=df)``.",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
-            return obj._take_with_is_copy(inds, axis=self.axis)
+        indexer = inds if self.axis == 0 else (slice(None), inds)
+        return self._selected_obj.iloc[indexer]
 
     @final
     def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]:
diff --git a/pandas/core/methods/to_dict.py b/pandas/core/methods/to_dict.py
@@ -24,11 +24,34 @@
 from pandas.core import common as com
 
 if TYPE_CHECKING:
+    from collections.abc import Generator
+
     from pandas._typing import MutableMappingT
 
     from pandas import DataFrame
 
 
+def create_data_for_split(
+    df: DataFrame, are_all_object_dtype_cols: bool, object_dtype_indices: list[int]
+) -> Generator[list, None, None]:
+    """
+    Simple helper method to create the main output data for
+    ``to_dict(orient="split")``
+    """
+    if are_all_object_dtype_cols:
+        for tup in df.itertuples(index=False, name=None):
+            yield list(map(maybe_box_native, tup))
+    else:
+        for tup in df.itertuples(index=False, name=None):
+            data = list(tup)
+            if object_dtype_indices:
+                # If we have object_dtype_cols, apply maybe_box_native after
+                # building the row list, for perf
+                for i in object_dtype_indices:
+                    data[i] = maybe_box_native(data[i])
+            yield data
+
+
 @overload
 def to_dict(
     df: DataFrame,
@@ -152,39 +175,38 @@ def to_dict(
         # GH46470 Return quickly if orient series to avoid creating dtype objects
         return into_c((k, v) for k, v in df.items())
 
+    if orient == "dict":
+        return into_c((k, v.to_dict(into=into)) for k, v in df.items())
+
     box_native_indices = [
         i
         for i, col_dtype in enumerate(df.dtypes.values)
         if col_dtype == np.dtype(object) or isinstance(col_dtype, ExtensionDtype)
     ]
-    box_na_values = [
-        lib.no_default if not isinstance(col_dtype, BaseMaskedDtype) else libmissing.NA
-        for i, col_dtype in enumerate(df.dtypes.values)
-    ]
-    are_all_object_dtype_cols = len(box_native_indices) == len(df.dtypes)
 
-    if orient == "dict":
-        return into_c((k, v.to_dict(into=into)) for k, v in df.items())
+    are_all_object_dtype_cols = len(box_native_indices) == len(df.dtypes)
 
-    elif orient == "list":
+    if orient == "list":
         object_dtype_indices_as_set: set[int] = set(box_native_indices)
+        box_na_values = (
+            lib.no_default
+            if not isinstance(col_dtype, BaseMaskedDtype)
+            else libmissing.NA
+            for col_dtype in df.dtypes.values
+        )
         return into_c(
             (
                 k,
-                list(
-                    map(
-                        maybe_box_native, v.to_numpy(na_value=box_na_values[i]).tolist()
-                    )
-                )
+                list(map(maybe_box_native, v.to_numpy(na_value=box_na_value)))
                 if i in object_dtype_indices_as_set
-                else v.to_numpy().tolist(),
+                else list(map(maybe_box_native, v.to_numpy())),
             )
-            for i, (k, v) in enumerate(df.items())
+            for i, (box_na_value, (k, v)) in enumerate(zip(box_na_values, df.items()))
         )
 
     elif orient == "split":
-        data = df._create_data_for_split_and_tight_to_dict(
-            are_all_object_dtype_cols, box_native_indices
+        data = list(
+            create_data_for_split(df, are_all_object_dtype_cols, box_native_indices)
         )
 
         return into_c(
@@ -196,10 +218,6 @@ def to_dict(
         )
 
     elif orient == "tight":
-        data = df._create_data_for_split_and_tight_to_dict(
-            are_all_object_dtype_cols, box_native_indices
-        )
-
         return into_c(
             ((("index", df.index.tolist()),) if index else ())
             + (
@@ -219,11 +237,9 @@ def to_dict(
     elif orient == "records":
         columns = df.columns.tolist()
         if are_all_object_dtype_cols:
-            rows = (
-                dict(zip(columns, row)) for row in df.itertuples(index=False, name=None)
-            )
             return [
-                into_c((k, maybe_box_native(v)) for k, v in row.items()) for row in rows
+                into_c(zip(columns, map(maybe_box_native, row)))
+                for row in df.itertuples(index=False, name=None)
             ]
         else:
             data = [
@@ -239,7 +255,7 @@ def to_dict(
                 for row in data:
                     for col in object_dtype_cols:
                         row[col] = maybe_box_native(row[col])
-            return data
+            return data  # type: ignore[return-value]
 
     elif orient == "index":
         if not df.index.is_unique:
@@ -252,24 +268,21 @@ def to_dict(
             )
         elif box_native_indices:
             object_dtype_indices_as_set = set(box_native_indices)
-            is_object_dtype_by_index = [
-                i in object_dtype_indices_as_set for i in range(len(df.columns))
-            ]
             return into_c(
                 (
                     t[0],
                     {
-                        columns[i]: maybe_box_native(v)
-                        if is_object_dtype_by_index[i]
+                        column: maybe_box_native(v)
+                        if i in object_dtype_indices_as_set
                         else v
-                        for i, v in enumerate(t[1:])
+                        for i, (column, v) in enumerate(zip(columns, t[1:]))
                     },
                 )
                 for t in df.itertuples(name=None)
             )
         else:
             return into_c(
-                (t[0], dict(zip(df.columns, t[1:]))) for t in df.itertuples(name=None)
+                (t[0], dict(zip(columns, t[1:]))) for t in df.itertuples(name=None)
             )
 
     else:
diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py
@@ -12,8 +12,11 @@
     NA,
     DataFrame,
     Index,
+    Interval,
     MultiIndex,
+    Period,
     Series,
+    Timedelta,
     Timestamp,
 )
 import pandas._testing as tm
@@ -519,3 +522,14 @@ def test_to_dict_pos_args_deprecation(self):
         )
         with tm.assert_produces_warning(FutureWarning, match=msg):
             df.to_dict("records", {})
+
+
+@pytest.mark.parametrize(
+    "val", [Timestamp(2020, 1, 1), Timedelta(1), Period("2020"), Interval(1, 2)]
+)
+def test_to_dict_list_pd_scalars(val):
+    # GH 54824
+    df = DataFrame({"a": [val]})
+    result = df.to_dict(orient="list")
+    expected = {"a": [val]}
+    assert result == expected
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -526,16 +526,6 @@ def test_as_index_select_column():
     tm.assert_series_equal(result, expected)
 
 
-def test_obj_arg_get_group_deprecated():
-    depr_msg = "obj is deprecated"
-
-    df = DataFrame({"a": [1, 1, 2], "b": [3, 4, 5]})
-    expected = df.iloc[df.groupby("b").indices.get(4)]
-    with tm.assert_produces_warning(FutureWarning, match=depr_msg):
-        result = df.groupby("b").get_group(4, obj=df)
-        tm.assert_frame_equal(result, expected)
-
-
 def test_groupby_as_index_select_column_sum_empty_df():
     # GH 35246
     df = DataFrame(columns=Index(["A", "B", "C"], name="alpha"))