Skip to content

Commit ac2c2b5

Browse files
authored
Merge branch 'main' into bugfix-spss-kwargs
2 parents 5718c91 + db11e25 commit ac2c2b5

File tree

8 files changed

+74
-90
lines changed

8 files changed

+74
-90
lines changed

doc/source/whatsnew/v2.2.1.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Fixed regressions
1818
- Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`)
1919
- Fixed regression in :func:`wide_to_long` raising an ``AttributeError`` for string columns (:issue:`57066`)
2020
- Fixed regression in :meth:`DataFrame.loc` raising ``IndexError`` for non-unique, masked dtype indexes where result has more than 10,000 rows (:issue:`57027`)
21+
- Fixed regression in :meth:`DataFrame.to_dict` with ``orient='list'`` and datetime or timedelta types returning integers (:issue:`54824`)
2122
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
2223
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)
2324
- Fixed regression in :meth:`Index.join` raising ``TypeError`` when joining an empty index to a non-empty index containing mixed dtype values (:issue:`57048`)

doc/source/whatsnew/v3.0.0.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,13 @@ Deprecations
9696
- Deprecated :meth:`Timestamp.utcnow`, use ``Timestamp.now("UTC")`` instead (:issue:`56680`)
9797
-
9898

99+
.. ---------------------------------------------------------------------------
100+
.. _whatsnew_300.prior_deprecations:
101+
102+
Removal of prior version deprecations/changes
103+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
104+
- Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`)
105+
99106
.. ---------------------------------------------------------------------------
100107
.. _whatsnew_300.performance:
101108

pandas/_libs/tslibs/tzconversion.pyx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -607,7 +607,8 @@ cdef ndarray[int64_t] _get_dst_hours(
607607
ndarray[uint8_t, cast=True] mismatch
608608
ndarray[int64_t] delta, dst_hours
609609
ndarray[intp_t] switch_idxs, trans_idx, grp, a_idx, b_idx, one_diff
610-
list trans_grp
610+
# TODO: Can uncomment when numpy >=2 is the minimum
611+
# tuple trans_grp
611612
intp_t switch_idx
612613
int64_t left, right
613614

pandas/core/frame.py

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@
8989
find_common_type,
9090
infer_dtype_from_scalar,
9191
invalidate_string_dtypes,
92-
maybe_box_native,
9392
maybe_downcast_to_dtype,
9493
)
9594
from pandas.core.dtypes.common import (
@@ -1983,28 +1982,6 @@ def to_numpy(
19831982

19841983
return result
19851984

1986-
def _create_data_for_split_and_tight_to_dict(
1987-
self, are_all_object_dtype_cols: bool, object_dtype_indices: list[int]
1988-
) -> list:
1989-
"""
1990-
Simple helper method to create data for to ``to_dict(orient="split")`` and
1991-
``to_dict(orient="tight")`` to create the main output data
1992-
"""
1993-
if are_all_object_dtype_cols:
1994-
data = [
1995-
list(map(maybe_box_native, t))
1996-
for t in self.itertuples(index=False, name=None)
1997-
]
1998-
else:
1999-
data = [list(t) for t in self.itertuples(index=False, name=None)]
2000-
if object_dtype_indices:
2001-
# If we have object_dtype_cols, apply maybe_box_naive after list
2002-
# comprehension for perf
2003-
for row in data:
2004-
for i in object_dtype_indices:
2005-
row[i] = maybe_box_native(row[i])
2006-
return data
2007-
20081985
@overload
20091986
def to_dict(
20101987
self,

pandas/core/groupby/groupby.py

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,27 +1049,18 @@ def pipe(
10491049
return com.pipe(self, func, *args, **kwargs)
10501050

10511051
@final
1052-
def get_group(self, name, obj=None) -> DataFrame | Series:
1052+
def get_group(self, name) -> DataFrame | Series:
10531053
"""
10541054
Construct DataFrame from group with provided name.
10551055
10561056
Parameters
10571057
----------
10581058
name : object
10591059
The name of the group to get as a DataFrame.
1060-
obj : DataFrame, default None
1061-
The DataFrame to take the DataFrame out of. If
1062-
it is None, the object groupby was called on will
1063-
be used.
1064-
1065-
.. deprecated:: 2.1.0
1066-
The obj is deprecated and will be removed in a future version.
1067-
Do ``df.iloc[gb.indices.get(name)]``
1068-
instead of ``gb.get_group(name, obj=df)``.
10691060
10701061
Returns
10711062
-------
1072-
same type as obj
1063+
DataFrame or Series
10731064
10741065
Examples
10751066
--------
@@ -1142,18 +1133,8 @@ def get_group(self, name, obj=None) -> DataFrame | Series:
11421133
if not len(inds):
11431134
raise KeyError(name)
11441135

1145-
if obj is None:
1146-
indexer = inds if self.axis == 0 else (slice(None), inds)
1147-
return self._selected_obj.iloc[indexer]
1148-
else:
1149-
warnings.warn(
1150-
"obj is deprecated and will be removed in a future version. "
1151-
"Do ``df.iloc[gb.indices.get(name)]`` "
1152-
"instead of ``gb.get_group(name, obj=df)``.",
1153-
FutureWarning,
1154-
stacklevel=find_stack_level(),
1155-
)
1156-
return obj._take_with_is_copy(inds, axis=self.axis)
1136+
indexer = inds if self.axis == 0 else (slice(None), inds)
1137+
return self._selected_obj.iloc[indexer]
11571138

11581139
@final
11591140
def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]:

pandas/core/methods/to_dict.py

Lines changed: 46 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,34 @@
2424
from pandas.core import common as com
2525

2626
if TYPE_CHECKING:
27+
from collections.abc import Generator
28+
2729
from pandas._typing import MutableMappingT
2830

2931
from pandas import DataFrame
3032

3133

34+
def create_data_for_split(
35+
df: DataFrame, are_all_object_dtype_cols: bool, object_dtype_indices: list[int]
36+
) -> Generator[list, None, None]:
37+
"""
38+
Simple helper method used by ``to_dict(orient="split")``
39+
to create the main output data
40+
"""
41+
if are_all_object_dtype_cols:
42+
for tup in df.itertuples(index=False, name=None):
43+
yield list(map(maybe_box_native, tup))
44+
else:
45+
for tup in df.itertuples(index=False, name=None):
46+
data = list(tup)
47+
if object_dtype_indices:
48+
# If we have object_dtype_cols, apply maybe_box_native only to those columns
49+
# for perf
50+
for i in object_dtype_indices:
51+
data[i] = maybe_box_native(data[i])
52+
yield data
53+
54+
3255
@overload
3356
def to_dict(
3457
df: DataFrame,
@@ -152,39 +175,38 @@ def to_dict(
152175
# GH46470 Return quickly if orient series to avoid creating dtype objects
153176
return into_c((k, v) for k, v in df.items())
154177

178+
if orient == "dict":
179+
return into_c((k, v.to_dict(into=into)) for k, v in df.items())
180+
155181
box_native_indices = [
156182
i
157183
for i, col_dtype in enumerate(df.dtypes.values)
158184
if col_dtype == np.dtype(object) or isinstance(col_dtype, ExtensionDtype)
159185
]
160-
box_na_values = [
161-
lib.no_default if not isinstance(col_dtype, BaseMaskedDtype) else libmissing.NA
162-
for i, col_dtype in enumerate(df.dtypes.values)
163-
]
164-
are_all_object_dtype_cols = len(box_native_indices) == len(df.dtypes)
165186

166-
if orient == "dict":
167-
return into_c((k, v.to_dict(into=into)) for k, v in df.items())
187+
are_all_object_dtype_cols = len(box_native_indices) == len(df.dtypes)
168188

169-
elif orient == "list":
189+
if orient == "list":
170190
object_dtype_indices_as_set: set[int] = set(box_native_indices)
191+
box_na_values = (
192+
lib.no_default
193+
if not isinstance(col_dtype, BaseMaskedDtype)
194+
else libmissing.NA
195+
for col_dtype in df.dtypes.values
196+
)
171197
return into_c(
172198
(
173199
k,
174-
list(
175-
map(
176-
maybe_box_native, v.to_numpy(na_value=box_na_values[i]).tolist()
177-
)
178-
)
200+
list(map(maybe_box_native, v.to_numpy(na_value=box_na_value)))
179201
if i in object_dtype_indices_as_set
180-
else v.to_numpy().tolist(),
202+
else list(map(maybe_box_native, v.to_numpy())),
181203
)
182-
for i, (k, v) in enumerate(df.items())
204+
for i, (box_na_value, (k, v)) in enumerate(zip(box_na_values, df.items()))
183205
)
184206

185207
elif orient == "split":
186-
data = df._create_data_for_split_and_tight_to_dict(
187-
are_all_object_dtype_cols, box_native_indices
208+
data = list(
209+
create_data_for_split(df, are_all_object_dtype_cols, box_native_indices)
188210
)
189211

190212
return into_c(
@@ -196,10 +218,6 @@ def to_dict(
196218
)
197219

198220
elif orient == "tight":
199-
data = df._create_data_for_split_and_tight_to_dict(
200-
are_all_object_dtype_cols, box_native_indices
201-
)
202-
203221
return into_c(
204222
((("index", df.index.tolist()),) if index else ())
205223
+ (
@@ -219,11 +237,9 @@ def to_dict(
219237
elif orient == "records":
220238
columns = df.columns.tolist()
221239
if are_all_object_dtype_cols:
222-
rows = (
223-
dict(zip(columns, row)) for row in df.itertuples(index=False, name=None)
224-
)
225240
return [
226-
into_c((k, maybe_box_native(v)) for k, v in row.items()) for row in rows
241+
into_c(zip(columns, map(maybe_box_native, row)))
242+
for row in df.itertuples(index=False, name=None)
227243
]
228244
else:
229245
data = [
@@ -239,7 +255,7 @@ def to_dict(
239255
for row in data:
240256
for col in object_dtype_cols:
241257
row[col] = maybe_box_native(row[col])
242-
return data
258+
return data # type: ignore[return-value]
243259

244260
elif orient == "index":
245261
if not df.index.is_unique:
@@ -252,24 +268,21 @@ def to_dict(
252268
)
253269
elif box_native_indices:
254270
object_dtype_indices_as_set = set(box_native_indices)
255-
is_object_dtype_by_index = [
256-
i in object_dtype_indices_as_set for i in range(len(df.columns))
257-
]
258271
return into_c(
259272
(
260273
t[0],
261274
{
262-
columns[i]: maybe_box_native(v)
263-
if is_object_dtype_by_index[i]
275+
column: maybe_box_native(v)
276+
if i in object_dtype_indices_as_set
264277
else v
265-
for i, v in enumerate(t[1:])
278+
for i, (column, v) in enumerate(zip(columns, t[1:]))
266279
},
267280
)
268281
for t in df.itertuples(name=None)
269282
)
270283
else:
271284
return into_c(
272-
(t[0], dict(zip(df.columns, t[1:]))) for t in df.itertuples(name=None)
285+
(t[0], dict(zip(columns, t[1:]))) for t in df.itertuples(name=None)
273286
)
274287

275288
else:

pandas/tests/frame/methods/test_to_dict.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,11 @@
1212
NA,
1313
DataFrame,
1414
Index,
15+
Interval,
1516
MultiIndex,
17+
Period,
1618
Series,
19+
Timedelta,
1720
Timestamp,
1821
)
1922
import pandas._testing as tm
@@ -519,3 +522,14 @@ def test_to_dict_pos_args_deprecation(self):
519522
)
520523
with tm.assert_produces_warning(FutureWarning, match=msg):
521524
df.to_dict("records", {})
525+
526+
527+
@pytest.mark.parametrize(
528+
"val", [Timestamp(2020, 1, 1), Timedelta(1), Period("2020"), Interval(1, 2)]
529+
)
530+
def test_to_dict_list_pd_scalars(val):
531+
# GH 54824
532+
df = DataFrame({"a": [val]})
533+
result = df.to_dict(orient="list")
534+
expected = {"a": [val]}
535+
assert result == expected

pandas/tests/groupby/test_groupby.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -526,16 +526,6 @@ def test_as_index_select_column():
526526
tm.assert_series_equal(result, expected)
527527

528528

529-
def test_obj_arg_get_group_deprecated():
530-
depr_msg = "obj is deprecated"
531-
532-
df = DataFrame({"a": [1, 1, 2], "b": [3, 4, 5]})
533-
expected = df.iloc[df.groupby("b").indices.get(4)]
534-
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
535-
result = df.groupby("b").get_group(4, obj=df)
536-
tm.assert_frame_equal(result, expected)
537-
538-
539529
def test_groupby_as_index_select_column_sum_empty_df():
540530
# GH 35246
541531
df = DataFrame(columns=Index(["A", "B", "C"], name="alpha"))

0 commit comments

Comments
 (0)