diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 9210f1e0082f0..6e547f8077348 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1068,6 +1068,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`) - Bug in :meth:`DataFrameGroupBy.agg` where applying a user-defined function to an empty DataFrame returned a Series instead of an empty DataFrame. (:issue:`61503`) - Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` for empty data frame with ``group_keys=False`` still creating output index using group keys. (:issue:`60471`) +- Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` not preserving ``_metadata`` attributes from subclassed DataFrames and Series. (:issue:`62134`) - Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`) - Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`) - Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. 
(:issue:`58811`) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 39607d74c0dc8..1ffe9a05764cd 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -621,7 +621,7 @@ def _wrap_applied_output( if not self.as_index and not_indexed_same: result = self._insert_inaxis_grouper(result) result.index = default_index(len(result)) - return result + return result.__finalize__(self.obj, method="groupby") else: # GH #6265 #24880 result = self.obj._constructor( @@ -630,7 +630,7 @@ def _wrap_applied_output( if not self.as_index: result = self._insert_inaxis_grouper(result) result.index = default_index(len(result)) - return result + return result.__finalize__(self.obj, method="groupby") __examples_series_doc = dedent( """ @@ -2169,7 +2169,7 @@ def _wrap_applied_output_series( if not self.as_index: result = self._insert_inaxis_grouper(result) - return result + return result.__finalize__(self.obj, method="groupby") def _cython_transform( self, diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f9789c82a1536..1fc2d5535ad9e 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1188,7 +1188,7 @@ def _concat_objects( if isinstance(result, Series) and name is not None: result.name = name - return result + return result.__finalize__(self.obj, method="groupby") @final def _set_result_index_ordered( diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index 5ffb3bc147fdf..e1dfb3aabdaf0 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -98,6 +98,35 @@ def func2(group): tm.assert_series_equal(result, expected) +def test_groupby_apply_preserves_metadata(): + # GH#62134 - Test that apply() preserves metadata when returning DataFrames/Series + custom_df = tm.SubclassedDataFrame({"a": [1, 2, 3], "b": [1, 1, 2], "c": [7, 8, 9]}) + custom_df.testattr = 
"hello" + + def sum_func(group): + assert isinstance(group, tm.SubclassedDataFrame) + assert hasattr(group, "testattr") + assert group.testattr == "hello" + return group.sum() + + result = custom_df.groupby("c").apply(sum_func) + assert hasattr(result, "testattr"), "DataFrame apply() should preserve metadata" + assert result.testattr == "hello" + + custom_series = tm.SubclassedSeries([1, 2, 3]) + custom_series.testattr = "hello" + + def sum_series_func(group): + assert isinstance(group, tm.SubclassedSeries) + assert hasattr(group, "testattr") + assert group.testattr == "hello" + return group.sum() + + result = custom_series.groupby(custom_df["c"]).apply(sum_series_func) + assert hasattr(result, "testattr"), "Series apply() should preserve metadata" + assert result.testattr == "hello" + + @pytest.mark.parametrize("obj", [DataFrame, tm.SubclassedDataFrame]) def test_groupby_resample_preserves_subclass(obj): # GH28330 -- preserve subclass through groupby.resample()