Merge branch 'main' into add-bodo-to-pd-ecosystem

ritwika314 · web-flow · commit 828c469adf7a · 2024-07-24T11:13:41.000-04:00
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -236,6 +236,7 @@ Other API changes
 - All classes inheriting from builtin ``tuple`` (including types created with :func:`collections.namedtuple`) are now hashed and compared as builtin ``tuple`` during indexing operations (:issue:`57922`)
 - Made ``dtype`` a required argument in :meth:`ExtensionArray._from_sequence_of_strings` (:issue:`56519`)
 - Passing a :class:`Series` input to :func:`json_normalize` will now retain the :class:`Series` :class:`Index`, previously output had a new :class:`RangeIndex` (:issue:`51452`)
+- Removed :meth:`Index.sort` which always raised a ``TypeError``. This attribute is not defined and will raise an ``AttributeError`` (:issue:`59283`)
 - Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. Custom styling can still be done using :meth:`Styler.to_excel` (:issue:`54154`)
 - pickle and HDF (``.h5``) files created with Python 2 are no longer explicitly supported (:issue:`57387`)
 - pickled objects from pandas version less than ``1.0.0`` are no longer supported (:issue:`57155`)
@@ -283,6 +284,7 @@ Other Deprecations
 - Deprecated allowing non-keyword arguments in :meth:`DataFrame.all`, :meth:`DataFrame.min`, :meth:`DataFrame.max`, :meth:`DataFrame.sum`, :meth:`DataFrame.prod`, :meth:`DataFrame.mean`, :meth:`DataFrame.median`, :meth:`DataFrame.sem`, :meth:`DataFrame.var`, :meth:`DataFrame.std`, :meth:`DataFrame.skew`, :meth:`DataFrame.kurt`, :meth:`Series.all`,  :meth:`Series.min`, :meth:`Series.max`, :meth:`Series.sum`, :meth:`Series.prod`, :meth:`Series.mean`, :meth:`Series.median`, :meth:`Series.sem`, :meth:`Series.var`, :meth:`Series.std`, :meth:`Series.skew`, and :meth:`Series.kurt`. (:issue:`57087`)
 - Deprecated allowing non-keyword arguments in :meth:`Series.to_markdown` except ``buf``. (:issue:`57280`)
 - Deprecated allowing non-keyword arguments in :meth:`Series.to_string` except ``buf``. (:issue:`57280`)
+- Deprecated behavior of :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupBy.groups`, in a future version ``groups`` by one element list will return tuple instead of scalar. (:issue:`58858`)
 - Deprecated behavior of :meth:`Series.dt.to_pytimedelta`, in a future version this will return a :class:`Series` containing python ``datetime.timedelta`` objects instead of an ``ndarray`` of timedelta; this matches the behavior of other :meth:`Series.dt` properties. (:issue:`57463`)
 - Deprecated lowercase strings ``d``, ``b`` and ``c`` denoting frequencies in :class:`Day`, :class:`BusinessDay` and :class:`CustomBusinessDay` in favour of ``D``, ``B`` and ``C`` (:issue:`58998`)
 - Deprecated lowercase strings ``w``, ``w-mon``, ``w-tue``, etc. denoting frequencies in :class:`Week` in favour of ``W``, ``W-MON``, ``W-TUE``, etc. (:issue:`58998`)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -634,7 +634,7 @@ def groups(self) -> dict[Hashable, Index]:
         0  1  2  3
         1  1  5  6
         2  7  8  9
-        >>> df.groupby(by=["a"]).groups
+        >>> df.groupby(by="a").groups
         {1: [0, 1], 7: [2]}
 
         For Resampler:
@@ -654,6 +654,15 @@ def groups(self) -> dict[Hashable, Index]:
         >>> ser.resample("MS").groups
         {Timestamp('2023-01-01 00:00:00'): 2, Timestamp('2023-02-01 00:00:00'): 4}
         """
+        if isinstance(self.keys, list) and len(self.keys) == 1:
+            warnings.warn(
+                "`groups` by one element list returns scalar is deprecated "
+                "and will be removed. In a future version `groups` by one element "
+                "list will return tuple. Use ``df.groupby(by='a').groups`` "
+                "instead of ``df.groupby(by=['a']).groups`` to avoid this warning",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
         return self._grouper.groups
 
     @final
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -5785,13 +5785,6 @@ def sort_values(
         else:
             return sorted_index
 
-    @final
-    def sort(self, *args, **kwargs):
-        """
-        Use sort_values instead.
-        """
-        raise TypeError("cannot sort an Index object in-place, use sort_values instead")
-
     def shift(self, periods: int = 1, freq=None) -> Self:
         """
         Shift index by desired number of time frequency increments.
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
@@ -545,31 +545,38 @@ def test_multiindex_columns_empty_level(self):
 
         df = DataFrame([[1, "A"]], columns=midx)
 
+        msg = "`groups` by one element list returns scalar is deprecated"
         grouped = df.groupby("to filter").groups
         assert grouped["A"] == [0]
 
-        grouped = df.groupby([("to filter", "")]).groups
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            grouped = df.groupby([("to filter", "")]).groups
         assert grouped["A"] == [0]
 
         df = DataFrame([[1, "A"], [2, "B"]], columns=midx)
 
         expected = df.groupby("to filter").groups
-        result = df.groupby([("to filter", "")]).groups
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.groupby([("to filter", "")]).groups
         assert result == expected
 
         df = DataFrame([[1, "A"], [2, "A"]], columns=midx)
 
         expected = df.groupby("to filter").groups
-        result = df.groupby([("to filter", "")]).groups
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.groupby([("to filter", "")]).groups
         tm.assert_dict_equal(result, expected)
 
     def test_groupby_multiindex_tuple(self):
-        # GH 17979
+        # GH 17979, GH#59179
         df = DataFrame(
             [[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]],
             columns=MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]),
         )
-        expected = df.groupby([("b", 1)]).groups
+
+        msg = "`groups` by one element list returns scalar is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = df.groupby([("b", 1)]).groups
         result = df.groupby(("b", 1)).groups
         tm.assert_dict_equal(expected, result)
 
@@ -579,17 +586,21 @@ def test_groupby_multiindex_tuple(self):
                 [["a", "b", "b", "c"], ["d", "d", "e", "e"]]
             ),
         )
-        expected = df2.groupby([("b", "d")]).groups
+
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = df2.groupby([("b", "d")]).groups
         result = df.groupby(("b", 1)).groups
         tm.assert_dict_equal(expected, result)
 
         df3 = DataFrame(df.values, columns=[("a", "d"), ("b", "d"), ("b", "e"), "c"])
-        expected = df3.groupby([("b", "d")]).groups
+
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = df3.groupby([("b", "d")]).groups
         result = df.groupby(("b", 1)).groups
         tm.assert_dict_equal(expected, result)
 
     def test_groupby_multiindex_partial_indexing_equivalence(self):
-        # GH 17977
+        # GH 17977, GH#59179
         df = DataFrame(
             [[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]],
             columns=MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]),
@@ -615,8 +626,10 @@ def test_groupby_multiindex_partial_indexing_equivalence(self):
         result_max = df.groupby([("a", 1)])["b"].max()
         tm.assert_frame_equal(expected_max, result_max)
 
-        expected_groups = df.groupby([("a", 1)])[[("b", 1), ("b", 2)]].groups
-        result_groups = df.groupby([("a", 1)])["b"].groups
+        msg = "`groups` by one element list returns scalar is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected_groups = df.groupby([("a", 1)])[[("b", 1), ("b", 2)]].groups
+            result_groups = df.groupby([("a", 1)])["b"].groups
         tm.assert_dict_equal(expected_groups, result_groups)
 
     def test_groupby_level(self, sort, multiindex_dataframe_random_data, df):
@@ -719,15 +732,18 @@ def test_grouping_labels(self, multiindex_dataframe_random_data):
         tm.assert_almost_equal(grouped._grouper.codes[0], exp_labels)
 
     def test_list_grouper_with_nat(self):
-        # GH 14715
+        # GH 14715, GH#59179
         df = DataFrame({"date": date_range("1/1/2011", periods=365, freq="D")})
         df.iloc[-1] = pd.NaT
         grouper = Grouper(key="date", freq="YS")
+        msg = "`groups` by one element list returns scalar is deprecated"
 
         # Grouper in a list grouping
-        result = df.groupby([grouper])
+        gb = df.groupby([grouper])
         expected = {Timestamp("2011-01-01"): Index(list(range(364)))}
-        tm.assert_dict_equal(result.groups, expected)
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = gb.groups
+        tm.assert_dict_equal(result, expected)
 
         # Test case without a list
         result = df.groupby(grouper)
@@ -994,17 +1010,20 @@ def test_gb_key_len_equal_axis_len(self):
 class TestIteration:
     def test_groups(self, df):
         grouped = df.groupby(["A"])
-        groups = grouped.groups
-        assert groups is grouped.groups  # caching works
+        msg = "`groups` by one element list returns scalar is deprecated"
+
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            groups = grouped.groups
+            assert groups is grouped.groups  # caching works
 
-        for k, v in grouped.groups.items():
+        for k, v in groups.items():
             assert (df.loc[v]["A"] == k).all()
 
         grouped = df.groupby(["A", "B"])
         groups = grouped.groups
         assert groups is grouped.groups  # caching works
 
-        for k, v in grouped.groups.items():
+        for k, v in groups.items():
             assert (df.loc[v]["A"] == k[0]).all()
             assert (df.loc[v]["B"] == k[1]).all()
 
diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
@@ -534,6 +534,9 @@ def test_groupby_raises_category_np(
     _call_and_check(klass, msg, how, gb, groupby_func_np, ())
 
 
+@pytest.mark.filterwarnings(
+    "ignore:`groups` by one element list returns scalar is deprecated"
+)
 @pytest.mark.parametrize("how", ["method", "agg", "transform"])
 def test_groupby_raises_category_on_category(
     how,
diff --git a/pandas/tests/indexes/test_any_index.py b/pandas/tests/indexes/test_any_index.py
@@ -22,12 +22,6 @@ def test_boolean_context_compat(index):
         bool(index)
 
 
-def test_sort(index):
-    msg = "cannot sort an Index object in-place, use sort_values instead"
-    with pytest.raises(TypeError, match=msg):
-        index.sort()
-
-
 def test_hash_error(index):
     with pytest.raises(TypeError, match=f"unhashable type: '{type(index).__name__}'"):
         hash(index)