Add test for when `by` is in the intersection of index names and columns

Rehan Durrani · Rehan Durrani · commit a5bb81e5c337 · 2022-06-07T11:32:04.000-07:00
Signed-off-by: Rehan Durrani <rehan@ponder.io>
diff --git a/modin/core/dataframe/algebra/groupby.py b/modin/core/dataframe/algebra/groupby.py
@@ -137,6 +137,8 @@ def map(
                 # grouping by columns that were recently added to the data via
                 # `from_labels`. The internal dataframe doesn't know what to do when
                 # the label matches a column name.
+                # We ensure that the columns, index, and `by` don't intersect at the API level,
+                # so if we hit this `if` statement, we know it's a result of a deferred re-index.
                 if len(df.columns.intersection(df.index.names)) > 0:
                     df = df.reset_index(drop=True)
             by_part = other
diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
@@ -1566,7 +1566,7 @@ def test_agg_exceptions(operation):
         },
     ],
 )
-def test_to_pandas_convertion(kwargs):
+def test_to_pandas_conversion(kwargs):
     data = {"a": [1, 2], "b": [3, 4], "c": [5, 6]}
     by = ["a", "b"]
 
@@ -2025,3 +2025,40 @@ def test_reset_index_groupby():
         pandas_df,
         lambda df: df.reset_index().groupby(["index_0", "index_1"]).count(),
     )
+
+
+def test_by_in_index_and_columns():
+    """Group by labels shared between index level names and column labels.
+
+    Every case is passed to ``eval_general`` with ``raising_exceptions=True`` and
+    ``check_exception_type=True``, which presumably means Modin has to match
+    pandas even when pandas raises -- confirm against ``eval_general``.
+    """
+
+    def check_groupby_count(modin_df, pandas_df, by):
+        # `by` is a parameter, so each lambda closes over its own value.
+        eval_general(
+            modin_df,
+            pandas_df,
+            lambda df: df.groupby(by=by).count(),
+            raising_exceptions=True,
+            check_exception_type=True,
+        )
+
+    # Single-level index named "a", which is also a column label.
+    pandas_df = pandas.DataFrame(
+        [[1, 2, 3]], index=pd.Index([0], name="a"), columns=["a", "b", "c"]
+    )
+    modin_df = from_pandas(pandas_df)
+    for by in ("a", ["a", "b"]):
+        check_groupby_count(modin_df, pandas_df, by)
+
+    # Two-level index named ("a", "b"); both names are also column labels.
+    pandas_df = pandas.DataFrame(
+        [[1, 2, 3]],
+        index=pd.Index([(0, 1)], names=["a", "b"]),
+        columns=["a", "b", "c"],
+    )
+    modin_df = from_pandas(pandas_df)
+    for by in ("a", ["a", "c"], ["a", "b"]):
+        check_groupby_count(modin_df, pandas_df, by)