Skip to content

Commit a5bb81e

Browse files
author
Rehan Durrani
committed
Add test for when by is in intersection of index names and columns
Signed-off-by: Rehan Durrani <[email protected]>
1 parent d2e8c5f commit a5bb81e

File tree

2 files changed

+48
-1
lines changed

2 files changed

+48
-1
lines changed

modin/core/dataframe/algebra/groupby.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ def map(
137137
# grouping by columns that were recently added to the data via
138138
# `from_labels`. The internal dataframe doesn't know what to do when
139139
# the label matches a column name.
140+
# We ensure that the columns, index, and `by` don't intersect at the API level,
141+
# so if we hit this branch, we know it's a result of a deferred re-index.
140142
if len(df.columns.intersection(df.index.names)) > 0:
141143
df = df.reset_index(drop=True)
142144
by_part = other

modin/pandas/test/test_groupby.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1566,7 +1566,7 @@ def test_agg_exceptions(operation):
15661566
},
15671567
],
15681568
)
1569-
def test_to_pandas_convertion(kwargs):
1569+
def test_to_pandas_conversion(kwargs):
15701570
data = {"a": [1, 2], "b": [3, 4], "c": [5, 6]}
15711571
by = ["a", "b"]
15721572

@@ -2025,3 +2025,48 @@ def test_reset_index_groupby():
20252025
pandas_df,
20262026
lambda df: df.reset_index().groupby(["index_0", "index_1"]).count(),
20272027
)
2028+
2029+
def test_by_in_index_and_columns():
2030+
pandas_df = pandas.DataFrame(
2031+
[[1, 2, 3]], index=pd.Index([0], name="a"), columns=['a', 'b', 'c']
2032+
)
2033+
modin_df = from_pandas(pandas_df)
2034+
eval_general(
2035+
modin_df,
2036+
pandas_df,
2037+
lambda df: df.groupby(by='a').count(),
2038+
raising_exceptions=True,
2039+
check_exception_type=True,
2040+
)
2041+
eval_general(
2042+
modin_df,
2043+
pandas_df,
2044+
lambda df: df.groupby(by=['a', 'b']).count(),
2045+
raising_exceptions=True,
2046+
check_exception_type=True,
2047+
)
2048+
pandas_df = pandas.DataFrame(
2049+
[[1, 2, 3]], index=pd.Index([(0, 1)], names=["a", 'b']), columns=['a', 'b', 'c']
2050+
)
2051+
modin_df = from_pandas(pandas_df)
2052+
eval_general(
2053+
modin_df,
2054+
pandas_df,
2055+
lambda df: df.groupby(by='a').count(),
2056+
raising_exceptions=True,
2057+
check_exception_type=True,
2058+
)
2059+
eval_general(
2060+
modin_df,
2061+
pandas_df,
2062+
lambda df: df.groupby(by=['a', 'c']).count(),
2063+
raising_exceptions=True,
2064+
check_exception_type=True,
2065+
)
2066+
eval_general(
2067+
modin_df,
2068+
pandas_df,
2069+
lambda df: df.groupby(by=['a', 'b']).count(),
2070+
raising_exceptions=True,
2071+
check_exception_type=True,
2072+
)

0 commit comments

Comments
 (0)