Skip to content

Commit 0e4a521

Browse files
author
Rehan Durrani
committed
fix typo and add solution for series + index
Signed-off-by: Rehan Durrani <[email protected]>
1 parent 3ce19ea commit 0e4a521

File tree

2 files changed

+63
-20
lines changed

2 files changed

+63
-20
lines changed

modin/pandas/dataframe.py

Lines changed: 19 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -425,17 +425,21 @@ def groupby(
425425
# groupby takes place.
426426
drop = False
427427
# Check that there is no ambiguity in the parameter we were given.
428-
_by_check = by if is_list_like(by) else [by]
429-
for k in _by_check:
430-
if k in self.index.names and k in self.axes[axis]:
431-
level_name, index_name = "an index", "a column"
432-
if axis == 1:
433-
level_name, index_name = index_name, level_name
434-
raise ValueError(
435-
f"{k} is both {level_name} level and {index_name} label, which is ambiguous."
436-
)
428+
# We don't need to check if `by` is a Series or Index, since those
429+
# won't be referencing labels
430+
if not isinstance(by, (pandas.Series, Series, pandas.Index)):
431+
_by_check = by if is_list_like(by) else [by]
432+
for k in _by_check:
433+
if not isinstance(k, (Series, pandas.Series, pandas.Index)):
434+
if k in self.index.names and k in self.axes[axis ^ 1]:
435+
level_name, index_name = "an index", "a column"
436+
if axis == 1:
437+
level_name, index_name = index_name, level_name
438+
raise ValueError(
439+
f"{k} is both {level_name} level and {index_name} label, which is ambiguous."
440+
)
437441
if (
438-
not isinstance(by, (pandas.Series, Series))
442+
not isinstance(by, (pandas.Series, Series, pandas.Index))
439443
and is_list_like(by)
440444
and len(by) == 1
441445
):
@@ -452,6 +456,11 @@ def groupby(
452456
level, by = by, None
453457
elif level is None:
454458
by = self.__getitem__(by)._query_compiler
459+
elif isinstance(by, (pandas.Series, pandas.Index)):
460+
if isinstance(by, pandas.Index) and len(by) != len(self.axes[axis]):
461+
raise ValueError("Grouper and axis must be same length")
462+
idx_name = by.name
463+
by = Series(by)._query_compiler
455464
elif isinstance(by, Series):
456465
drop = by._parent is self
457466
idx_name = by.name

modin/pandas/test/test_groupby.py

Lines changed: 44 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2076,15 +2076,16 @@ def test_by_in_index_and_columns():
20762076
modin_df,
20772077
pandas_df,
20782078
lambda df: df.groupby(by="a").count(),
2079-
raising_exceptions=True,
2080-
check_exception_type=True,
20812079
)
20822080
eval_general(
20832081
modin_df,
20842082
pandas_df,
20852083
lambda df: df.groupby(by=["a", "b"]).count(),
2086-
raising_exceptions=True,
2087-
check_exception_type=True,
2084+
)
2085+
eval_general(
2086+
modin_df,
2087+
pandas_df,
2088+
lambda df: df.groupby(by=[df["b"], "a"]).count(),
20882089
)
20892090
pandas_df = pandas.DataFrame(
20902091
[[1, 2, 3]], index=pd.Index([(0, 1)], names=["a", "b"]), columns=["a", "b", "c"]
@@ -2094,20 +2095,53 @@ def test_by_in_index_and_columns():
20942095
modin_df,
20952096
pandas_df,
20962097
lambda df: df.groupby(by="a").count(),
2097-
raising_exceptions=True,
2098-
check_exception_type=True,
20992098
)
21002099
eval_general(
21012100
modin_df,
21022101
pandas_df,
21032102
lambda df: df.groupby(by=["a", "c"]).count(),
2104-
raising_exceptions=True,
2105-
check_exception_type=True,
21062103
)
21072104
eval_general(
21082105
modin_df,
21092106
pandas_df,
21102107
lambda df: df.groupby(by=["a", "b"]).count(),
2111-
raising_exceptions=True,
2112-
check_exception_type=True,
2108+
)
2109+
2110+
2111+
def test_by_series():
2112+
pandas_df = pandas.DataFrame(
2113+
[[1, 2, 3]], index=pd.Index([0], name="a"), columns=["a", "b", "c"]
2114+
)
2115+
modin_df = from_pandas(pandas_df)
2116+
2117+
def make_appropriately_typed_series(df, values=["a"]):
2118+
"""Return a Series from either pandas or modin.pandas depending on type of `df`."""
2119+
if isinstance(df, pd.DataFrame):
2120+
return pd.Series(values)
2121+
return pandas.Series(values)
2122+
2123+
eval_general(
2124+
modin_df,
2125+
pandas_df,
2126+
lambda df: df.groupby(by=make_appropriately_typed_series(df)).count(),
2127+
)
2128+
eval_general(
2129+
modin_df,
2130+
pandas_df,
2131+
lambda df: df.groupby(
2132+
by=make_appropriately_typed_series(df, ["a", "b"])
2133+
).count(),
2134+
)
2135+
2136+
2137+
def test_by_index():
2138+
pandas_df = pandas.DataFrame(
2139+
[[1, 2, 3]], index=pd.Index([0], name="a"), columns=["a", "b", "c"]
2140+
)
2141+
modin_df = from_pandas(pandas_df)
2142+
eval_general(modin_df, pandas_df, lambda df: df.groupby(by=pd.Index(["a"])).count())
2143+
eval_general(
2144+
modin_df,
2145+
pandas_df,
2146+
lambda df: df.groupby(by=pd.Index(["a", "b"])).count(),
21132147
)

0 commit comments

Comments
 (0)