-
-
Notifications
You must be signed in to change notification settings - Fork 145
GH456 First attempt GroupBy.transform improved typing #1242
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
020f93d
106a6f5
3bba101
053b7e7
4141a06
f9863d0
e26b4c1
96abf3b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,56 @@ | ||
from collections.abc import Hashable | ||
import dataclasses | ||
from typing import ( | ||
Literal, | ||
TypeAlias, | ||
) | ||
|
||
@dataclasses.dataclass(order=True, frozen=True) | ||
class OutputKey: | ||
label: Hashable | ||
position: int | ||
|
||
ReductionKernelType: TypeAlias = Literal[ | ||
"all", | ||
"any", | ||
"corrwith", | ||
"count", | ||
"first", | ||
"idxmax", | ||
"idxmin", | ||
"last", | ||
"max", | ||
"mean", | ||
"median", | ||
"min", | ||
"nunique", | ||
"prod", | ||
# as long as `quantile`'s signature accepts only | ||
# a single quantile value, it's a reduction. | ||
# GH#27526 might change that. | ||
"quantile", | ||
"sem", | ||
"size", | ||
"skew", | ||
"std", | ||
"sum", | ||
"var", | ||
] | ||
|
||
TransformationKernelType: TypeAlias = Literal[ | ||
"bfill", | ||
"cumcount", | ||
"cummax", | ||
"cummin", | ||
"cumprod", | ||
"cumsum", | ||
"diff", | ||
"ffill", | ||
"fillna", | ||
"ngroup", | ||
"pct_change", | ||
"rank", | ||
"shift", | ||
] | ||
|
||
TransformReductionListType: TypeAlias = ReductionKernelType | TransformationKernelType |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1081,7 +1081,7 @@ def test_types_groupby_agg() -> None: | |
|
||
def sum_sr(s: pd.Series[int]) -> int: | ||
# type of `sum` not well inferred by mypy | ||
return sum(s) | ||
return s.sum() | ||
|
||
check( | ||
assert_type(s.groupby(level=0).agg(sum_sr), "pd.Series[int]"), | ||
|
@@ -1119,6 +1119,20 @@ def transform_func( | |
pd.Series, | ||
float, | ||
) | ||
check( | ||
assert_type( | ||
s.groupby(lambda x: x).transform("mean"), | ||
"pd.Series", | ||
), | ||
pd.Series, | ||
) | ||
check( | ||
assert_type( | ||
s.groupby(lambda x: x).transform("first"), | ||
"pd.Series", | ||
), | ||
pd.Series, | ||
) | ||
|
||
|
||
def test_types_groupby_aggregate() -> None: | ||
|
@@ -1133,7 +1147,11 @@ def func(s: pd.Series[int]) -> float: | |
return s.astype(float).min() | ||
|
||
s = pd.Series([1, 2, 3, 4]) | ||
s.groupby([1, 1, 2, 2]).agg(lambda x: x.astype(float).min()) | ||
check( | ||
assert_type(s.groupby([1, 1, 2, 2]).agg(func), "pd.Series[float]"), | ||
pd.Series, | ||
np.floating, | ||
) | ||
check( | ||
assert_type(s.groupby(level=0).aggregate(func), "pd.Series[float]"), | ||
pd.Series, | ||
|
@@ -1147,6 +1165,9 @@ def func(s: pd.Series[int]) -> float: | |
np.floating, | ||
) | ||
|
||
# test below passes with mypy but pyright correctly sees it as pd.Series[float] | ||
# check(assert_type(s.groupby([1,1,2,2]).agg(lambda x: x.astype(float).min()), pd.Series), pd.Series, float) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Keep the commented test in there so it is still there and executes, since it works for both type checkers, but comment out the one that is "better" that has … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am forced to comment it out because … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I guess you have to keep it commented out. Do you have a test like this that passes both checkers: func: Callable[[pd.Series], float] = lambda x: x.astype(float).min()
check(assert_type(s.groupby([1,1,2,2]).agg(func), "pd.Series[float]"), pd.Series, float) So you can have the "preferred" version in there commented out, but I think the above test would pass both type checkers. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Actually that also fails to pass with mypy (pyright is fine with it). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I tried a bunch of ideas and couldn't get it to work. It's probably a mypy bug, but I couldn't come up with a simple example that illustrates the problem. |
||
|
||
with pytest_warns_bounded( | ||
FutureWarning, | ||
r"The provided callable <built-in function (min|sum)> is currently using", | ||
|
@@ -1155,7 +1176,7 @@ def func(s: pd.Series[int]) -> float: | |
|
||
def sum_sr(s: pd.Series[int]) -> int: | ||
# type of `sum` not well inferred by mypy | ||
return sum(s) | ||
return s.sum() | ||
|
||
check( | ||
assert_type(s.groupby(level=0).aggregate(sum_sr), "pd.Series[int]"), | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just have to change the comment to say "fails with mypy".