Skip to content

Commit 290378f

Browse files
committed
ENH: Address review comments
1 parent 7712840 commit 290378f

File tree

3 files changed

+62
-17
lines changed

3 files changed

+62
-17
lines changed

pandas/_libs/groupby.pyx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -910,7 +910,7 @@ def group_var(
910910
@cython.wraparound(False)
911911
@cython.boundscheck(False)
912912
@cython.cdivision(True)
913-
@cython.cpow
913+
@cython.cpow(True)
914914
def group_skew(
915915
float64_t[:, ::1] out,
916916
int64_t[::1] counts,
@@ -961,7 +961,7 @@ def group_skew(
961961
isna_entry = _treat_as_na(val, False)
962962

963963
if not isna_entry:
964-
# Based on RunningStats::Push from
964+
# Running stats update based on RunningStats::Push from
965965
# https://www.johndcook.com/blog/skewness_kurtosis/
966966
n1 = nobs[lab, j]
967967
n = n1 + 1
@@ -998,7 +998,7 @@ def group_skew(
998998
@cython.wraparound(False)
999999
@cython.boundscheck(False)
10001000
@cython.cdivision(True)
1001-
@cython.cpow
1001+
@cython.cpow(True)
10021002
def group_kurt(
10031003
float64_t[:, ::1] out,
10041004
int64_t[::1] counts,
@@ -1050,7 +1050,7 @@ def group_kurt(
10501050
isna_entry = _treat_as_na(val, False)
10511051

10521052
if not isna_entry:
1053-
# Based on RunningStats::Push from
1053+
# Running stats update based on RunningStats::Push from
10541054
# https://www.johndcook.com/blog/skewness_kurtosis/
10551055
n1 = nobs[lab, j]
10561056
n = n1 + 1

pandas/core/groupby/generic.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1270,13 +1270,8 @@ def skew(
12701270
Name: Max Speed, dtype: float64
12711271
"""
12721272

1273-
def alt(obj):
1274-
# This should not be reached since the cython path should raise
1275-
# TypeError and not NotImplementedError.
1276-
raise TypeError(f"'skew' is not supported for dtype={obj.dtype}")
1277-
12781273
return self._cython_agg_general(
1279-
"skew", alt=alt, skipna=skipna, numeric_only=numeric_only, **kwargs
1274+
"skew", alt=None, skipna=skipna, numeric_only=numeric_only, **kwargs
12801275
)
12811276

12821277
def kurt(
@@ -3084,13 +3079,8 @@ def kurt(
30843079
mammal 0.204125
30853080
"""
30863081

3087-
def alt(obj):
3088-
# This should not be reached since the cython path should raise
3089-
# TypeError and not NotImplementedError.
3090-
raise TypeError(f"'kurt' is not supported for dtype={obj.dtype}")
3091-
30923082
return self._cython_agg_general(
3093-
"kurt", alt=alt, skipna=skipna, numeric_only=numeric_only, **kwargs
3083+
"kurt", alt=None, skipna=skipna, numeric_only=numeric_only, **kwargs
30943084
)
30953085

30963086
@property

pandas/tests/groupby/methods/test_kurt.py

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66

77
def test_groupby_kurt_equivalence():
8+
# GH#40139
89
# Test that the groupby kurt method (which uses libgroupby.group_kurt)
910
# matches the results of operating group-by-group (which uses nanops.nankurt)
1011
nrows = 1000
@@ -23,5 +24,59 @@ def test_groupby_kurt_equivalence():
2324

2425
grpwise = [grp.kurt().to_frame(i).T for i, grp in gb]
2526
expected = pd.concat(grpwise, axis=0)
26-
expected.index = expected.index.astype(result.index.dtype) # 32bit builds
27+
expected.index = expected.index.astype(np.intp) # 32bit builds
2728
tm.assert_frame_equal(result, expected)
29+
30+
31+
def test_groupby_kurt_arrow_float64():
32+
# GH#40139
33+
# Test groupby.kurt() with skipna = False
34+
df = pd.DataFrame(
35+
{
36+
"x": [1.0, np.nan, 3.2, 4.8, 2.3, 1.9, 8.9],
37+
"y": [1.6, 3.3, 3.2, 6.8, 1.3, 2.9, 9.0],
38+
},
39+
dtype="float64[pyarrow]",
40+
)
41+
gb = df.groupby(by=lambda x: 0)
42+
43+
result = gb.kurt()
44+
expected = pd.DataFrame(
45+
{"x": [2.1644713], "y": [0.1513969]}, dtype="float64[pyarrow]"
46+
)
47+
tm.assert_almost_equal(result, expected)
48+
49+
50+
def test_groupby_kurt_noskipna():
51+
# GH#40139
52+
# Test groupby.kurt() with skipna = False
53+
df = pd.DataFrame(
54+
{
55+
"x": [1.0, np.nan, 3.2, 4.8, 2.3, 1.9, 8.9],
56+
"y": [1.6, 3.3, 3.2, 6.8, 1.3, 2.9, 9.0],
57+
}
58+
)
59+
gb = df.groupby(by=lambda x: 0)
60+
61+
result = gb.kurt(skipna=False)
62+
expected = pd.DataFrame({"x": [np.nan], "y": [0.1513969]})
63+
tm.assert_almost_equal(result, expected)
64+
65+
66+
def test_groupby_kurt_all_ones():
67+
# GH#40139
68+
# Test groupby.kurt() on a constant column (all values equal), with skipna = False
69+
df = pd.DataFrame(
70+
{
71+
"x": [1.0] * 10,
72+
}
73+
)
74+
gb = df.groupby(by=lambda x: 0)
75+
76+
result = gb.kurt(skipna=False)
77+
expected = pd.DataFrame(
78+
{
79+
"x": [0.0], # Same behavior as pd.DataFrame.kurt()
80+
}
81+
)
82+
tm.assert_almost_equal(result, expected)

0 commit comments

Comments
 (0)