Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.5.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Fixed regressions
- Fixed regression in :meth:`DataFrame.fillna` not filling ``NaN`` after other operations such as :meth:`DataFrame.pivot` (:issue:`36495`).
- Fixed performance regression in ``df.groupby(..).rolling(..)`` (:issue:`38038`)
- Fixed regression in :meth:`MultiIndex.intersection` returning duplicates when at least one of the indexes had duplicates (:issue:`36915`)
- Fixed regression in :meth:`.GroupBy.first`, :meth:`.GroupBy.last`, and :meth:`.GroupBy.nth` where ``None`` was considered a non-NA value (:issue:`38286`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't affect ``nth``, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, thanks. Fixed.


.. ---------------------------------------------------------------------------

Expand Down
12 changes: 4 additions & 8 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -928,9 +928,7 @@ def group_last(rank_t[:, :] out,
for j in range(K):
val = values[i, j]

# None should not be treated like other NA-like
# so that it won't be converted to nan
if not checknull(val) or val is None:
if not checknull(val):
# NB: use _treat_as_na here once
# conditional-nogil is available.
nobs[lab, j] += 1
Expand All @@ -939,7 +937,7 @@ def group_last(rank_t[:, :] out,
for i in range(ncounts):
for j in range(K):
if nobs[i, j] < min_count:
out[i, j] = NAN
out[i, j] = None
else:
out[i, j] = resx[i, j]
else:
Expand Down Expand Up @@ -1023,9 +1021,7 @@ def group_nth(rank_t[:, :] out,
for j in range(K):
val = values[i, j]

# None should not be treated like other NA-like
# so that it won't be converted to nan
if not checknull(val) or val is None:
if not checknull(val):
# NB: use _treat_as_na here once
# conditional-nogil is available.
nobs[lab, j] += 1
Expand All @@ -1035,7 +1031,7 @@ def group_nth(rank_t[:, :] out,
for i in range(ncounts):
for j in range(K):
if nobs[i, j] < min_count:
out[i, j] = NAN
out[i, j] = None
else:
out[i, j] = resx[i, j]

Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/groupby/test_nth.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,26 @@ def test_first_last_with_None(method):
tm.assert_frame_equal(result, df)


@pytest.mark.parametrize("method", ["first", "last"])
@pytest.mark.parametrize(
    "df, expected",
    [
        # None and NaN surround the only real value; "foo" is expected
        # for both first and last.
        (
            DataFrame({"id": "a", "value": [None, "foo", np.nan]}),
            DataFrame({"value": ["foo"]}, index=Index(["a"], name="id")),
        ),
        # An object column holding nothing but NaN is expected to
        # aggregate to None.
        (
            DataFrame({"id": "a", "value": [np.nan]}, dtype=object),
            DataFrame({"value": [None]}, index=Index(["a"], name="id")),
        ),
    ],
)
def test_first_last_with_None_expanded(method, df, expected):
    """Check ``first``/``last`` against frames mixing None and NaN.

    Covers GH 32800 and GH 38286.
    """
    grouped = df.groupby("id")
    aggregate = getattr(grouped, method)
    result = aggregate()
    tm.assert_frame_equal(result, expected)


def test_first_last_nth_dtypes(df_mixed_floats):

df = df_mixed_floats.copy()
Expand Down