Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Bug fixes
- Bug in :class:`Series` constructor raising a ``TypeError`` when constructing sparse datetime64 dtypes (:issue:`35762`)
- Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`)
- Bug in :meth:`DatetimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should be ``""`` (:issue:`35712`)
- Bug in :meth:`DataFrame.groupby(...).apply(...)` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
-

.. ---------------------------------------------------------------------------
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
from pandas.core.dtypes.missing import isna

from pandas.core.arrays.categorical import (
factorize_from_iterable,
Expand Down Expand Up @@ -619,17 +620,16 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
codes_list = []

# things are potentially different sizes, so compute the exact codes
# for each level and pass those to MultiIndex.from_arrays

# for each level and pass those to MultiIndex.from_arrays.
for hlevel, level in zip(zipped, levels):
to_concat = []
for key, index in zip(hlevel, indexes):
mask = level == key
mask = (isna(level) & isna(key)) | (level == key)
if not mask.any():
raise ValueError(f"Key {key} not in level {level}")
i = np.nonzero(level == key)[0][0]

i = np.nonzero(mask)[0][0]
to_concat.append(np.repeat(i, len(index)))

codes_list.append(np.concatenate(to_concat))

concat_index = _concat_indexes(indexes)
Expand Down
55 changes: 55 additions & 0 deletions pandas/tests/groupby/test_groupby_dropna.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,3 +276,58 @@ def test_groupby_dropna_datetime_like_data(
expected = pd.DataFrame({"values": values}, index=pd.Index(indexes, name="dt"))

tm.assert_frame_equal(grouped, expected)


@pytest.mark.parametrize(
    "dropna, data, selected_data, levels",
    [
        # NaN present among the group keys and kept as its own group.
        pytest.param(
            False,
            {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]},
            {"values": [0, 1, 0, 0]},
            ["a", "b", np.nan],
            id="dropna_false_has_nan",
        ),
        # NaN present among the group keys but dropped from the result.
        pytest.param(
            True,
            {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]},
            {"values": [0, 1, 0]},
            None,
            id="dropna_true_has_nan",
        ),
        # Without any NaN key the ``dropna`` flag must make no difference,
        # so both settings share the same expected values.
        pytest.param(
            False,
            {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]},
            {"values": [0, 1, 0, 0]},
            None,
            id="dropna_false_no_nan",
        ),
        pytest.param(
            True,
            {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]},
            {"values": [0, 1, 0, 0]},
            None,
            id="dropna_true_no_nan",
        ),
    ],
)
def test_groupby_apply_with_dropna_for_multi_index(
    dropna, data, selected_data, levels
):
    """Check ``groupby(..., dropna=...).apply`` when each group returns a frame.

    Every group is mapped to a new DataFrame holding ``range(len(group))``,
    and the combined result is compared against a frame indexed by a
    ``(group key, position)`` MultiIndex.
    """
    # GH 35889

    def _enumerate_rows(group):
        # One row per member of the group, numbered from zero.
        return pd.DataFrame({"values": range(len(group))})

    frame = pd.DataFrame(data)
    result = frame.groupby("groups", dropna=dropna).apply(_enumerate_rows)

    # ``zip`` stops at the shorter input, so when the NaN group is dropped
    # (three expected values, four keys) the trailing NaN key is left out.
    index_pairs = tuple(zip(data["groups"], selected_data["values"]))
    expected_index = pd.MultiIndex.from_tuples(index_pairs, names=["groups", None])

    # For now the ``from_*`` constructors drop NA from the levels by default,
    # so the NA level has to be put back by hand when it is expected.
    if levels and not dropna:
        expected_index = expected_index.set_levels(levels, level="groups")

    expected = pd.DataFrame(selected_data, index=expected_index)
    tm.assert_frame_equal(result, expected)