Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Bug fixes
- Bug in :class:`Series` constructor raising a ``TypeError`` when constructing sparse datetime64 dtypes (:issue:`35762`)
- Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`)
- Bug in :meth:`DatetimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should be ``""`` (:issue:`35712`)
- Bug in :meth:`DataFrame.groupby(...).apply(...)` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
-

.. ---------------------------------------------------------------------------
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
from pandas.core.dtypes.missing import isna

from pandas.core.arrays.categorical import (
factorize_from_iterable,
Expand Down Expand Up @@ -619,17 +620,16 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
codes_list = []

# things are potentially different sizes, so compute the exact codes
# for each level and pass those to MultiIndex.from_arrays

# for each level and pass those to MultiIndex.from_arrays.
for hlevel, level in zip(zipped, levels):
to_concat = []
for key, index in zip(hlevel, indexes):
mask = level == key
mask = (isna(level) & isna(key)) | (level == key)
if not mask.any():
raise ValueError(f"Key {key} not in level {level}")
i = np.nonzero(level == key)[0][0]

i = np.nonzero(mask)[0][0]
to_concat.append(np.repeat(i, len(index)))

codes_list.append(np.concatenate(to_concat))

concat_index = _concat_indexes(indexes)
Expand Down
53 changes: 53 additions & 0 deletions pandas/tests/groupby/test_groupby_dropna.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,3 +276,56 @@ def test_groupby_dropna_datetime_like_data(
expected = pd.DataFrame({"values": values}, index=pd.Index(indexes, name="dt"))

tm.assert_frame_equal(grouped, expected)


@pytest.mark.parametrize(
    "dropna, data, selected_data, levels",
    [
        # NaN present in "groups" and kept as its own group.
        pytest.param(
            False,
            {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]},
            {"values": [0, 1, 0, 0]},
            ["a", "b", np.nan],
            id="dropna_false_has_nan",
        ),
        # NaN present in "groups" but dropped, so one fewer output row.
        pytest.param(
            True,
            {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]},
            {"values": [0, 1, 0]},
            None,
            id="dropna_true_has_nan",
        ),
        # No NaN in "groups": dropna=False and dropna=True must agree.
        pytest.param(
            False,
            {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]},
            {"values": [0, 1, 0, 0]},
            None,
            id="dropna_false_no_nan",
        ),
        pytest.param(
            True,
            {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]},
            {"values": [0, 1, 0, 0]},
            None,
            id="dropna_true_no_nan",
        ),
    ],
)
def test_groupby_apply_with_dropna_for_multi_index(dropna, data, selected_data, levels):
    """Check ``groupby(...).apply(...)`` output when a group key may be NaN.

    Regression test for GH 35889. Each group is replaced by a frame that
    numbers its rows from zero, so the result is indexed by a two-level
    MultiIndex of (group key, position within the group).
    """

    def number_rows(group):
        # One "values" column counting 0..len(group)-1 for this group.
        return pd.DataFrame({"values": range(len(group))})

    frame = pd.DataFrame(data)
    result = frame.groupby("groups", dropna=dropna).apply(number_rows)

    # Pair each group label with its expected within-group position. zip()
    # stops at the shorter input, so when selected_data has fewer rows than
    # data (the dropna=True case with a trailing NaN) that label is left out.
    index_tuples = tuple(zip(data["groups"], selected_data["values"]))
    expected_index = pd.MultiIndex.from_tuples(index_tuples, names=["groups", None])

    # MultiIndex.from_* constructors currently drop NA from the levels, so
    # when NaN is expected as a group key the levels are set explicitly.
    if levels and not dropna:
        expected_index = expected_index.set_levels(levels, level="groups")

    expected = pd.DataFrame(selected_data, index=expected_index)
    tm.assert_frame_equal(result, expected)