Skip to content

Commit 3be5083

Browse files
committed
BUG: groupby.idxmin/idxmax with all NA values should raise
1 parent 9ff14a3 commit 3be5083

File tree

6 files changed, with 44 additions and 19 deletions.

pandas/_libs/groupby.pyx

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2048,9 +2048,8 @@ def group_idxmin_idxmax(
20482048
group_min_or_max = np.empty_like(out, dtype=values.dtype)
20492049
seen = np.zeros_like(out, dtype=np.uint8)
20502050

2051-
# When using transform, we need a valid value for take in the case
2052-
# a category is not observed; these values will be dropped
2053-
out[:] = 0
2051+
# Sentinel for no valid values.
2052+
out[:] = -1
20542053

20552054
with nogil(numeric_object_t is not object):
20562055
for i in range(N):

pandas/core/groupby/groupby.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1784,7 +1784,7 @@ def array_func(values: ArrayLike) -> ArrayLike:
17841784
new_mgr = data.grouped_reduce(array_func)
17851785
res = self._wrap_agged_manager(new_mgr)
17861786
if how in ["idxmin", "idxmax"]:
1787-
res = self._wrap_idxmax_idxmin(res)
1787+
res = self._wrap_idxmax_idxmin(res, how=how, skipna=kwargs["skipna"])
17881788
out = self._wrap_aggregated_output(res)
17891789
return out
17901790

@@ -5715,10 +5715,17 @@ def _idxmax_idxmin(
57155715
)
57165716
return result
57175717

5718-
def _wrap_idxmax_idxmin(self, res: NDFrameT) -> NDFrameT:
5718+
def _wrap_idxmax_idxmin(
5719+
self, res: NDFrameT, how: Literal["idxmax", "idxmin"], skipna: bool
5720+
) -> NDFrameT:
57195721
index = self.obj.index
57205722
if res.size == 0:
57215723
result = res.astype(index.dtype)
5724+
elif skipna and res.lt(0).any(axis=None):
5725+
raise ValueError(
5726+
f"{type(self).__name__}.{how} with skipna=True encountered all NA "
5727+
f"values in a group."
5728+
)
57225729
else:
57235730
if isinstance(index, MultiIndex):
57245731
index = index.to_flat_index()

pandas/core/groupby/grouper.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -286,18 +286,22 @@ def __init__(
286286
self._indexer: npt.NDArray[np.intp] | None = None
287287

288288
def _get_grouper(
289-
self, obj: NDFrameT, validate: bool = True
289+
self, obj: NDFrameT, validate: bool = True, observed: bool = True
290290
) -> tuple[ops.BaseGrouper, NDFrameT]:
291291
"""
292292
Parameters
293293
----------
294294
obj : Series or DataFrame
295+
Object being grouped.
295296
validate : bool, default True
296-
if True, validate the grouper
297+
If True, validate the grouper.
298+
observed : bool, default True
299+
Whether only observed groups should be in the result. Only
300+
has an impact when grouping on categorical data.
297301
298302
Returns
299303
-------
300-
a tuple of grouper, obj (possibly sorted)
304+
A tuple of grouper, obj (possibly sorted)
301305
"""
302306
obj, _, _ = self._set_grouper(obj)
303307
grouper, _, obj = get_grouper(
@@ -307,6 +311,7 @@ def _get_grouper(
307311
sort=self.sort,
308312
validate=validate,
309313
dropna=self.dropna,
314+
observed=observed,
310315
)
311316

312317
return grouper, obj
@@ -787,7 +792,7 @@ def get_grouper(
787792

788793
# a passed-in Grouper, directly convert
789794
if isinstance(key, Grouper):
790-
grouper, obj = key._get_grouper(obj, validate=False)
795+
grouper, obj = key._get_grouper(obj, validate=False, observed=observed)
791796
if key.key is None:
792797
return grouper, frozenset(), obj
793798
else:

pandas/core/groupby/ops.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -761,7 +761,8 @@ def result_index(self) -> Index:
761761
def ids(self) -> npt.NDArray[np.intp]:
762762
return self.result_index_and_ids[1]
763763

764-
@cache_readonly
764+
# @cache_readonly
765+
@property
765766
def result_index_and_ids(self) -> tuple[Index, npt.NDArray[np.intp]]:
766767
levels = [Index._with_infer(ping.uniques) for ping in self.groupings]
767768
obs = [

pandas/core/resample.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2305,8 +2305,22 @@ def _get_resampler(self, obj: NDFrame) -> Resampler:
23052305
)
23062306

23072307
def _get_grouper(
2308-
self, obj: NDFrameT, validate: bool = True
2308+
self, obj: NDFrameT, validate: bool = True, observed: bool = True
23092309
) -> tuple[BinGrouper, NDFrameT]:
2310+
"""
2311+
Parameters
2312+
----------
2313+
obj : Series or DataFrame
2314+
Object being grouped.
2315+
validate : bool, default True
2316+
Unused. Only for compatibility with ``Grouper._get_grouper``.
2317+
observed : bool, default True
2318+
Unused. Only for compatibility with ``Grouper._get_grouper``.
2319+
2320+
Returns
2321+
-------
2322+
A tuple of grouper, obj (possibly sorted)
2323+
"""
23102324
# create the resampler and return our binner
23112325
r = self._get_resampler(obj)
23122326
return r._grouper, cast(NDFrameT, r.obj)

pandas/tests/groupby/test_reductions.py

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -272,7 +272,7 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
272272
max_value = np.finfo(float_numpy_dtype).max
273273
df = DataFrame(
274274
{
275-
"a": Series(np.repeat(range(1, 6), repeats=2), dtype="intp"),
275+
"a": Series(np.repeat(range(1, 5), repeats=2), dtype="intp"),
276276
"b": Series(
277277
[
278278
np.nan,
@@ -283,8 +283,6 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
283283
np.nan,
284284
max_value,
285285
np.nan,
286-
np.nan,
287-
np.nan,
288286
],
289287
dtype=float_numpy_dtype,
290288
),
@@ -299,7 +297,7 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
299297
return
300298
result = getattr(gb, how)(skipna=skipna)
301299
expected = DataFrame(
302-
{"b": [1, 3, 4, 6, np.nan]}, index=pd.Index(range(1, 6), name="a", dtype="intp")
300+
{"b": [1, 3, 4, 6]}, index=pd.Index(range(1, 5), name="a", dtype="intp")
303301
)
304302
tm.assert_frame_equal(result, expected)
305303

@@ -1003,8 +1001,6 @@ def test_string_dtype_all_na(
10031001
else:
10041002
expected_dtype = "int64"
10051003
expected_value = 1 if reduction_func == "size" else 0
1006-
elif reduction_func in ["idxmin", "idxmax"]:
1007-
expected_dtype, expected_value = "float64", np.nan
10081004
elif not skipna or min_count > 0:
10091005
expected_value = pd.NA
10101006
elif reduction_func == "sum":
@@ -1032,8 +1028,11 @@ def test_string_dtype_all_na(
10321028
with pytest.raises(TypeError, match=msg):
10331029
method(*args, **kwargs)
10341030
return
1035-
elif reduction_func in ["idxmin", "idxmax"] and not skipna:
1036-
msg = f"{reduction_func} with skipna=False encountered an NA value."
1031+
elif reduction_func in ["idxmin", "idxmax"]:
1032+
if skipna:
1033+
msg = f"{reduction_func} with skipna=True encountered all NA values"
1034+
else:
1035+
msg = f"{reduction_func} with skipna=False encountered an NA value."
10371036
with pytest.raises(ValueError, match=msg):
10381037
method(*args, **kwargs)
10391038
return

0 commit comments

Comments
 (0)