Skip to content

Commit f618d87

Browse files
committed
Merge
2 parents c0038fd + fbfff81 commit f618d87

File tree

6 files changed

+71
-20
lines changed

6 files changed

+71
-20
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,7 @@ Renamed the following offset aliases (:issue:`57986`):
507507

508508
Other Removals
509509
^^^^^^^^^^^^^^
510-
- :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when used with ``skipna=False`` and an NA value is encountered (:issue:`10694`, :issue:`57745`)
510+
- :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, and :meth:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when a group has all NA values, or when used with ``skipna=False`` and any NA value is encountered (:issue:`10694`, :issue:`57745`)
511511
- :func:`concat` no longer ignores empty objects when determining output dtypes (:issue:`39122`)
512512
- :func:`concat` with all-NA entries no longer ignores the dtype of those entries when determining the result dtype (:issue:`40893`)
513513
- :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`)

pandas/core/groupby/generic.py

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1404,7 +1404,15 @@ def idxmin(self, skipna: bool = True) -> Series:
14041404
Raises
14051405
------
14061406
ValueError
1407-
If the Series is empty or skipna=False and any value is NA.
1407+
When there are no valid values for a group. This can happen if:
1408+
1409+
* There is an unobserved group and ``observed=False``.
1410+
* All values for a group are NA.
1411+
* Some values for a group are NA and ``skipna=False``.
1412+
1413+
.. versionchanged:: 3.0.0
1414+
Previously if all values for a group are NA or some values for a group are
1415+
NA and ``skipna=False``, this method would return NA. Now it raises instead.
14081416
14091417
See Also
14101418
--------
@@ -1457,7 +1465,15 @@ def idxmax(self, skipna: bool = True) -> Series:
14571465
Raises
14581466
------
14591467
ValueError
1460-
If the Series is empty or skipna=False and any value is NA.
1468+
When there are no valid values for a group. This can happen if:
1469+
1470+
* There is an unobserved group and ``observed=False``.
1471+
* All values for a group are NA.
1472+
* Some values for a group are NA and ``skipna=False``.
1473+
1474+
.. versionchanged:: 3.0.0
1475+
Previously if all values for a group are NA or some values for a group are
1476+
NA and ``skipna=False``, this method would return NA. Now it raises instead.
14611477
14621478
See Also
14631479
--------
@@ -2597,7 +2613,15 @@ def idxmax(
25972613
Raises
25982614
------
25992615
ValueError
2600-
* If a column is empty or skipna=False and any value is NA.
2616+
When there are no valid values for a group. This can happen if:
2617+
2618+
* There is an unobserved group and ``observed=False``.
2619+
* All values for a group are NA.
2620+
* Some values for a group are NA and ``skipna=False``.
2621+
2622+
.. versionchanged:: 3.0.0
2623+
Previously if all values for a group are NA or some values for a group are
2624+
NA and ``skipna=False``, this method would return NA. Now it raises instead.
26012625
26022626
See Also
26032627
--------
@@ -2663,7 +2687,15 @@ def idxmin(
26632687
Raises
26642688
------
26652689
ValueError
2666-
* If a column is empty or skipna=False and any value is NA.
2690+
When there are no valid values for a group. This can happen if:
2691+
2692+
* There is an unobserved group and ``observed=False``.
2693+
* All values for a group are NA.
2694+
* Some values for a group are NA and ``skipna=False``.
2695+
2696+
.. versionchanged:: 3.0.0
2697+
Previously if all values for a group are NA or some values for a group are
2698+
NA and ``skipna=False``, this method would return NA. Now it raises instead.
26672699
26682700
See Also
26692701
--------

pandas/core/groupby/groupby.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1784,7 +1784,8 @@ def array_func(values: ArrayLike) -> ArrayLike:
17841784
new_mgr = data.grouped_reduce(array_func)
17851785
res = self._wrap_agged_manager(new_mgr)
17861786
if how in ["idxmin", "idxmax"]:
1787-
res = self._wrap_idxmax_idxmin(res, how=how, skipna=kwargs["skipna"])
1787+
# mypy expects how to be Literal["idxmin", "idxmax"].
1788+
res = self._wrap_idxmax_idxmin(res, how=how, skipna=kwargs["skipna"]) # type: ignore[arg-type]
17881789
out = self._wrap_aggregated_output(res)
17891790
return out
17901791

@@ -5724,7 +5725,7 @@ def _wrap_idxmax_idxmin(
57245725
elif skipna and res.lt(0).any(axis=None):
57255726
raise ValueError(
57265727
f"{type(self).__name__}.{how} with skipna=True encountered all NA "
5727-
f"value in a group."
5728+
f"values in a group."
57285729
)
57295730
else:
57305731
if isinstance(index, MultiIndex):

pandas/core/groupby/grouper.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -286,18 +286,22 @@ def __init__(
286286
self._indexer: npt.NDArray[np.intp] | None = None
287287

288288
def _get_grouper(
289-
self, obj: NDFrameT, validate: bool = True
289+
self, obj: NDFrameT, validate: bool = True, observed: bool = True
290290
) -> tuple[ops.BaseGrouper, NDFrameT]:
291291
"""
292292
Parameters
293293
----------
294294
obj : Series or DataFrame
295+
Object being grouped.
295296
validate : bool, default True
296-
if True, validate the grouper
297+
If True, validate the grouper.
298+
observed : bool, default True
299+
Whether only observed groups should be in the result. Only
300+
has an impact when grouping on categorical data.
297301
298302
Returns
299303
-------
300-
a tuple of grouper, obj (possibly sorted)
304+
A tuple of grouper, obj (possibly sorted)
301305
"""
302306
obj, _, _ = self._set_grouper(obj)
303307
grouper, _, obj = get_grouper(
@@ -307,6 +311,7 @@ def _get_grouper(
307311
sort=self.sort,
308312
validate=validate,
309313
dropna=self.dropna,
314+
observed=observed,
310315
)
311316

312317
return grouper, obj
@@ -787,7 +792,7 @@ def get_grouper(
787792

788793
# a passed-in Grouper, directly convert
789794
if isinstance(key, Grouper):
790-
grouper, obj = key._get_grouper(obj, validate=False)
795+
grouper, obj = key._get_grouper(obj, validate=False, observed=observed)
791796
if key.key is None:
792797
return grouper, frozenset(), obj
793798
else:

pandas/core/resample.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2305,8 +2305,22 @@ def _get_resampler(self, obj: NDFrame) -> Resampler:
23052305
)
23062306

23072307
def _get_grouper(
2308-
self, obj: NDFrameT, validate: bool = True
2308+
self, obj: NDFrameT, validate: bool = True, observed: bool = True
23092309
) -> tuple[BinGrouper, NDFrameT]:
2310+
"""
2311+
Parameters
2312+
----------
2313+
obj : Series or DataFrame
2314+
Object being grouped.
2315+
validate : bool, default True
2316+
Unused. Only for compatibility with ``Grouper._get_grouper``.
2317+
observed : bool, default True
2318+
Unused. Only for compatibility with ``Grouper._get_grouper``.
2319+
2320+
Returns
2321+
-------
2322+
A tuple of grouper, obj (possibly sorted)
2323+
"""
23102324
# create the resampler and return our binner
23112325
r = self._get_resampler(obj)
23122326
return r._grouper, cast(NDFrameT, r.obj)

pandas/tests/groupby/test_reductions.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
272272
max_value = np.finfo(float_numpy_dtype).max
273273
df = DataFrame(
274274
{
275-
"a": Series(np.repeat(range(1, 6), repeats=2), dtype="intp"),
275+
"a": Series(np.repeat(range(1, 5), repeats=2), dtype="intp"),
276276
"b": Series(
277277
[
278278
np.nan,
@@ -283,8 +283,6 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
283283
np.nan,
284284
max_value,
285285
np.nan,
286-
np.nan,
287-
np.nan,
288286
],
289287
dtype=float_numpy_dtype,
290288
),
@@ -299,7 +297,7 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
299297
return
300298
result = getattr(gb, how)(skipna=skipna)
301299
expected = DataFrame(
302-
{"b": [1, 3, 4, 6, np.nan]}, index=pd.Index(range(1, 6), name="a", dtype="intp")
300+
{"b": [1, 3, 4, 6]}, index=pd.Index(range(1, 5), name="a", dtype="intp")
303301
)
304302
tm.assert_frame_equal(result, expected)
305303

@@ -1003,8 +1001,6 @@ def test_string_dtype_all_na(
10031001
else:
10041002
expected_dtype = "int64"
10051003
expected_value = 1 if reduction_func == "size" else 0
1006-
elif reduction_func in ["idxmin", "idxmax"]:
1007-
expected_dtype, expected_value = "float64", np.nan
10081004
elif not skipna or min_count > 0:
10091005
expected_value = pd.NA
10101006
elif reduction_func == "sum":
@@ -1032,8 +1028,11 @@ def test_string_dtype_all_na(
10321028
with pytest.raises(TypeError, match=msg):
10331029
method(*args, **kwargs)
10341030
return
1035-
elif reduction_func in ["idxmin", "idxmax"] and not skipna:
1036-
msg = f"{reduction_func} with skipna=False encountered an NA value."
1031+
elif reduction_func in ["idxmin", "idxmax"]:
1032+
if skipna:
1033+
msg = f"{reduction_func} with skipna=True encountered all NA values"
1034+
else:
1035+
msg = f"{reduction_func} with skipna=False encountered an NA value."
10371036
with pytest.raises(ValueError, match=msg):
10381037
method(*args, **kwargs)
10391038
return

0 commit comments

Comments
 (0)