Skip to content

Commit b741d8c

Browse files
authored
GroupBy: Finish eagerly_compute_group deprecation (pydata#10253)
1 parent 5101a52 commit b741d8c

File tree

6 files changed

+76
-107
lines changed

6 files changed

+76
-107
lines changed

doc/whats-new.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ Breaking changes
6262
Deprecations
6363
~~~~~~~~~~~~
6464

65+
- The deprecation cycle for the ``eagerly_compute_group`` kwarg to ``groupby`` and ``groupby_bins``
66+
is now complete.
67+
By `Deepak Cherian <https://github.com/dcherian>`_.
6568

6669
Bug fixes
6770
~~~~~~~~~

xarray/core/common.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1118,7 +1118,7 @@ def _resample(
11181118
f"Received {type(freq)} instead."
11191119
)
11201120

1121-
rgrouper = ResolvedGrouper(grouper, group, self, eagerly_compute_group=False)
1121+
rgrouper = ResolvedGrouper(grouper, group, self)
11221122

11231123
return resample_cls(
11241124
self,

xarray/core/dataarray.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6805,7 +6805,7 @@ def groupby(
68056805
*,
68066806
squeeze: Literal[False] = False,
68076807
restore_coord_dims: bool = False,
6808-
eagerly_compute_group: bool = True,
6808+
eagerly_compute_group: Literal[False] | None = None,
68096809
**groupers: Grouper,
68106810
) -> DataArrayGroupBy:
68116811
"""Returns a DataArrayGroupBy object for performing grouped operations.
@@ -6821,11 +6821,8 @@ def groupby(
68216821
restore_coord_dims : bool, default: False
68226822
If True, also restore the dimension order of multi-dimensional
68236823
coordinates.
6824-
eagerly_compute_group: bool
6825-
Whether to eagerly compute ``group`` when it is a chunked array.
6826-
This option is to maintain backwards compatibility. Set to False
6827-
to opt-in to future behaviour, where ``group`` is not automatically loaded
6828-
into memory.
6824+
eagerly_compute_group: False, optional
6825+
This argument is deprecated.
68296826
**groupers : Mapping of str to Grouper or Resampler
68306827
Mapping of variable name to group by to :py:class:`Grouper` or :py:class:`Resampler` object.
68316828
One of ``group`` or ``groupers`` must be provided.
@@ -6957,7 +6954,7 @@ def groupby_bins(
69576954
squeeze: Literal[False] = False,
69586955
restore_coord_dims: bool = False,
69596956
duplicates: Literal["raise", "drop"] = "raise",
6960-
eagerly_compute_group: bool = True,
6957+
eagerly_compute_group: Literal[False] | None = None,
69616958
) -> DataArrayGroupBy:
69626959
"""Returns a DataArrayGroupBy object for performing grouped operations.
69636960
@@ -6994,11 +6991,8 @@ def groupby_bins(
69946991
coordinates.
69956992
duplicates : {"raise", "drop"}, default: "raise"
69966993
If bin edges are not unique, raise ValueError or drop non-uniques.
6997-
eagerly_compute_group: bool
6998-
Whether to eagerly compute ``group`` when it is a chunked array.
6999-
This option is to maintain backwards compatibility. Set to False
7000-
to opt-in to future behaviour, where ``group`` is not automatically loaded
7001-
into memory.
6994+
eagerly_compute_group: False, optional
6995+
This argument is deprecated.
70026996
70036997
Returns
70046998
-------

xarray/core/dataset.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9847,7 +9847,7 @@ def groupby(
98479847
*,
98489848
squeeze: Literal[False] = False,
98499849
restore_coord_dims: bool = False,
9850-
eagerly_compute_group: bool = True,
9850+
eagerly_compute_group: Literal[False] | None = None,
98519851
**groupers: Grouper,
98529852
) -> DatasetGroupBy:
98539853
"""Returns a DatasetGroupBy object for performing grouped operations.
@@ -9863,11 +9863,8 @@ def groupby(
98639863
restore_coord_dims : bool, default: False
98649864
If True, also restore the dimension order of multi-dimensional
98659865
coordinates.
9866-
eagerly_compute_group: bool
9867-
Whether to eagerly compute ``group`` when it is a chunked array.
9868-
This option is to maintain backwards compatibility. Set to False
9869-
to opt-in to future behaviour, where ``group`` is not automatically loaded
9870-
into memory.
9866+
eagerly_compute_group: False, optional
9867+
This argument is deprecated.
98719868
**groupers : Mapping of str to Grouper or Resampler
98729869
Mapping of variable name to group by to :py:class:`Grouper` or :py:class:`Resampler` object.
98739870
One of ``group`` or ``groupers`` must be provided.
@@ -9968,7 +9965,7 @@ def groupby_bins(
99689965
squeeze: Literal[False] = False,
99699966
restore_coord_dims: bool = False,
99709967
duplicates: Literal["raise", "drop"] = "raise",
9971-
eagerly_compute_group: bool = True,
9968+
eagerly_compute_group: Literal[False] | None = None,
99729969
) -> DatasetGroupBy:
99739970
"""Returns a DatasetGroupBy object for performing grouped operations.
99749971
@@ -10005,11 +10002,8 @@ def groupby_bins(
1000510002
coordinates.
1000610003
duplicates : {"raise", "drop"}, default: "raise"
1000710004
If bin edges are not unique, raise ValueError or drop non-uniques.
10008-
eagerly_compute_group: bool
10009-
Whether to eagerly compute ``group`` when it is a chunked array.
10010-
This option is to maintain backwards compatibility. Set to False
10011-
to opt-in to future behaviour, where ``group`` is not automatically loaded
10012-
into memory.
10005+
eagerly_compute_group: False, optional
10006+
This argument is deprecated.
1001310007
1001410008
Returns
1001510009
-------

xarray/core/groupby.py

Lines changed: 30 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ class ResolvedGrouper(Generic[T_DataWithCoords]):
295295
grouper: Grouper
296296
group: T_Group
297297
obj: T_DataWithCoords
298-
eagerly_compute_group: bool = field(repr=False)
298+
eagerly_compute_group: Literal[False] | None = field(repr=False, default=None)
299299

300300
# returned by factorize:
301301
encoded: EncodedGroups = field(init=False, repr=False)
@@ -324,39 +324,38 @@ def __post_init__(self) -> None:
324324

325325
self.group = _resolve_group(self.obj, self.group)
326326

327+
if self.eagerly_compute_group:
328+
raise ValueError(
329+
f""""Eagerly computing the DataArray you're grouping by ({self.group.name!r}) "
330+
has been removed.
331+
Please load this array's data manually using `.compute` or `.load`.
332+
To intentionally avoid eager loading, either (1) specify
333+
`.groupby({self.group.name}=UniqueGrouper(labels=...))`
334+
or (2) pass explicit bin edges using ``bins`` or
335+
`.groupby({self.group.name}=BinGrouper(bins=...))`; as appropriate."""
336+
)
337+
if self.eagerly_compute_group is not None:
338+
emit_user_level_warning(
339+
"Passing `eagerly_compute_group` is now deprecated. It has no effect.",
340+
DeprecationWarning,
341+
)
342+
327343
if not isinstance(self.group, _DummyGroup) and is_chunked_array(
328344
self.group.variable._data
329345
):
330-
if self.eagerly_compute_group is False:
331-
# This requires a pass to discover the groups present
332-
if (
333-
isinstance(self.grouper, UniqueGrouper)
334-
and self.grouper.labels is None
335-
):
336-
raise ValueError(
337-
"Please pass `labels` to UniqueGrouper when grouping by a chunked array."
338-
)
339-
# this requires a pass to compute the bin edges
340-
if isinstance(self.grouper, BinGrouper) and isinstance(
341-
self.grouper.bins, int
342-
):
343-
raise ValueError(
344-
"Please pass explicit bin edges to BinGrouper using the ``bins`` kwarg"
345-
"when grouping by a chunked array."
346-
)
347-
348-
if self.eagerly_compute_group:
349-
emit_user_level_warning(
350-
f""""Eagerly computing the DataArray you're grouping by ({self.group.name!r}) "
351-
is deprecated and will raise an error in v2025.05.0.
352-
Please load this array's data manually using `.compute` or `.load`.
353-
To intentionally avoid eager loading, either (1) specify
354-
`.groupby({self.group.name}=UniqueGrouper(labels=...), eagerly_load_group=False)`
355-
or (2) pass explicit bin edges using or `.groupby({self.group.name}=BinGrouper(bins=...),
356-
eagerly_load_group=False)`; as appropriate.""",
357-
DeprecationWarning,
346+
# This requires a pass to discover the groups present
347+
if isinstance(self.grouper, UniqueGrouper) and self.grouper.labels is None:
348+
raise ValueError(
349+
"Please pass `labels` to UniqueGrouper when grouping by a chunked array."
350+
)
351+
# this requires a pass to compute the bin edges
352+
if isinstance(self.grouper, BinGrouper) and isinstance(
353+
self.grouper.bins, int
354+
):
355+
raise ValueError(
356+
"Please pass explicit bin edges to BinGrouper using the ``bins`` kwarg"
357+
"when grouping by a chunked array."
358358
)
359-
self.group = self.group.compute()
360359

361360
self.encoded = self.grouper.factorize(self.group)
362361

@@ -382,7 +381,7 @@ def _parse_group_and_groupers(
382381
group: GroupInput,
383382
groupers: dict[str, Grouper],
384383
*,
385-
eagerly_compute_group: bool,
384+
eagerly_compute_group: Literal[False] | None,
386385
) -> tuple[ResolvedGrouper, ...]:
387386
from xarray.core.dataarray import DataArray
388387
from xarray.core.variable import Variable

xarray/tests/test_groupby.py

Lines changed: 30 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2930,33 +2930,21 @@ def test_multiple_groupers(use_flox: bool, shuffle: bool) -> None:
29302930

29312931
if has_dask:
29322932
b["xy"] = b["xy"].chunk()
2933-
for eagerly_compute_group in [True, False]:
2934-
kwargs = dict(
2935-
x=UniqueGrouper(),
2936-
xy=UniqueGrouper(labels=["a", "b", "c"]),
2937-
eagerly_compute_group=eagerly_compute_group,
2938-
)
2939-
expected = xr.DataArray(
2940-
[[[1, 1, 1], [np.nan, 1, 2]]] * 4,
2941-
dims=("z", "x", "xy"),
2942-
coords={"xy": ("xy", ["a", "b", "c"], {"foo": "bar"})},
2943-
)
2944-
if eagerly_compute_group:
2945-
with raise_if_dask_computes(max_computes=1):
2946-
with pytest.warns(DeprecationWarning):
2947-
gb = b.groupby(**kwargs) # type: ignore[arg-type]
2948-
assert_identical(gb.count(), expected)
2949-
else:
2950-
with raise_if_dask_computes(max_computes=0):
2951-
gb = b.groupby(**kwargs) # type: ignore[arg-type]
2952-
assert is_chunked_array(gb.encoded.codes.data)
2953-
assert not gb.encoded.group_indices
2954-
if has_flox:
2955-
with raise_if_dask_computes(max_computes=1):
2956-
assert_identical(gb.count(), expected)
2957-
else:
2958-
with pytest.raises(ValueError, match="when lazily grouping"):
2959-
gb.count()
2933+
expected = xr.DataArray(
2934+
[[[1, 1, 1], [np.nan, 1, 2]]] * 4,
2935+
dims=("z", "x", "xy"),
2936+
coords={"xy": ("xy", ["a", "b", "c"], {"foo": "bar"})},
2937+
)
2938+
with raise_if_dask_computes(max_computes=0):
2939+
gb = b.groupby(x=UniqueGrouper(), xy=UniqueGrouper(labels=["a", "b", "c"]))
2940+
assert is_chunked_array(gb.encoded.codes.data)
2941+
assert not gb.encoded.group_indices
2942+
if has_flox:
2943+
with raise_if_dask_computes(max_computes=1):
2944+
assert_identical(gb.count(), expected)
2945+
else:
2946+
with pytest.raises(ValueError, match="when lazily grouping"):
2947+
gb.count()
29602948

29612949

29622950
@pytest.mark.parametrize("use_flox", [True, False])
@@ -3117,9 +3105,7 @@ def test_lazy_grouping(grouper, expect_index):
31173105

31183106
if has_flox:
31193107
lazy = (
3120-
xr.Dataset({"foo": data}, coords={"zoo": data})
3121-
.groupby(zoo=grouper, eagerly_compute_group=False)
3122-
.count()
3108+
xr.Dataset({"foo": data}, coords={"zoo": data}).groupby(zoo=grouper).count()
31233109
)
31243110
assert_identical(eager, lazy)
31253111

@@ -3135,9 +3121,7 @@ def test_lazy_grouping_errors() -> None:
31353121
coords={"y": ("x", dask.array.arange(20, chunks=3))},
31363122
)
31373123

3138-
gb = data.groupby(
3139-
y=UniqueGrouper(labels=np.arange(5, 10)), eagerly_compute_group=False
3140-
)
3124+
gb = data.groupby(y=UniqueGrouper(labels=np.arange(5, 10)))
31413125
message = "not supported when lazily grouping by"
31423126
with pytest.raises(ValueError, match=message):
31433127
gb.map(lambda x: x)
@@ -3280,32 +3264,27 @@ def test_groupby_dask_eager_load_warnings() -> None:
32803264
coords={"x": ("z", np.arange(12)), "y": ("z", np.arange(12))},
32813265
).chunk(z=6)
32823266

3283-
with pytest.warns(DeprecationWarning):
3284-
ds.groupby(x=UniqueGrouper())
3285-
3286-
with pytest.warns(DeprecationWarning):
3287-
ds.groupby("x")
3288-
3289-
with pytest.warns(DeprecationWarning):
3290-
ds.groupby(ds.x)
3291-
32923267
with pytest.raises(ValueError, match="Please pass"):
3293-
ds.groupby("x", eagerly_compute_group=False)
3268+
with pytest.warns(DeprecationWarning):
3269+
ds.groupby("x", eagerly_compute_group=False)
3270+
with pytest.raises(ValueError, match="Eagerly computing"):
3271+
ds.groupby("x", eagerly_compute_group=True) # type: ignore[arg-type]
32943272

32953273
# This is technically fine but anyone iterating over the groupby object
32963274
# will see an error. Grouping itself no longer warns; only explicitly passing
# `eagerly_compute_group` emits a DeprecationWarning.
3297-
with pytest.warns(DeprecationWarning):
3298-
ds.groupby(x=UniqueGrouper(labels=[1, 2, 3]))
3299-
3300-
ds.groupby(x=UniqueGrouper(labels=[1, 2, 3]), eagerly_compute_group=False)
3275+
ds.groupby(x=UniqueGrouper(labels=[1, 2, 3]))
33013276

33023277
with pytest.warns(DeprecationWarning):
3303-
ds.groupby_bins("x", bins=3)
3278+
ds.groupby(x=UniqueGrouper(labels=[1, 2, 3]), eagerly_compute_group=False)
3279+
33043280
with pytest.raises(ValueError, match="Please pass"):
3305-
ds.groupby_bins("x", bins=3, eagerly_compute_group=False)
3281+
with pytest.warns(DeprecationWarning):
3282+
ds.groupby_bins("x", bins=3, eagerly_compute_group=False)
3283+
with pytest.raises(ValueError, match="Eagerly computing"):
3284+
ds.groupby_bins("x", bins=3, eagerly_compute_group=True) # type: ignore[arg-type]
3285+
ds.groupby_bins("x", bins=[1, 2, 3])
33063286
with pytest.warns(DeprecationWarning):
3307-
ds.groupby_bins("x", bins=[1, 2, 3])
3308-
ds.groupby_bins("x", bins=[1, 2, 3], eagerly_compute_group=False)
3287+
ds.groupby_bins("x", bins=[1, 2, 3], eagerly_compute_group=False)
33093288

33103289

33113290
# TODO: Possible property tests to add to this module

0 commit comments

Comments
 (0)