Skip to content

Commit 475cb06

Browse files
committed
Add FlagGrouper
Closes #472
1 parent 6d81913 commit 475cb06

File tree

5 files changed

+83
-2
lines changed

5 files changed

+83
-2
lines changed

cf_xarray/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,6 @@
99
from .options import set_options # noqa
1010
from .utils import _get_version
1111

12-
from . import geometry # noqa
12+
from . import geometry, groupers # noqa
1313

1414
# Package version string, obtained from the `_get_version` helper imported from `.utils`.
__version__ = _get_version()

cf_xarray/groupers.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import numpy as np
2+
import pandas as pd
3+
from xarray.groupers import EncodedGroups, Grouper
4+
5+
6+
class FlagGrouper(Grouper):
    """Grouper that groups by a CF flag variable, labelling groups by meaning.

    The grouped variable must carry the ``flag_values`` and ``flag_meanings``
    attributes. The resulting group labels are the whitespace-separated
    entries of ``flag_meanings``, in the order given by ``flag_values``.
    """

    def factorize(self, group) -> EncodedGroups:
        """Encode ``group`` as integer codes into the flag meanings.

        Parameters
        ----------
        group
            Variable to group by. It must expose ``attrs`` (containing
            ``flag_values`` and ``flag_meanings``), ``data``, ``shape`` and
            ``copy(data=...)``.

        Returns
        -------
        EncodedGroups
            ``codes`` has the shape of ``group`` and holds, for each element,
            the position of its value in ``flag_values`` (``-1`` when the
            value is not listed). ``full_index`` holds the flag meanings.

        Raises
        ------
        ValueError
            If a required attribute is missing, or if ``flag_values`` and
            ``flag_meanings`` do not have the same number of entries.
        """
        # Explicit validation instead of ``assert``: asserts are stripped
        # when Python runs with ``-O``.
        missing = [
            name for name in ("flag_values", "flag_meanings") if name not in group.attrs
        ]
        if missing:
            raise ValueError(
                "FlagGrouper requires the `flag_values` and `flag_meanings` "
                f"attributes; missing: {missing}."
            )

        # ``atleast_1d`` accepts a scalar ``flag_values`` (single flag).
        values = np.atleast_1d(group.attrs["flag_values"])
        # ``split()`` without an argument tolerates repeated whitespace, which
        # ``split(" ")`` would turn into empty labels.
        full_index = pd.Index(group.attrs["flag_meanings"].split())

        if len(values) != len(full_index):
            raise ValueError(
                "`flag_values` and `flag_meanings` must have the same number of "
                f"entries; got {len(values)} and {len(full_index)}."
            )

        # Look every element up in ``flag_values`` so that code ``i`` always
        # refers to ``full_index[i]``, regardless of the order in which values
        # occur in the data. Values absent from ``flag_values`` get code -1.
        # NOTE(review): ``get_indexer`` requires unique ``flag_values``.
        flat = np.asarray(group.data).ravel()
        codes = pd.Index(values).get_indexer(flat)

        codes_da = group.copy(data=codes.reshape(group.shape))
        # The codes are positions, not flag values, so the flag attributes no
        # longer describe this array.
        codes_da.attrs.pop("flag_values")
        codes_da.attrs.pop("flag_meanings")

        return EncodedGroups(codes=codes_da, full_index=full_index)

    def reset(self):
        """Return a new ``FlagGrouper``; this class stores no state to clear."""
        return type(self)()

cf_xarray/tests/test_groupers.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import numpy as np
2+
from xarray.testing import assert_identical
3+
4+
from cf_xarray.datasets import flag_excl
5+
from cf_xarray.groupers import FlagGrouper
6+
7+
8+
def test_flag_grouper():
    """Grouping with ``FlagGrouper`` matches a plain groupby relabelled by flag meaning."""
    dataset = flag_excl.to_dataset().set_coords("flag_var")
    dataset["foo"] = ("time", np.arange(8))

    # Reference result: group by the raw flag values, then relabel the
    # coordinate with the flag meanings and restore its standard_name.
    expected = dataset.groupby("flag_var").mean()
    expected["flag_var"] = ["flag_1", "flag_2", "flag_3"]
    expected["flag_var"].attrs["standard_name"] = "flag_mutual_exclusive"

    actual = dataset.groupby(flag_var=FlagGrouper()).mean()

    assert_identical(actual, expected)

doc/api.rst

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,20 @@ Geometries
2121
----------
2222
.. autosummary::
2323
:toctree: generated/
24-
2524
geometry.decode_geometries
25+
2626
geometry.encode_geometries
2727
geometry.shapely_to_cf
2828
geometry.cf_to_shapely
2929
geometry.GeometryNames
3030

31+
32+
Groupers
33+
--------
34+
.. autosummary::
35+
:toctree: generated/
36+
groupers.FlagGrouper
37+
3138
.. currentmodule:: xarray
3239

3340
DataArray

doc/flags.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,38 @@ You can also check whether a DataArray has the appropriate attributes to be reco
6060
da.cf.is_flag_variable
6161
```
6262

63+
## GroupBy
64+
65+
Flag variables, such as the one above, are a natural fit for GroupBy operations.
66+
cf-xarray provides a `FlagGrouper` that understands the `flag_meanings` and `flag_values` attributes.
67+
68+
Let's load an example dataset where the `flag_var` array has the needed attributes.
69+
70+
```{code-cell}
71+
import cf_xarray as cfxr
72+
import numpy as np
73+
74+
from cf_xarray.datasets import flag_excl
75+
76+
ds = flag_excl.to_dataset().set_coords('flag_var')
77+
ds["foo"] = ("time", np.arange(8))
78+
ds.flag_var
79+
```
80+
81+
Now use the {py:class}`~cf_xarray.groupers.FlagGrouper` to group by this flag variable:
82+
83+
```{code-cell}
84+
from cf_xarray.groupers import FlagGrouper
85+
86+
ds.groupby(flag_var=FlagGrouper()).mean()
87+
```
88+
89+
Note how the output coordinate has the values from `flag_meanings`!
90+
91+
```{seealso}
92+
See the Xarray docs on using [Grouper objects](https://docs.xarray.dev/en/stable/user-guide/groupby.html#grouper-objects).
93+
```
94+
6395
## Flag Masks
6496

6597
```{warning}

0 commit comments

Comments
 (0)