Skip to content

Commit 4107a3a

Browse files
committed
Fix most_common test, improve docstring.
1 parent 05223b3 commit 4107a3a

File tree

4 files changed

+48
-20
lines changed

4 files changed

+48
-20
lines changed

src/xarray_regrid/most_common.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ def most_common(data: xr.Dataset, target_ds: xr.Dataset, time_dim: str) -> xr.Da
153153
Returns:
154154
xarray.dataset with regridded land cover categorical data.
155155
"""
156+
dim_order = data.dims
156157
coords = utils.common_coords(data, target_ds, remove_coord=time_dim)
157158
bounds = tuple(
158159
_construct_intervals(target_ds[coord].to_numpy()) for coord in coords
@@ -189,7 +190,7 @@ def most_common(data: xr.Dataset, target_ds: xr.Dataset, time_dim: str) -> xr.Da
189190
for coord in coords:
190191
ds_regrid[coord] = target_ds[coord]
191192

192-
return ds_regrid
193+
return ds_regrid.transpose(*dim_order)
193194

194195

195196
def _construct_intervals(coord: np.ndarray) -> pd.IntervalIndex:

src/xarray_regrid/regrid.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,12 @@ def most_common(
9999
) -> xr.DataArray | xr.Dataset:
100100
"""Regrid by taking the most common value within the new grid cells.
101101
102-
To be used for regridding data to a much coarser resolution.
102+
To be used for regridding data to a much coarser resolution, not for regridding
103+
when the source and target grids are of a similar resolution.
104+
105+
Note that in the case of two unique values with the same count, the behaviour
106+
is not deterministic, and the resulting "most common" one will randomly be
107+
either of the two.
103108
104109
Args:
105110
ds_target_grid: Target grid dataset

src/xarray_regrid/utils.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from collections.abc import Hashable
21
from dataclasses import dataclass
32

43
import numpy as np
@@ -157,9 +156,9 @@ def common_coords(
157156
data1: xr.DataArray | xr.Dataset,
158157
data2: xr.DataArray | xr.Dataset,
159158
remove_coord: str | None = None,
160-
) -> set[Hashable]:
159+
) -> list[str]:
161160
"""Return a sorted list of the coord names which two datasets/arrays have in common."""
162161
coords = set(data1.coords).intersection(set(data2.coords))
163162
if remove_coord in coords:
164163
coords.remove(remove_coord)
165-
return coords
164+
return sorted([str(coord) for coord in coords])

tests/test_most_common.py

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,27 @@
77

88
@pytest.fixture
99
def dummy_lc_data():
10-
np.random.seed(0)
11-
data = np.random.randint(0, 3, size=(11, 11))
10+
data = np.array(
11+
[
12+
[2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0],
13+
[2, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0],
14+
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
15+
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
16+
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
17+
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
18+
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
19+
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
20+
[3, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
21+
[3, 3, 3, 3, 0, 0, 0, 0, 1, 1, 1],
22+
[3, 3, 0, 3, 0, 0, 0, 0, 1, 1, 1],
23+
]
24+
)
1225
lat_coords = np.linspace(0, 40, num=11)
1326
lon_coords = np.linspace(0, 40, num=11)
1427

1528
return xr.Dataset(
1629
data_vars={
17-
"lc": (["latitude", "longitude"], data),
30+
"lc": (["longitude", "latitude"], data),
1831
},
1932
coords={
2033
"longitude": (["longitude"], lon_coords),
@@ -37,20 +50,30 @@ def dummy_target_grid():
3750

3851

3952
def test_most_common(dummy_lc_data, dummy_target_grid):
40-
expected = np.array(
53+
expected_data = np.array(
4154
[
42-
[0, 0, 1, 0, 0, 0],
43-
[0, 1, 0, 1, 1, 0],
44-
[0, 1, 2, 0, 0, 2],
45-
[1, 0, 1, 2, 0, 0],
46-
[0, 0, 0, 1, 0, 1],
47-
[1, 2, 2, 0, 2, 2],
55+
[2, 2, 0, 0, 0, 0],
56+
[0, 0, 0, 0, 0, 0],
57+
[0, 0, 0, 0, 0, 0],
58+
[0, 0, 0, 0, 0, 0],
59+
[0, 0, 0, 0, 0, 0],
60+
[3, 3, 0, 0, 0, 1],
4861
]
4962
)
5063

51-
np.testing.assert_array_equal(
52-
dummy_lc_data.regrid.most_common(
53-
dummy_target_grid,
54-
)["lc"].values,
55-
expected,
64+
lat_coords = np.linspace(0, 40, num=6)
65+
lon_coords = np.linspace(0, 40, num=6)
66+
67+
expected = xr.Dataset(
68+
data_vars={
69+
"lc": (["longitude", "latitude"], expected_data),
70+
},
71+
coords={
72+
"longitude": (["longitude"], lon_coords),
73+
"latitude": (["latitude"], lat_coords),
74+
},
75+
)
76+
xr.testing.assert_equal(
77+
dummy_lc_data.regrid.most_common(dummy_target_grid)["lc"],
78+
expected["lc"],
5679
)

0 commit comments

Comments
 (0)