Skip to content

Commit 05223b3

Browse files
committed
Add simple test for most_common. Fix typing issues
1 parent 51ee64c commit 05223b3

File tree

4 files changed

+68
-17
lines changed

4 files changed

+68
-17
lines changed

src/xarray_regrid/most_common.py

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
"""Implementation of the "most common value" regridding method."""
22

33
from itertools import product
4-
from typing import overload
4+
from typing import Any, overload
55

66
import flox.xarray
77
import numpy as np
8-
import numpy_groupies as npg
8+
import numpy_groupies as npg # type: ignore
99
import pandas as pd
1010
import xarray as xr
1111
from flox import Aggregation
@@ -78,7 +78,7 @@ def most_common_wrapper(
7878

7979

8080
def split_combine_most_common(
81-
data: xr.Dataset, target_ds: xr.Dataset, time_dim: str, max_mem: int = 1e9
81+
data: xr.Dataset, target_ds: xr.Dataset, time_dim: str, max_mem: int = int(1e9)
8282
) -> xr.Dataset:
8383
"""Use a split-combine strategy to reduce the memory use of the most_common regrid.
8484
@@ -173,7 +173,7 @@ def most_common(data: xr.Dataset, target_ds: xr.Dataset, time_dim: str) -> xr.Da
173173

174174
most_common = Aggregation(
175175
name="most_common",
176-
numpy=_custom_grouped_reduction,
176+
numpy=_custom_grouped_reduction, # type: ignore
177177
chunk=None,
178178
combine=None,
179179
)
@@ -208,14 +208,7 @@ def _most_common_label(neighbors: np.ndarray) -> np.ndarray:
208208
then the first label in the list will be picked.
209209
"""
210210
unique_labels, counts = np.unique(neighbors, return_counts=True)
211-
return unique_labels[np.argmax(counts)]
212-
213-
214-
def most_common_chunked(multi_values: np.ndarray, multi_counts: np.ndarray):
215-
all_values, index = np.unique(multi_values, return_inverse=True)
216-
all_counts = np.zeros(all_values.size, np.int64)
217-
np.add.at(all_counts, index, multi_counts.ravel()) # inplace
218-
return all_values[all_counts.argmax()]
211+
return unique_labels[np.argmax(counts)] # type: ignore
219212

220213

221214
def _custom_grouped_reduction(
@@ -224,8 +217,8 @@ def _custom_grouped_reduction(
224217
*,
225218
axis: int = -1,
226219
size: int | None = None,
227-
fill_value=None,
228-
dtype=None,
220+
fill_value: Any = None,
221+
dtype: Any = None,
229222
) -> np.ndarray:
230223
"""Custom grouped reduction for flox.Aggregation to get most common label.
231224
@@ -242,7 +235,7 @@ def _custom_grouped_reduction(
242235
Returns:
243236
np.ndarray with array.shape[-1] == size, containing a single value per group
244237
"""
245-
return npg.aggregate_numpy.aggregate(
238+
agg: np.ndarray = npg.aggregate_numpy.aggregate(
246239
group_idx,
247240
array,
248241
func=_most_common_label,
@@ -251,3 +244,4 @@ def _custom_grouped_reduction(
251244
fill_value=fill_value,
252245
dtype=dtype,
253246
)
247+
return agg

src/xarray_regrid/regrid.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def most_common(
9595
self,
9696
ds_target_grid: xr.Dataset,
9797
time_dim: str = "time",
98-
max_mem: int = 1e9
98+
max_mem: int = int(1e9),
9999
) -> xr.DataArray | xr.Dataset:
100100
"""Regrid by taking the most common value within the new grid cells.
101101

src/xarray_regrid/utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from collections.abc import Hashable
12
from dataclasses import dataclass
23

34
import numpy as np
@@ -156,7 +157,7 @@ def common_coords(
156157
data1: xr.DataArray | xr.Dataset,
157158
data2: xr.DataArray | xr.Dataset,
158159
remove_coord: str | None = None,
159-
) -> set[str]:
160+
) -> set[Hashable]:
160161
"""Return a set of coords which two dataset/arrays have in common."""
161162
coords = set(data1.coords).intersection(set(data2.coords))
162163
if remove_coord in coords:

tests/test_most_common.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import numpy as np
2+
import pytest
3+
import xarray as xr
4+
5+
from xarray_regrid import Grid, create_regridding_dataset
6+
7+
8+
@pytest.fixture
def dummy_lc_data():
    """Build a small 11x11 land-cover-like dataset with integer labels 0-2.

    Uses a fixed RNG seed so the class labels (and therefore the expected
    regridding result in the test below) are reproducible.
    """
    np.random.seed(0)
    lc_values = np.random.randint(0, 3, size=(11, 11))
    lat = np.linspace(0, 40, num=11)
    lon = np.linspace(0, 40, num=11)
    return xr.Dataset(
        data_vars={"lc": (["latitude", "longitude"], lc_values)},
        coords={
            "longitude": (["longitude"], lon),
            "latitude": (["latitude"], lat),
        },
    )
24+
25+
26+
@pytest.fixture
def dummy_target_grid():
    """Target grid dataset: 8-degree cells over the same 0-40 degree extent."""
    target = Grid(
        north=40,
        east=40,
        south=0,
        west=0,
        resolution_lat=8,
        resolution_lon=8,
    )
    return create_regridding_dataset(target)
37+
38+
39+
def test_most_common(dummy_lc_data, dummy_target_grid):
    """Regridding by most_common should pick the modal label per target cell."""
    # Expected modal labels for the seeded 11x11 input on the 6x6 target grid.
    expected = np.array(
        [
            [0, 0, 1, 0, 0, 0],
            [0, 1, 0, 1, 1, 0],
            [0, 1, 2, 0, 0, 2],
            [1, 0, 1, 2, 0, 0],
            [0, 0, 0, 1, 0, 1],
            [1, 2, 2, 0, 2, 2],
        ]
    )
    result = dummy_lc_data.regrid.most_common(dummy_target_grid)["lc"]
    np.testing.assert_array_equal(result.values, expected)

0 commit comments

Comments
 (0)