xcube-dev · konstntokas · Jun 19, 2025 · Jul 1, 2025
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -35,7 +35,7 @@ cubes from EO data.
    Dataset Convention <cubespec>
    Multi-Resolution Datasets <mldatasets>
    Data Store Conventions <storeconv>
-   Spatial Rectification <rectify>
+   Spatial Resampling <rectify>
    Developer Guide <devguide>
 
 

diff --git a/docs/source/rectify.md → docs/source/spatial_resampling.md b/docs/source/rectify.md → docs/source/spatial_resampling.md
@@ -1,15 +1,44 @@
-# Spatial Rectification Algorithm
+# Spatial Resampling Algorithms
 
-This chapter describes the algorithm used in the function [`rectify_dataset()`](api.html#xcube.core.resampling.rectify_dataset) 
-of module `xcube.core.resampling`. The function geometrically transforms 
+This chapter describes the algorithms used in the functions 
+[`resampling_in_space()`](api.html#xcube.core.resampling.resampling_in_space) 
+of module `xcube.core.resampling`. Depending on the grid-mapping of the input dataset 
+and the desired target grid-mapping, three different algorithms are performed: 
+
+1. affine transformation: if the CRS of the source and target is the same, only a 
+   resampling via an affine transfomration is applied using 
+   [`dask_image.ndinterp.affine_transform`](https://image.dask.org/en/latest/dask_image.ndinterp.html);
+   up-sampling (finer resolution) is governed by the spline-order, down-sampling
+   (coarser resolution) is governed by the aggregation methods. 
+2. reprojection: if the CRS of the source and target are different and both source dataset
+   and target gridmapping are regular, then reprojection can be applied. 
+3. rectification: if the source data set is given by a 2d spatial irregular gridmapping
+   and the target girdmapping shall be regular.
+
+## Spatial Reprojection Algorithm
+
+The `reproject_dataset` function reprojects a spatial dataset from its original 
+coordinate reference system (CRS) to a new CRS, assuming that both the source and 
+target grid mappings are regular grids. The performance is lazy and thus can be applied
+to large chunked datasets. 
+
+This method uses spatial interpolation techniques and can handle data with one 
+additional leading dimension, such as time or bands.
+
+
+
+
+## Spatial Rectification Algorithm
+
+The function geometrically transforms 
 spatial [data variables](https://docs.xarray.dev/en/stable/user-guide/terminology.html#term-Variable) 
 of a given [dataset](https://docs.xarray.dev/en/stable/user-guide/terminology.html#term-Dataset) 
 from an irregular source 
 [grid mapping](https://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#grid-mappings-and-projections) 
 into new data variables for a given regular target grid mapping and returns 
 them as a new dataset. 
 
-## Problem Description
+### Problem Description
 
 The following figure shows a Sentinel-3 OLCI Level-1b scene in its original 
 satellite perspective. In addition to the measured reflectances, the data 
@@ -42,7 +71,7 @@ is shown here:
 ![OLCI L1C Output](rectify/olci-output.png)
 
 
-## Algorithm Description
+### Algorithm Description
 
 The input to the rectification algorithm is satellite imagery in satellite 
 viewing perspective. In addition, two images – one for each spatial 
@@ -148,7 +177,7 @@ with
 *VA = V1 + u (V2 − V1)*  
 *VB = V3 + u (V4 − V3)*  
 
-## Remarks
+### Remarks
 
 **(1)** The target pixel size should be less or equal the source 
 pixel size, so that triangle patches in the source refer to multiple pixels 

diff --git a/xcube/core/resampling/_utils.py b/xcube/core/resampling/_utils.py
@@ -0,0 +1,45 @@
+# Copyright (c) 2018-2025 by xcube team and contributors
+# Permissions are hereby granted under the terms of the MIT License:
+# https://opensource.org/licenses/MIT.
+
+from typing import Callable, Literal, TypeAlias
+
+import numpy as np
+
+AggMethod: TypeAlias = Literal[
+    "center",
+    "count",
+    "first",
+    "last",
+    "max",
+    "mean",
+    "median",
+    "mode",
+    "min",
+    "prod",
+    "std",
+    "sum",
+    "var",
+]
+AggMethods: TypeAlias = AggMethod | dict[AggMethod, list[str | np.dtype]]
+
+AggFunction: TypeAlias = Callable[[np.ndarray, tuple[int, ...] | None], np.ndarray]
+
+AGG_METHODS: dict[AggMethod, AggFunction] = {
+    "center": xec.center,
+    "count": np.count_nonzero,
+    "first": xec.first,
+    "last": xec.last,
+    "prod": np.nanprod,
+    "max": np.nanmax,
+    "mean": xec.mean,
+    "median": xec.median,
+    "min": np.nanmin,
+    "mode": xec.mode,
+    "std": xec.std,
+    "sum": np.nansum,
+    "var": xec.var,
+}
+
+SplineOrder: TypeAlias = Literal[0, 1, 2, 3]
+SplineOrders: TypeAlias = SplineOrder | dict[SplineOrder, list[str | np.dtype]]
diff --git a/xcube/core/resampling/reproject.py b/xcube/core/resampling/reproject.py
@@ -180,9 +180,10 @@ def reproject_dataset(
         )
         slices_reprojected = []
         # calculate reprojection of each chunk along the 1st (non-spatial) dimension.
-        for idx, chunk_size in enumerate(data_array.chunks[0]):
-            dim0_start = idx * chunk_size
-            dim0_end = (idx + 1) * chunk_size
+        dim0_end = 0
+        for chunk_size in data_array.chunks[0]:
+            dim0_start = dim0_end
+            dim0_end = dim0_start + chunk_size
 
             data_reprojected = da.map_blocks(
                 _reproject_block,
@@ -323,7 +324,7 @@ def _downscale_source_dataset(
             tile_size=source_gm.tile_size,
         )
         source_ds = affine_transform_dataset(
-            source_ds, source_gm, downscale_target_gm, var_configs
+            source_ds, source_gm, downscale_target_gm, var_configs=var_configs
         )
         var_bounds = [
             source_ds[var_name].attrs["bounds"]

diff --git a/xcube/core/resampling/spatial.py b/xcube/core/resampling/spatial.py
@@ -31,109 +31,58 @@ def resample_in_space(
     source_gm: Optional[GridMapping] = None,
     target_gm: Optional[GridMapping] = None,
     ref_ds: Optional[xr.Dataset] = None,
-    var_configs: Optional[Mapping[Hashable, Mapping[str, Any]]] = None,
-    encode_cf: bool = True,
-    gm_name: Optional[str] = None,
-    rectify_kwargs: Optional[dict] = None,
+    spline_orders: Optional[int | Mapping[type | str, int]] = None,
+    agg_methods: Optional[str | Mapping[type | str, str]] = None,
+    recover_nan: Optional[bool | Mapping[type | str, bool]] = False,
 ):
     """
     Resample a dataset *source_ds* in the spatial dimensions.
 
-    If the source grid mapping *source_gm* is not given,
-    it is derived from *dataset*:
-    ``source_gm = GridMapping.from_dataset(source_ds)``.
-
-    If the target grid mapping *target_gm* is not given,
-    it is derived from *source_gm* as
-    ``target_gm = source_gm.to_regular()``,
-    or if target dataset *ref_ds* is given as
-    ``target_gm = GridMapping.from_dataset(ref_ds)``.
-
-    New in 1.6: If *ref_ds* is given, its coordinate
-    variables are copied by reference into the returned
-    dataset.
-
-    If *source_gm* is almost equal to *target_gm*, this
-    function is a no-op and *dataset* is returned unchanged.
-
-    Otherwise, the function computes a spatially
-    resampled version of *dataset* and returns it.
-
-    Using *var_configs*, the resampling of individual
-    variables can be configured. If given, *var_configs*
-    must be a mapping from variable names to configuration
-    dictionaries which can have the following properties:
-
-    * ``spline_order`` (int) - The order of spline polynomials
-        used for interpolating. It is used for up-sampling only.
-        Possible values are 0 to 5.
-        Default is 1 (bi-linear) for floating point variables,
-        and 0 (= nearest neighbor) for integer and bool variables.
-    * ``aggregator`` (str) - An optional aggregating
-        function. It is used for down-sampling only.
-        Examples are ``numpy.nanmean``, ``numpy.nanmin``,
-        ``numpy.nanmax``.
-        Default is ``numpy.nanmean`` for floating point variables,
-        and None (= nearest neighbor) for integer and bool variables.
-    * ``recover_nan`` (bool) - whether a special algorithm
-        shall be used that is able to recover values that would
-        otherwise yield NaN during resampling.
-        Default is False for all variable types since this
-        may require considerable CPU resources on top.
-
-    Note that *var_configs* is only used if the resampling involves
-    an affine transformation. This is true if the CRS of
-    *source_gm* and CRS of *target_gm* are equal and one of two
-    cases is given:
-
-    1. *source_gm* is regular.
-       In this case the resampling is the affine transformation.
-       and the result is returned directly.
-    2. *source_gm* is not regular and has a lower resolution
-       than *target_cm*.
-       In this case *dataset* is down-sampled first using an affine
-       transformation. Then the result is rectified.
-
-    In all other cases, no affine transformation is applied and
-    the resampling is a direct rectification.
-
     Args:
-        source_ds: The source dataset. Data variables must have 
-            dimensions in the following order: optional `time` followed
-            by the y-dimension (e.g., `y` or `lat`) followed by the 
+        source_ds: The source dataset. Data variables must have
+            dimensions in the following order: optional 3rd dimension followed
+            by the y-dimension (e.g., `y` or `lat`) followed by the
             x-dimension (e.g., `x` or `lon`).
         source_gm: The source grid mapping.
         target_gm: The target grid mapping. Must be regular.
         ref_ds: An optional dataset that provides the
             target grid mapping if *target_gm* is not provided.
             If *ref_ds* is given, its coordinate variables are copied
             by reference into the returned dataset.
-        var_configs: Optional resampling configurations
-            for individual variables.
-        encode_cf: Whether to encode the target grid mapping
-            into the resampled dataset in a CF-compliant way.
-            Defaults to ``True``.
-        gm_name: Name for the grid mapping variable.
-            Defaults to "crs". Used only if *encode_cf* is ``True``.
-        rectify_kwargs: Keyword arguments passed func:`rectify_dataset`
-            should a rectification be required.
-
-
-    Returns: The spatially resampled dataset, or None if the requested
-        output area does not intersect with *dataset*.
+        spline_orders: Spline orders to be used for upsampling
+            spatial data variables. It can be a single spline order
+            for all variables or a dictionary that maps a variable name or a data dtype
+            to the spline order. A spline order is given by one of `0`
+            (nearest neighbor), `1` (linear), `2` (bi-linear), or `3` (cubic).
+            The default is `3` fo floating point datasets and `0` for integer datasets.
+        agg_methods: Aggregation methods to be used for downsampling
+            spatial data variables. It can be a single aggregation method for all
+            variables or a dictionary that maps a variable name or a data dtype to the
+            aggregation method. The aggregation method is a function like `np.sum`,
+            `np.mean` which is propagated to [`dask.array.coarsen`](https://docs.dask.org/en/stable/generated/dask.array.coarsen.html).
+        recover_nan: If true, whether a special algorithm shall be used that is able
+            to recover values that would otherwise yield NaN during resampling. Default
+            is False for all variable types since this may require considerable CPU
+            resources on top. It can be a single aggregation method for all
+            variables or a dictionary that maps a variable name or a data dtype to a
+            boolean.
+
+    Returns:
+        The spatially resampled dataset, or None if the requested output area does
+        not intersect with *dataset*.
 
     Notes:
-        The method `xcube.core.resampling.reproject_dataset` is a high-performance
-        alternative to `xcube.core.resampling.resample_in_space` for reprojecting
-        datasets to different coordinate reference systems (CRS). It is ideal for
-        reprojection between regular grids. It improves computational efficiency
-        and simplifies the reprojection process.
-
-        The methods `reproject_dataset` and `resample_in_space` produce nearly identical
-        results when reprojecting to a different CRS, with only negligible differences.
-        `resample_in_space` remains available to preserve compatibility with existing
-        services. Once `reproject_dataset` proves stable in production use, it may be
-        integrated into `resample_in_space`.
+        - If the source grid mapping *source_gm* is not given, it is derived from *dataset*:
+          `source_gm = GridMapping.from_dataset(source_ds)`.
+        - If the target grid mapping *target_gm* is not given, it is derived from
+          *ref_ds* as `target_gm = GridMapping.from_dataset(ref_ds)`; if *ref_ds* is
+          not given, *target_gm* is derived from *source_gm* as
+          `target_gm = source_gm.to_regular()`.
+        - New in 1.6: If *ref_ds* is given, its coordinate variables are copied by
+          reference into the returned dataset.
+        - If *source_gm* is almost equal to *target_gm*, this function is a no-op
+          and *dataset* is returned unchanged.
+        - further information is given in the [xcube documentation](https://xcube.readthedocs.io/en/latest/rectify.html)
     """
     if source_gm is None:
         # No source grid mapping given, so do derive it from dataset.
@@ -149,9 +98,6 @@ def resample_in_space(
 
     if source_gm.is_close(target_gm):
         # If source and target grid mappings are almost equal.
-        # NOTE: Actually we should only return input here if
-        # encode_cf == False and gm_name is None and target_ds is None.
-        # Otherwise, create a copy and apply encoding and coords copy.
         return source_ds
 
     # target_gm must be regular