diff --git a/docs/source/index.rst b/docs/source/index.rst index 63fb952e4..9a6fcf31b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -35,7 +35,7 @@ cubes from EO data. Dataset Convention Multi-Resolution Datasets Data Store Conventions - Spatial Rectification + Spatial Resampling Developer Guide diff --git a/docs/source/rectify.md b/docs/source/spatial_resampling.md similarity index 85% rename from docs/source/rectify.md rename to docs/source/spatial_resampling.md index 2d86a8a12..3704b4941 100644 --- a/docs/source/rectify.md +++ b/docs/source/spatial_resampling.md @@ -1,7 +1,36 @@ -# Spatial Rectification Algorithm +# Spatial Resampling Algorithms -This chapter describes the algorithm used in the function [`rectify_dataset()`](api.html#xcube.core.resampling.rectify_dataset) -of module `xcube.core.resampling`. The function geometrically transforms +This chapter describes the algorithms used in the functions +[`resampling_in_space()`](api.html#xcube.core.resampling.resampling_in_space) +of module `xcube.core.resampling`. Depending on the grid-mapping of the input dataset +and the desired target grid-mapping, three different algorithms are performed: + +1. affine transformation: if the CRS of the source and target is the same, only a + resampling via an affine transfomration is applied using + [`dask_image.ndinterp.affine_transform`](https://image.dask.org/en/latest/dask_image.ndinterp.html); + up-sampling (finer resolution) is governed by the spline-order, down-sampling + (coarser resolution) is governed by the aggregation methods. +2. reprojection: if the CRS of the source and target are different and both source dataset + and target gridmapping are regular, then reprojection can be applied. +3. rectification: if the source data set is given by a 2d spatial irregular gridmapping + and the target girdmapping shall be regular. + +## Spatial Reprojection Algorithm + +The `reproject_dataset` function reprojects a spatial dataset from its original +coordinate reference system (CRS) to a new CRS, assuming that both the source and +target grid mappings are regular grids. The performance is lazy and thus can be applied +to large chunked datasets. + +This method uses spatial interpolation techniques and can handle data with one +additional leading dimension, such as time or bands. + + + + +## Spatial Rectification Algorithm + +The function geometrically transforms spatial [data variables](https://docs.xarray.dev/en/stable/user-guide/terminology.html#term-Variable) of a given [dataset](https://docs.xarray.dev/en/stable/user-guide/terminology.html#term-Dataset) from an irregular source @@ -9,7 +38,7 @@ from an irregular source into new data variables for a given regular target grid mapping and returns them as a new dataset. -## Problem Description +### Problem Description The following figure shows a Sentinel-3 OLCI Level-1b scene in its original satellite perspective. In addition to the measured reflectances, the data @@ -42,7 +71,7 @@ is shown here: ![OLCI L1C Output](rectify/olci-output.png) -## Algorithm Description +### Algorithm Description The input to the rectification algorithm is satellite imagery in satellite viewing perspective. In addition, two images – one for each spatial @@ -148,7 +177,7 @@ with *VA = V1 + u (V2 − V1)* *VB = V3 + u (V4 − V3)* -## Remarks +### Remarks **(1)** The target pixel size should be less or equal the source pixel size, so that triangle patches in the source refer to multiple pixels diff --git a/xcube/core/resampling/_utils.py b/xcube/core/resampling/_utils.py new file mode 100644 index 000000000..26aa8d49d --- /dev/null +++ b/xcube/core/resampling/_utils.py @@ -0,0 +1,45 @@ +# Copyright (c) 2018-2025 by xcube team and contributors +# Permissions are hereby granted under the terms of the MIT License: +# https://opensource.org/licenses/MIT. + +from typing import Callable, Literal, TypeAlias + +import numpy as np + +AggMethod: TypeAlias = Literal[ + "center", + "count", + "first", + "last", + "max", + "mean", + "median", + "mode", + "min", + "prod", + "std", + "sum", + "var", +] +AggMethods: TypeAlias = AggMethod | dict[AggMethod, list[str | np.dtype]] + +AggFunction: TypeAlias = Callable[[np.ndarray, tuple[int, ...] | None], np.ndarray] + +AGG_METHODS: dict[AggMethod, AggFunction] = { + "center": xec.center, + "count": np.count_nonzero, + "first": xec.first, + "last": xec.last, + "prod": np.nanprod, + "max": np.nanmax, + "mean": xec.mean, + "median": xec.median, + "min": np.nanmin, + "mode": xec.mode, + "std": xec.std, + "sum": np.nansum, + "var": xec.var, +} + +SplineOrder: TypeAlias = Literal[0, 1, 2, 3] +SplineOrders: TypeAlias = SplineOrder | dict[SplineOrder, list[str | np.dtype]] diff --git a/xcube/core/resampling/reproject.py b/xcube/core/resampling/reproject.py index 42d91a2e9..d6ab360aa 100644 --- a/xcube/core/resampling/reproject.py +++ b/xcube/core/resampling/reproject.py @@ -180,9 +180,10 @@ def reproject_dataset( ) slices_reprojected = [] # calculate reprojection of each chunk along the 1st (non-spatial) dimension. - for idx, chunk_size in enumerate(data_array.chunks[0]): - dim0_start = idx * chunk_size - dim0_end = (idx + 1) * chunk_size + dim0_end = 0 + for chunk_size in data_array.chunks[0]: + dim0_start = dim0_end + dim0_end = dim0_start + chunk_size data_reprojected = da.map_blocks( _reproject_block, @@ -323,7 +324,7 @@ def _downscale_source_dataset( tile_size=source_gm.tile_size, ) source_ds = affine_transform_dataset( - source_ds, source_gm, downscale_target_gm, var_configs + source_ds, source_gm, downscale_target_gm, var_configs=var_configs ) var_bounds = [ source_ds[var_name].attrs["bounds"] diff --git a/xcube/core/resampling/spatial.py b/xcube/core/resampling/spatial.py index a7177f931..938899a23 100644 --- a/xcube/core/resampling/spatial.py +++ b/xcube/core/resampling/spatial.py @@ -31,76 +31,17 @@ def resample_in_space( source_gm: Optional[GridMapping] = None, target_gm: Optional[GridMapping] = None, ref_ds: Optional[xr.Dataset] = None, - var_configs: Optional[Mapping[Hashable, Mapping[str, Any]]] = None, - encode_cf: bool = True, - gm_name: Optional[str] = None, - rectify_kwargs: Optional[dict] = None, + spline_orders: Optional[int | Mapping[type | str, int]] = None, + agg_methods: Optional[str | Mapping[type | str, str]] = None, + recover_nan: Optional[bool | Mapping[type | str, bool]] = False, ): """ Resample a dataset *source_ds* in the spatial dimensions. - If the source grid mapping *source_gm* is not given, - it is derived from *dataset*: - ``source_gm = GridMapping.from_dataset(source_ds)``. - - If the target grid mapping *target_gm* is not given, - it is derived from *source_gm* as - ``target_gm = source_gm.to_regular()``, - or if target dataset *ref_ds* is given as - ``target_gm = GridMapping.from_dataset(ref_ds)``. - - New in 1.6: If *ref_ds* is given, its coordinate - variables are copied by reference into the returned - dataset. - - If *source_gm* is almost equal to *target_gm*, this - function is a no-op and *dataset* is returned unchanged. - - Otherwise, the function computes a spatially - resampled version of *dataset* and returns it. - - Using *var_configs*, the resampling of individual - variables can be configured. If given, *var_configs* - must be a mapping from variable names to configuration - dictionaries which can have the following properties: - - * ``spline_order`` (int) - The order of spline polynomials - used for interpolating. It is used for up-sampling only. - Possible values are 0 to 5. - Default is 1 (bi-linear) for floating point variables, - and 0 (= nearest neighbor) for integer and bool variables. - * ``aggregator`` (str) - An optional aggregating - function. It is used for down-sampling only. - Examples are ``numpy.nanmean``, ``numpy.nanmin``, - ``numpy.nanmax``. - Default is ``numpy.nanmean`` for floating point variables, - and None (= nearest neighbor) for integer and bool variables. - * ``recover_nan`` (bool) - whether a special algorithm - shall be used that is able to recover values that would - otherwise yield NaN during resampling. - Default is False for all variable types since this - may require considerable CPU resources on top. - - Note that *var_configs* is only used if the resampling involves - an affine transformation. This is true if the CRS of - *source_gm* and CRS of *target_gm* are equal and one of two - cases is given: - - 1. *source_gm* is regular. - In this case the resampling is the affine transformation. - and the result is returned directly. - 2. *source_gm* is not regular and has a lower resolution - than *target_cm*. - In this case *dataset* is down-sampled first using an affine - transformation. Then the result is rectified. - - In all other cases, no affine transformation is applied and - the resampling is a direct rectification. - Args: - source_ds: The source dataset. Data variables must have - dimensions in the following order: optional `time` followed - by the y-dimension (e.g., `y` or `lat`) followed by the + source_ds: The source dataset. Data variables must have + dimensions in the following order: optional 3rd dimension followed + by the y-dimension (e.g., `y` or `lat`) followed by the x-dimension (e.g., `x` or `lon`). source_gm: The source grid mapping. target_gm: The target grid mapping. Must be regular. @@ -108,32 +49,40 @@ def resample_in_space( target grid mapping if *target_gm* is not provided. If *ref_ds* is given, its coordinate variables are copied by reference into the returned dataset. - var_configs: Optional resampling configurations - for individual variables. - encode_cf: Whether to encode the target grid mapping - into the resampled dataset in a CF-compliant way. - Defaults to ``True``. - gm_name: Name for the grid mapping variable. - Defaults to "crs". Used only if *encode_cf* is ``True``. - rectify_kwargs: Keyword arguments passed func:`rectify_dataset` - should a rectification be required. - - - Returns: The spatially resampled dataset, or None if the requested - output area does not intersect with *dataset*. + spline_orders: Spline orders to be used for upsampling + spatial data variables. It can be a single spline order + for all variables or a dictionary that maps a variable name or a data dtype + to the spline order. A spline order is given by one of `0` + (nearest neighbor), `1` (linear), `2` (bi-linear), or `3` (cubic). + The default is `3` fo floating point datasets and `0` for integer datasets. + agg_methods: Aggregation methods to be used for downsampling + spatial data variables. It can be a single aggregation method for all + variables or a dictionary that maps a variable name or a data dtype to the + aggregation method. The aggregation method is a function like `np.sum`, + `np.mean` which is propagated to [`dask.array.coarsen`](https://docs.dask.org/en/stable/generated/dask.array.coarsen.html). + recover_nan: If true, whether a special algorithm shall be used that is able + to recover values that would otherwise yield NaN during resampling. Default + is False for all variable types since this may require considerable CPU + resources on top. It can be a single aggregation method for all + variables or a dictionary that maps a variable name or a data dtype to a + boolean. + + Returns: + The spatially resampled dataset, or None if the requested output area does + not intersect with *dataset*. Notes: - The method `xcube.core.resampling.reproject_dataset` is a high-performance - alternative to `xcube.core.resampling.resample_in_space` for reprojecting - datasets to different coordinate reference systems (CRS). It is ideal for - reprojection between regular grids. It improves computational efficiency - and simplifies the reprojection process. - - The methods `reproject_dataset` and `resample_in_space` produce nearly identical - results when reprojecting to a different CRS, with only negligible differences. - `resample_in_space` remains available to preserve compatibility with existing - services. Once `reproject_dataset` proves stable in production use, it may be - integrated into `resample_in_space`. + - If the source grid mapping *source_gm* is not given, it is derived from *dataset*: + `source_gm = GridMapping.from_dataset(source_ds)`. + - If the target grid mapping *target_gm* is not given, it is derived from + *ref_ds* as `target_gm = GridMapping.from_dataset(ref_ds)`; if *ref_ds* is + not given, *target_gm* is derived from *source_gm* as + `target_gm = source_gm.to_regular()`. + - New in 1.6: If *ref_ds* is given, its coordinate variables are copied by + reference into the returned dataset. + - If *source_gm* is almost equal to *target_gm*, this function is a no-op + and *dataset* is returned unchanged. + - further information is given in the [xcube documentation](https://xcube.readthedocs.io/en/latest/rectify.html) """ if source_gm is None: # No source grid mapping given, so do derive it from dataset. @@ -149,9 +98,6 @@ def resample_in_space( if source_gm.is_close(target_gm): # If source and target grid mappings are almost equal. - # NOTE: Actually we should only return input here if - # encode_cf == False and gm_name is None and target_ds is None. - # Otherwise, create a copy and apply encoding and coords copy. return source_ds # target_gm must be regular