diff --git a/CHANGES.md b/CHANGES.md index e1aa88486..34b979098 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,8 @@ ## Changes in 1.0.6 (in development) +* Reimplemented the logic to detect a dataset's grid mapping + in module `xcube.core.gridmapping.cfconv`. + * Updated [xcube Dataset Specification](docs/source/cubespec.md). * Bundled [xcube-viewer 1.1.0-dev.1](https://github.com/dcs4cop/xcube-viewer/releases/tag/v1.1.0-dev.1). * Fixed various issues with the auto-generated Python API documentation. diff --git a/test/core/gridmapping/test_cfconv.py b/test/core/gridmapping/test_cfconv.py index 703a60f8d..9a088f274 100644 --- a/test/core/gridmapping/test_cfconv.py +++ b/test/core/gridmapping/test_cfconv.py @@ -1,24 +1,15 @@ -import shutil import unittest -from typing import Set +from typing import Tuple -import fsspec import numpy as np import pyproj +import pytest import xarray as xr -from test.s3test import MOTO_SERVER_ENDPOINT_URL -from test.s3test import S3Test -from xcube.core.gridmapping import GridMapping -from xcube.core.gridmapping.cfconv import GridCoords -from xcube.core.gridmapping.cfconv import GridMappingProxy -from xcube.core.gridmapping.cfconv import add_spatial_ref -from xcube.core.gridmapping.cfconv import get_dataset_grid_mapping_proxies -from xcube.core.new import new_cube -from xcube.core.store.fs.registry import new_fs_data_store +from xcube.core.gridmapping import GridMapping, CRS_CRS84 +from xcube.core.gridmapping.cfconv import find_grid_mapping_for_data_var CRS_WGS84 = pyproj.crs.CRS(4326) -CRS_CRS84 = pyproj.crs.CRS.from_string("urn:ogc:def:crs:OGC:1.3:CRS84") CRS_UTM_33N = pyproj.crs.CRS(32633) CRS_ROTATED_POLE = pyproj.crs.CRS.from_cf( @@ -27,308 +18,404 @@ grid_north_pole_longitude=170.)) -class GetDatasetGridMappingsTest(unittest.TestCase): - def test_no_crs_lon_lat_common_names(self): - dataset = xr.Dataset(coords=dict(lon=xr.DataArray(np.linspace(10, 12, 11), dims='lon'), - lat=xr.DataArray(np.linspace(50, 52, 11), dims='lat'))) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn(None, grid_mappings) - grid_mapping = grid_mappings.get(None) - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertEqual(CRS_WGS84, grid_mapping.crs) - self.assertEqual('latitude_longitude', grid_mapping.name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('lon', grid_mapping.coords.x.name) - self.assertEqual('lat', grid_mapping.coords.y.name) - - def test_no_crs_lon_lat_standard_names(self): - dataset = xr.Dataset(coords=dict(weird_x=xr.DataArray(np.linspace(10, 12, 11), dims='i', - attrs=dict(standard_name='longitude')), - weird_y=xr.DataArray(np.linspace(50, 52, 11), dims='j', - attrs=dict(standard_name='latitude')))) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn(None, grid_mappings) - grid_mapping = grid_mappings.get(None) - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertEqual(CRS_WGS84, grid_mapping.crs) - self.assertEqual('latitude_longitude', grid_mapping.name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('weird_x', grid_mapping.coords.x.name) - self.assertEqual('weird_y', grid_mapping.coords.y.name) - - def test_crs_x_y_with_common_names(self): - dataset = xr.Dataset(dict(crs=xr.DataArray(0, attrs=CRS_UTM_33N.to_cf())), - coords=dict(x=xr.DataArray(np.linspace(1000, 12000, 11), dims='x'), - y=xr.DataArray(np.linspace(5000, 52000, 11), dims='y'))) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn('crs', grid_mappings) - grid_mapping = grid_mappings.get('crs') - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertEqual(CRS_UTM_33N, grid_mapping.crs) - self.assertEqual('transverse_mercator', grid_mapping.name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('x', grid_mapping.coords.x.name) - self.assertEqual('y', grid_mapping.coords.y.name) - - def test_crs_x_y_with_standard_names(self): - dataset = xr.Dataset(dict(crs=xr.DataArray(0, attrs=CRS_UTM_33N.to_cf())), - coords=dict(myx=xr.DataArray(np.linspace(1000, 12000, 11), dims='x', - attrs=dict(standard_name='projection_x_coordinate')), - myy=xr.DataArray(np.linspace(5000, 52000, 11), dims='y', - attrs=dict(standard_name='projection_y_coordinate')))) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn('crs', grid_mappings) - grid_mapping = grid_mappings.get('crs') - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertEqual(CRS_UTM_33N, grid_mapping.crs) - self.assertEqual('transverse_mercator', grid_mapping.name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('myx', grid_mapping.coords.x.name) - self.assertEqual('myy', grid_mapping.coords.y.name) - - def test_latitude_longitude_with_x_y(self): - # This is what we get when opening a CRS-84 GeoTIFF using rioxarray - dataset = xr.Dataset( - dict( - band_1=xr.DataArray(np.zeros((11, 11)), dims=["y", "x"]), - spatial_ref=xr.DataArray(0, attrs=CRS_CRS84.to_cf()) - ), - coords=dict( - x=xr.DataArray(np.linspace(10, 20, 11), dims='x'), - y=xr.DataArray(np.linspace(50, 40, 11), dims='y'), - ) +class FindGridMappingForVarTest(unittest.TestCase): + w = 20 + h = 10 + + def assert_grid_mapping_tuple_tuple_ok( + self, + actual_gmt, + expected_crs: pyproj.CRS, + expected_gm_name: str, + expected_xy_names: Tuple[str, str], + expected_xy_dims: Tuple[str, str], + expected_xy_sizes: Tuple[int, int] + ): + self.assertIsInstance(actual_gmt, tuple) + self.assertEqual(3, len(actual_gmt)) + crs, gm_name, xy_coords = actual_gmt + self.assertEqual(expected_crs.to_cf(), crs.to_cf()) + self.assertEqual(expected_gm_name, gm_name) + self.assertIsInstance(xy_coords, tuple) + self.assertEqual(2, len(xy_coords)) + x, y = xy_coords + self.assertIsInstance(x, xr.DataArray) + self.assertIsInstance(y, xr.DataArray) + self.assertEqual(1, x.ndim) + self.assertEqual(1, y.ndim) + self.assertEqual(expected_xy_dims, (x.dims[0], y.dims[0])) + self.assertEqual(expected_xy_names, (x.name, y.name)) + self.assertEqual(expected_xy_sizes, (x.size, y.size)) + + def new_data_var(self, + shape=None, + dims=None, + attrs=None) -> xr.DataArray: + return xr.DataArray( + np.zeros(shape or (1, self.h, self.w), dtype=np.float32), + dims=dims or ("time", "y", "x"), + attrs=attrs ) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn('spatial_ref', grid_mappings) - grid_mapping = grid_mappings.get('spatial_ref') - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertEqual(CRS_CRS84, grid_mapping.crs) - self.assertEqual('latitude_longitude', grid_mapping.name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('x', grid_mapping.coords.x.name) - self.assertEqual('y', grid_mapping.coords.y.name) - def test_rotated_pole_with_common_names(self): - dataset = xr.Dataset( - dict( - rotated_pole=xr.DataArray(0, attrs=CRS_ROTATED_POLE.to_cf()) - ), - coords=dict( - rlon=xr.DataArray(np.linspace(-180, 180, 11), dims='rlon'), - rlat=xr.DataArray(np.linspace(0, 90, 11), dims='rlat') - ) + def new_x_coord_var(self, + dim=None, + attrs=None) -> xr.DataArray: + return xr.DataArray( + np.linspace(10000, 10000 + 10 * self.w, self.w), + dims=dim or "x", + attrs=attrs ) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn('rotated_pole', grid_mappings) - grid_mapping = grid_mappings.get('rotated_pole') - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertIn('Geographic', grid_mapping.crs.type_name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('rlon', grid_mapping.coords.x.name) - self.assertEqual('rlat', grid_mapping.coords.y.name) - def test_rotated_pole_with_standard_names(self): - dataset = xr.Dataset( - dict( - rotated_pole=xr.DataArray(0, - attrs=CRS_ROTATED_POLE.to_cf()) - ), - coords=dict( - u=xr.DataArray(np.linspace(-180, 180, 11), dims='u', - attrs=dict(standard_name='grid_longitude')), - v=xr.DataArray(np.linspace(0, 90, 11), dims='v', - attrs=dict(standard_name='grid_latitude')) - ) + def new_y_coord_var(self, + dim=None, + attrs=None) -> xr.DataArray: + return xr.DataArray( + np.linspace(10000, 10000 + 10 * self.h, self.h), + dims=dim or "y", + attrs=attrs ) - grid_mappings = get_dataset_grid_mapping_proxies(dataset) - self.assertEqual(1, len(grid_mappings)) - self.assertIn('rotated_pole', grid_mappings) - grid_mapping = grid_mappings.get('rotated_pole') - self.assertIsInstance(grid_mapping, GridMappingProxy) - self.assertIn('Geographic', grid_mapping.crs.type_name) - self.assertIsInstance(grid_mapping.coords, GridCoords) - self.assertIsInstance(grid_mapping.coords.x, xr.DataArray) - self.assertIsInstance(grid_mapping.coords.y, xr.DataArray) - self.assertEqual('u', grid_mapping.coords.x.name) - self.assertEqual('v', grid_mapping.coords.y.name) - - -class XarrayDecodeCfTest(unittest.TestCase): - """Find out how xarray treats 1D and 2D coordinate variables when decode_cf=True or =False""" - - def test_cf_1d_coords(self): - self._write_coords(*self._gen_1d()) - self.assertVarNames({'noise', 'crs'}, {'lon', 'lat'}, decode_cf=True) - self.assertVarNames({'noise', 'crs'}, {'lon', 'lat'}, decode_cf=False) - - def test_cf_1d_data_vars(self): - self._write_data_vars(*self._gen_1d()) - self.assertVarNames({'noise', 'crs'}, {'lon', 'lat'}, decode_cf=True) - self.assertVarNames({'noise', 'crs'}, {'lon', 'lat'}, decode_cf=False) - - def test_cf_2d_coords(self): - self._write_coords(*self._gen_2d()) - self.assertVarNames({'noise', 'crs'}, {'lon', 'lat'}, decode_cf=True) - self.assertVarNames({'noise', 'crs', 'lon', 'lat'}, set(), decode_cf=False) - - def test_cf_2d_data_vars(self): - self._write_data_vars(*self._gen_2d()) - self.assertVarNames({'noise', 'crs', 'lon', 'lat'}, set(), decode_cf=True) - self.assertVarNames({'noise', 'crs', 'lon', 'lat'}, set(), decode_cf=False) - - def assertVarNames(self, exp_data_vars: Set[str], exp_coords: Set[str], decode_cf: bool): - ds1 = xr.open_zarr('noise.zarr', decode_cf=decode_cf) - self.assertEqual(exp_coords, set(ds1.coords)) - self.assertEqual(exp_data_vars, set(ds1.data_vars)) - - @classmethod - def _write_coords(cls, noise, crs, lon, lat): - dataset = xr.Dataset(dict(noise=noise, crs=crs), coords=dict(lon=lon, lat=lat)) - dataset.to_zarr('noise.zarr', mode='w') - @classmethod - def _write_data_vars(cls, noise, crs, lon, lat): - dataset = xr.Dataset(dict(noise=noise, crs=crs, lon=lon, lat=lat)) - dataset.to_zarr('noise.zarr', mode='w') - - @classmethod - def tearDownClass(cls) -> None: - shutil.rmtree('noise.zarr') - - def _gen_1d(self): - noise = xr.DataArray(np.random.random((11, 11)), dims=('lat', 'lon')) - crs = xr.DataArray(0, attrs=CRS_CRS84.to_cf()) - lon = xr.DataArray(np.linspace(10, 12, 11), dims='lon') - lat = xr.DataArray(np.linspace(50, 52, 11), dims='lat') - noise.attrs['grid_mapping'] = 'crs' - lon.attrs['standard_name'] = 'longitude' - lat.attrs['standard_name'] = 'latitude' - self.assertEqual(('lon',), lon.dims) - self.assertEqual(('lat',), lat.dims) - return noise, crs, lon, lat - - def _gen_2d(self): - noise = xr.DataArray(np.random.random((11, 11)), dims=('y', 'x')) - crs = xr.DataArray(0, attrs=CRS_CRS84.to_cf()) - lon = xr.DataArray(np.linspace(10, 12, 11), dims='x') - lat = xr.DataArray(np.linspace(50, 52, 11), dims='y') - lat, lon = xr.broadcast(lat, lon) - noise.attrs['grid_mapping'] = 'crs' - lon.attrs['standard_name'] = 'longitude' - lat.attrs['standard_name'] = 'latitude' - self.assertEqual(('y', 'x'), lon.dims) - self.assertEqual(('y', 'x'), lat.dims) - return noise, crs, lon, lat - - -class AddSpatialRefTest(S3Test, unittest.TestCase): - - def test_add_spatial_ref(self): - self.assert_add_spatial_ref_ok(None, None) - self.assert_add_spatial_ref_ok(None, ('cx', 'cy')) - self.assert_add_spatial_ref_ok('crs', None) - self.assert_add_spatial_ref_ok('crs', ('cx', 'cy')) - - def assert_add_spatial_ref_ok(self, crs_var_name, xy_dim_names): - - root = 'eurodatacube-test/xcube-eea' - data_id = 'test.zarr' - crs = pyproj.CRS.from_string("EPSG:3035") - - if xy_dim_names: - x_name, y_name = xy_dim_names - else: - x_name, y_name = 'x', 'y' - - cube = new_cube(x_name=x_name, - y_name=y_name, - x_start=0, - y_start=0, - x_res=10, - y_res=10, - x_units='metres', - y_units='metres', - drop_bounds=True, - width=100, - height=100, - variables=dict(A=1.3, B=8.3)) - - storage_options = dict( - anon=False, - client_kwargs=dict( - endpoint_url=MOTO_SERVER_ENDPOINT_URL, - ) + def new_crs_var(self, crs) -> xr.DataArray: + return xr.DataArray(0, attrs=crs.to_cf()) + + def test_with_gm_var_and_named_coords(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs: a b"} + ), + "crs": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "a": self.new_x_coord_var(), + "b": self.new_y_coord_var(), + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_UTM_33N, + "transverse_mercator", + ("a", "b"), + ("x", "y"), + (20, 10) ) - fs: fsspec.AbstractFileSystem = fsspec.filesystem('s3', - **storage_options) - if fs.isdir(root): - fs.rm(root, recursive=True) - fs.mkdirs(root, exist_ok=True) - - data_store = new_fs_data_store('s3', - root=root, - storage_options=storage_options) + def test_with_gm_var_fails_with_invalid_coords(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs: a b"} + ), + "crs": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + with pytest.raises(ValueError, + match="invalid coordinates in" + " grid mapping value 'crs: a b'"): + find_grid_mapping_for_data_var(ds, "sst") + + def test_with_gm_var_fails_with_invalid_crs(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": xr.DataArray(0, attrs={"bibo": 12}) + }, + coords={ + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + with pytest.raises(ValueError, + match="variable 'crs' is not" + " a valid grid mapping"): + find_grid_mapping_for_data_var(ds, "sst") + + def test_with_gm_var_fails_with_invalid_gm_var(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "spatial_ref": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + with pytest.raises(ValueError, + match="grid mapping variable 'crs'" + " not found in dataset"): + find_grid_mapping_for_data_var(ds, "sst") + + def test_with_gm_var_and_standard_name_lat_lon(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_WGS84) + }, + coords={ + "a": self.new_x_coord_var( + attrs={"standard_name": "longitude"} + ), + "b": self.new_y_coord_var( + attrs={"standard_name": "latitude"} + ), + "lon": self.new_x_coord_var(), + "lat": self.new_y_coord_var(), + } + ) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_WGS84, + "latitude_longitude", + ("a", "b"), + ("x", "y"), + (20, 10) + ) - data_store.write_data(cube, data_id=data_id) - cube = data_store.open_data(data_id) - self.assertEqual({'A', 'B', 'time', x_name, y_name}, - set(cube.variables)) + def test_with_gm_var_and_standard_name_rot_lat_lon(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_ROTATED_POLE) + }, + coords={ + "a": self.new_x_coord_var( + attrs={"standard_name": "grid_longitude"} + ), + "b": self.new_y_coord_var( + attrs={"standard_name": "grid_latitude"} + ), + "lon": self.new_x_coord_var(), + "lat": self.new_y_coord_var(), + } + ) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_ROTATED_POLE, + "rotated_latitude_longitude", + ("a", "b"), + ("x", "y"), + (20, 10) + ) - with self.assertRaises(ValueError) as cm: - GridMapping.from_dataset(cube) - self.assertEqual( - ('cannot find any grid mapping in dataset',), - cm.exception.args + def test_with_gm_var_and_standard_name_projected(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "a": self.new_x_coord_var( + attrs={"standard_name": "projection_x_coordinate"} + ), + "b": self.new_y_coord_var( + attrs={"standard_name": "projection_y_coordinate"} + ), + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_UTM_33N, + "transverse_mercator", + ("a", "b"), + ("x", "y"), + (20, 10) ) - path = f"{root}/{data_id}" - group_store = fs.get_mapper(path, create=True) + def test_with_gm_var_and_axis(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "a": self.new_x_coord_var(attrs={"axis": "X"}), + "b": self.new_y_coord_var(attrs={"axis": "Y"}), + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_UTM_33N, + "transverse_mercator", + ("a", "b"), + ("x", "y"), + (20, 10) + ) - expected_crs_var_name = crs_var_name or 'spatial_ref' + def test_with_gm_var_and_common_dim_and_var_name(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "a": self.new_x_coord_var(), + "b": self.new_y_coord_var(), + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_UTM_33N, + "transverse_mercator", + ("x", "y"), + ("x", "y"), + (20, 10) + ) - self.assertTrue(fs.exists(path)) - self.assertFalse(fs.exists(f"{path}/{expected_crs_var_name}")) - self.assertFalse(fs.exists(f"{path}/{expected_crs_var_name}/.zarray")) - self.assertFalse(fs.exists(f"{path}/{expected_crs_var_name}/.zattrs")) + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + dims=("time", "lat", "lon"), + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_WGS84) + }, + coords={ + "a": self.new_x_coord_var(dim="lon"), + "b": self.new_y_coord_var(dim="lat"), + "lon": self.new_x_coord_var(dim="lon"), + "lat": self.new_y_coord_var(dim="lat"), + } + ) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_WGS84, + "latitude_longitude", + ("lon", "lat"), + ("lon", "lat"), + (20, 10) + ) - kwargs = {} - if crs_var_name is not None: - kwargs.update(crs_var_name=crs_var_name) - if xy_dim_names is not None: - kwargs.update(xy_dim_names=xy_dim_names) - add_spatial_ref(group_store, crs, **kwargs) + def test_with_gm_var_and_common_dim_name(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + attrs={"grid_mapping": "crs"} + ), + "crs": self.new_crs_var(CRS_UTM_33N) + }, + coords={ + "a": self.new_x_coord_var(), + "b": self.new_y_coord_var(), + } + ) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_UTM_33N, + "transverse_mercator", + ("a", "b"), + ("x", "y"), + (20, 10) + ) - self.assertTrue(fs.exists(f"{path}/{expected_crs_var_name}")) - self.assertTrue(fs.exists(f"{path}/{expected_crs_var_name}/.zarray")) - self.assertTrue(fs.exists(f"{path}/{expected_crs_var_name}/.zattrs")) + def test_with_gm_var_but_without_spatial_var(self): + ds = xr.Dataset( + data_vars={ + "crs": self.new_crs_var(CRS_UTM_33N), + }, + coords={ + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + gmt = find_grid_mapping_for_data_var(ds, "crs") + self.assertIsNone(gmt) + + def test_without_gm_var_and_common_dim_and_var_name(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var(), + }, + coords={ + "a": self.new_x_coord_var(), + "b": self.new_y_coord_var(), + "x": self.new_x_coord_var(), + "y": self.new_y_coord_var(), + } + ) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_WGS84, + "latitude_longitude", + ("x", "y"), + ("x", "y"), + (20, 10) + ) - cube = data_store.open_data(data_id) - self.assertEqual({'A', 'B', 'time', - x_name, y_name, expected_crs_var_name}, - set(cube.variables)) - self.assertEqual(expected_crs_var_name, - cube.A.attrs.get('grid_mapping')) - self.assertEqual(expected_crs_var_name, - cube.B.attrs.get('grid_mapping')) + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var( + dims=("time", "lat", "lon"), + ), + }, + coords={ + "a": self.new_x_coord_var(dim="lon"), + "b": self.new_y_coord_var(dim="lat"), + "lon": self.new_x_coord_var(dim="lon"), + "lat": self.new_y_coord_var(dim="lat"), + } + ) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_WGS84, + "latitude_longitude", + ("lon", "lat"), + ("lon", "lat"), + (20, 10) + ) - gm = GridMapping.from_dataset(cube) - self.assertIn("LAEA Europe", gm.crs.srs) + def test_without_gm_var_and_common_dim_name(self): + ds = xr.Dataset( + data_vars={ + "sst": self.new_data_var() + }, + coords={ + "a": self.new_x_coord_var(), + "b": self.new_y_coord_var(), + } + ) + gmt = find_grid_mapping_for_data_var(ds, "sst") + self.assert_grid_mapping_tuple_tuple_ok( + gmt, + CRS_WGS84, + "latitude_longitude", + ("a", "b"), + ("x", "y"), + (20, 10) + ) diff --git a/test/core/gridmapping/test_dataset.py b/test/core/gridmapping/test_dataset.py index 17ce3a7b0..40383c9a7 100644 --- a/test/core/gridmapping/test_dataset.py +++ b/test/core/gridmapping/test_dataset.py @@ -3,6 +3,7 @@ import numpy as np import pyproj +import pytest import xarray as xr import xcube.core.new @@ -40,12 +41,10 @@ def test_from_regular_cube_with_crs(self): gm1 = GridMapping.from_dataset(dataset) self.assertEqual(pyproj.CRS.from_string('epsg:25832'), gm1.crs) dataset = dataset.drop_vars('crs') - gm2 = GridMapping.from_dataset(dataset) - self.assertEqual(GEO_CRS, gm2.crs) - gm3 = GridMapping.from_dataset(dataset, crs=gm1.crs) - self.assertEqual(gm1.crs, gm3.crs) - self.assertEqual(('x', 'y'), gm3.xy_var_names) - self.assertEqual(('x', 'y'), gm3.xy_dim_names) + with pytest.raises(ValueError, + match="grid mapping variable 'crs'" + " not found in dataset"): + GridMapping.from_dataset(dataset) def test_from_regular_cube_no_chunks_and_chunks(self): dataset = xcube.core.new.new_cube(variables=dict(rad=0.5)) diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index 5f212a307..a470a1929 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -1,370 +1,324 @@ # The MIT License (MIT) -# Copyright (c) 2021 by the xcube development team and contributors +# Copyright (c) 2023 by the xcube development team and contributors # -# Permission is hereby granted, free of charge, to any person obtaining a copy of -# this software and associated documentation files (the "Software"), to deal in -# the Software without restriction, including without limitation the rights to -# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -# of the Software, and to permit persons to whom the Software is furnished to do -# so, subject to the following conditions: +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: # -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. -import warnings -from collections.abc import MutableMapping -from typing import Optional, Dict, Any, Hashable, Union, Set, List, Tuple +from typing import Optional, Sequence, Tuple, Dict, Hashable, List -import numpy as np import pyproj +import pyproj.exceptions import xarray as xr -import zarr -import zarr.convenience -from xcube.core.schema import get_dataset_chunks from xcube.util.assertions import assert_instance +from xcube.util.assertions import assert_true from .base import CRS_WGS84 -class GridCoords: - """ - Grid coordinates comprising x and y of - type xarray.DataArray. - """ +DimPair = Tuple[Hashable, Hashable] +CoordsPair = Tuple[xr.DataArray, xr.DataArray] +GridMappingTuple = Tuple[pyproj.CRS, str, CoordsPair] - def __init__(self): - self.x: Optional[xr.DataArray] = None - self.y: Optional[xr.DataArray] = None +def find_grid_mappings_for_data_vars(dataset: xr.Dataset) \ + -> Dict[Hashable, GridMappingTuple]: + grid_mappings = find_grid_mappings_for_dataset(dataset) + vars_to_grid_mappings: Dict[Hashable, GridMappingTuple] = {} + for var_name, var in dataset.data_vars.items(): + if var.ndim < 2: + continue + for gmt in grid_mappings: + _, _, xy_coords = gmt + x_dim, y_dim = _get_xy_dims_from_xy_coords(xy_coords) + if x_dim in var.dims and y_dim in var.dims: + vars_to_grid_mappings[var_name] = gmt + break + return vars_to_grid_mappings -class GridMappingProxy: - """ - Grid mapping comprising *crs* of type pyproj.crs.CRS, - grid coordinates, an optional name, coordinates, and a - tile size (= spatial chunk sizes). - """ - def __init__(self, - crs: Optional[pyproj.crs.CRS] = None, - name: Optional[str] = None, - coords: Optional[GridCoords] = None, - tile_size: Optional[Tuple[int, int]] = None): - self.crs = crs - self.name = name - self.coords = coords - self.tile_size = tile_size +def find_grid_mappings_for_dataset(dataset: xr.Dataset) \ + -> List[GridMappingTuple]: + dims_to_grid_mappings: Dict[DimPair, GridMappingTuple] = {} + for var_name, var in dataset.data_vars.items(): + found = False + for x_dim, y_dim in dims_to_grid_mappings.keys(): + if x_dim in var.dims and y_dim in var.dims: + found = True + break + if not found: + gmt = find_grid_mapping_for_data_var(dataset, var_name) + if gmt is not None: + _, _, xy_coords = gmt + xy_dims = _get_xy_dims_from_xy_coords(xy_coords) + dims_to_grid_mappings[xy_dims] = gmt + + grid_mapping_tuples = list(dims_to_grid_mappings.values()) + + lat_lon_2d_gmt = _find_lat_lon_2d_gmt( + dataset, + tuple(dims_to_grid_mappings.keys()) or (("x", "y"),) + ) + if lat_lon_2d_gmt is not None: + grid_mapping_tuples.append(lat_lon_2d_gmt) + + return grid_mapping_tuples + + +def _find_lat_lon_2d_gmt(dataset: xr.Dataset, + xy_dims: Tuple[DimPair]) \ + -> Optional[GridMappingTuple]: + for x_dim, y_dim in xy_dims: + yx_dims = y_dim, x_dim + for lon_name, lat_name in (('lon', 'lat'), + ('longitude', 'latitude')): + lon_coords = dataset.coords.get(lon_name) + lat_coords = dataset.coords.get(lat_name) + if lon_coords is None or lat_coords is None: + lon_coords = dataset.data_vars.get(lon_name) + lat_coords = dataset.data_vars.get(lat_name) + if lon_coords is not None \ + and lat_coords is not None \ + and lon_coords.dims == yx_dims \ + and lat_coords.dims == yx_dims: + return (CRS_WGS84, + "latitude_longitude", + (lon_coords, lat_coords)) + return None -def get_dataset_grid_mapping_proxies( +def _get_xy_dims_from_xy_coords(xy_coords: CoordsPair) -> DimPair: + x_coords, y_coords = xy_coords + if x_coords.ndim == 1: + # 1-D coordinates + assert y_coords.ndim == 1 + return x_coords.dims[0], y_coords.dims[0] + else: + # 2-D coordinates + assert x_coords.ndim == 2 + assert x_coords.dims == y_coords.dims + return x_coords.dims[1], x_coords.dims[0] + + +def find_grid_mapping_for_data_var( dataset: xr.Dataset, - *, - missing_latitude_longitude_crs: pyproj.crs.CRS = None, - missing_rotated_latitude_longitude_crs: pyproj.crs.CRS = None, - missing_projected_crs: pyproj.crs.CRS = None, - emit_warnings: bool = False -) -> Dict[Union[Hashable, None], GridMappingProxy]: - """ - Find grid mappings encoded as described in the CF conventions - [Horizontal Coordinate Reference Systems, Grid Mappings, and Projections] - (http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#grid-mappings-and-projections). - - :param dataset: - :param missing_latitude_longitude_crs: - :param missing_rotated_latitude_longitude_crs: - :param missing_projected_crs: - :param emit_warnings: - :return: - """ - grid_mapping_proxies: Dict[Union[Hashable, None], - GridMappingProxy] = dict() - - # Find any grid mapping variables by CF 'grid_mapping' attribute - # - for var_name, var in dataset.variables.items(): - grid_mapping_var_name = var.attrs.get('grid_mapping') - if grid_mapping_var_name \ - and grid_mapping_var_name not in grid_mapping_proxies \ - and grid_mapping_var_name in dataset: - grid_mapping_var = dataset[grid_mapping_var_name] - gmp = _parse_crs_from_attrs(grid_mapping_var.attrs) - grid_mapping_proxies[grid_mapping_var_name] = gmp - - # If no grid mapping variables found, - # try if CRS is encoded in some variable's attributes - # - if not grid_mapping_proxies: - for var_name, var in dataset.variables.items(): - gmp = _parse_crs_from_attrs(var.attrs) - if gmp is not None: - grid_mapping_proxies[var_name] = gmp - break + var_name: Hashable +) -> Optional[GridMappingTuple]: + assert_instance(dataset, xr.Dataset, "dataset") + assert_instance(var_name, Hashable, "var_name") + assert_true(var_name in dataset, + message=f"variable {var_name!r} not found in dataset") + + var = dataset[var_name] + if var.ndim < 2: + return None - # If no grid mapping variables found, - # try if CRS is encoded in dataset attributes - # - if not grid_mapping_proxies: - gmp = _parse_crs_from_attrs(dataset.attrs) - if gmp is not None: - grid_mapping_proxies[None] = gmp - - # Find coordinate variables. - # - - latitude_longitude_coords = GridCoords() - rotated_latitude_longitude_coords = GridCoords() - projected_coords = GridCoords() - - potential_coord_vars = _find_potential_coord_vars(dataset) - - # Find coordinate variables that use a CF standard_name. - # - coords_standard_names = ( - (latitude_longitude_coords, - 'longitude', 'latitude'), - (rotated_latitude_longitude_coords, - 'grid_longitude', 'grid_latitude'), - (projected_coords, - 'projection_x_coordinate', 'projection_y_coordinate') - ) - for var_name in potential_coord_vars: - var = dataset[var_name] - if var.ndim not in (1, 2): - continue - standard_name = var.attrs.get('standard_name') - for coords, x_name, y_name in coords_standard_names: - if coords.x is None and standard_name == x_name: - coords.x = var - if coords.y is None and standard_name == y_name: - coords.y = var - - # Find coordinate variables by common naming convention. - # - coords_var_names = ( - (latitude_longitude_coords, - ('lon', 'longitude'), - ('lat', 'latitude')), - (rotated_latitude_longitude_coords, - ('rlon', 'rlongitude'), - ('rlat', 'rlatitude')), - (projected_coords, - ('x', 'xc', 'transformed_x'), - ('y', 'yc', 'transformed_y')) + gm_value = var.attrs.get("grid_mapping") + if isinstance(gm_value, str) and gm_value: + crs, gm_name, xy_coords = _find_grid_mapping_for_var_with_gm_value( + dataset, var, gm_value + ) + force_xy_coords = True + else: + crs, gm_name, xy_coords = CRS_WGS84, "latitude_longitude", None + force_xy_coords = False + + if xy_coords is None: + xy_coords = _find_coordinates_for_crs_and_gm_name( + dataset, var, gm_name, force_xy_coords + ) + if xy_coords is None: + return None + + return crs, gm_name, xy_coords + + +def _find_grid_mapping_for_var_with_gm_value( + dataset: xr.Dataset, + var: xr.DataArray, + gm_value: str +) -> Tuple[pyproj.CRS, + str, + Optional[CoordsPair]]: + xy_coords = None + + if ":" in gm_value: + # gm_value has format ": " + gm_var_name, coord_var_names = gm_value.split(":", maxsplit=1) + coord_var_names = coord_var_names.split() + if len(coord_var_names) == 2: + x_name, y_name = coord_var_names + # Check CF, whether the following is correct: + # if gm_name in ("latitude_longitude", + # "rotated_latitude_longitude"): + # x_name, y_name = y_name, x_name + x_coords = dataset.get(x_name) + y_coords = dataset.get(y_name) + if ((_is_valid_1d_coord_var(var, x_coords) + and _is_valid_1d_coord_var(var, y_coords)) + or (_is_valid_2d_coord_var(var, x_coords) + and _is_valid_2d_coord_var(var, y_coords))): + xy_coords = x_coords, y_coords + if xy_coords is None: + raise ValueError(f"invalid coordinates in" + f" grid mapping value {gm_value!r}") + else: + # gm_value has format "" + gm_var_name = gm_value + + crs, gm_name = _parse_crs(dataset, gm_var_name) + + return crs, gm_name, xy_coords + + +def _find_coordinates_for_crs_and_gm_name( + dataset: xr.Dataset, + var: xr.DataArray, + gm_name: str, + force: bool +) -> Optional[Tuple[xr.DataArray, xr.DataArray]]: + other_vars = [dataset[var_name] + for var_name in dataset.variables.keys() + if var_name != var.name] + + valid_1d_coord_vars = [ + other_var for other_var in other_vars + if _is_valid_1d_coord_var(var, other_var) + ] + + if gm_name == "latitude_longitude": + x_name, y_name = "longitude", "latitude" + elif gm_name == "rotated_latitude_longitude": + x_name, y_name = "grid_longitude", "grid_latitude" + else: + x_name, y_name = "projection_x_coordinate", "projection_y_coordinate" + + x_coords = _find_coord_var_by_standard_name(valid_1d_coord_vars, x_name) + y_coords = _find_coord_var_by_standard_name(valid_1d_coord_vars, y_name) + if x_coords is not None and y_coords is not None: + return x_coords, y_coords + + valid_2d_coord_vars = [ + other_var for other_var in other_vars + if _is_valid_2d_coord_var(var, other_var) + ] + x_coords = _find_coord_var_by_standard_name(valid_2d_coord_vars, x_name) + y_coords = _find_coord_var_by_standard_name(valid_2d_coord_vars, y_name) + if x_coords is not None and y_coords is not None \ + and x_coords.dims == y_coords.dims: + return x_coords, y_coords + + xy_coords = _find_1d_coord_var_by_common_names( + valid_1d_coord_vars, + (("lon", "lat"), + ("longitude", "latitude"), + ("x", "y"), + ("xc", "yc")), ) - for var_name in potential_coord_vars: - var = dataset[var_name] - if var.ndim not in (1, 2): - continue - for coords, x_names, y_names in coords_var_names: - if coords.x is None and var_name in x_names: - coords.x = var - if coords.y is None and var_name in y_names: - coords.y = var - - # Assign found coordinates to grid mappings - # - for gmp in grid_mapping_proxies.values(): - if gmp.name == 'latitude_longitude': - gmp.coords = latitude_longitude_coords - elif gmp.name == 'rotated_latitude_longitude': - gmp.coords = rotated_latitude_longitude_coords - else: - gmp.coords = projected_coords - - _complement_grid_mapping_coords(latitude_longitude_coords, - 'latitude_longitude', - missing_latitude_longitude_crs - or CRS_WGS84, - grid_mapping_proxies) - _complement_grid_mapping_coords(rotated_latitude_longitude_coords, - 'rotated_latitude_longitude', - missing_rotated_latitude_longitude_crs, - grid_mapping_proxies) - _complement_grid_mapping_coords(projected_coords, - None, - missing_projected_crs, - grid_mapping_proxies) - - # Collect complete grid mappings - complete_grid_mappings = dict() - for var_name, gmp in grid_mapping_proxies.items(): - if gmp.coords is not None \ - and gmp.coords.x is not None \ - and gmp.coords.y is not None \ - and gmp.coords.x.size >= 2 \ - and gmp.coords.y.size >= 2 \ - and gmp.coords.x.ndim == gmp.coords.y.ndim: - if gmp.coords.x.ndim == 1: - gmp.tile_size = _find_dataset_tile_size( - dataset, - gmp.coords.x.dims[0], - gmp.coords.y.dims[0] - ) - complete_grid_mappings[var_name] = gmp - elif gmp.coords.x.ndim == 2 \ - and gmp.coords.x.dims == gmp.coords.y.dims: - gmp.tile_size = _find_dataset_tile_size( - dataset, - gmp.coords.x.dims[1], - gmp.coords.x.dims[0] - ) - complete_grid_mappings[var_name] = gmp - elif emit_warnings: - warnings.warn(f'CRS "{gmp.name}": ' - f'missing x- and/or y-coordinates ' - f'(grid mapping variable "{var_name}": ' - f'grid_mapping_name="{gmp.name}")') - - return complete_grid_mappings - - -def _parse_crs_from_attrs(attrs: Dict[Hashable, Any]) \ - -> Optional[GridMappingProxy]: - # noinspection PyBroadException - try: - crs = pyproj.crs.CRS.from_cf(attrs) - except pyproj.crs.CRSError: - return None - return GridMappingProxy(crs=crs, - name=attrs.get('grid_mapping_name')) - - -def _complement_grid_mapping_coords( - coords: GridCoords, - grid_mapping_name: Optional[str], - missing_crs: Optional[pyproj.crs.CRS], - grid_mappings: Dict[Optional[str], GridMappingProxy] -): - if coords.x is not None or coords.y is not None: - grid_mapping = next((grid_mapping - for grid_mapping in grid_mappings.values() - if grid_mapping_name is None - or grid_mapping_name == grid_mapping.name), - None) - if grid_mapping is None and missing_crs is not None: - grid_mapping = GridMappingProxy(crs=missing_crs, - name=grid_mapping_name) - grid_mappings[None] = grid_mapping - - if grid_mapping is not None: - if grid_mapping.coords is None: - grid_mapping.coords = coords - # Edge case from GeoTIFF with CRS-84 with 1D - # coordinates named "x" and "y" as read by rioxarray - if grid_mapping.coords.x is None: - grid_mapping.coords.x = coords.x - if grid_mapping.coords.y is None: - grid_mapping.coords.y = coords.y - - -def _find_potential_coord_vars(dataset: xr.Dataset) -> List[Hashable]: - """ - Find potential coordinate variables. - - We need this function as we can not rely on xarray.Dataset.coords, - because 2D coordinate arrays are most likely not indicated as such - in many datasets. - """ - - # Collect bounds variables. We must exclude them. - bounds_vars = set() - for k in dataset.variables: - var = dataset[k] - - # Bounds variable as recommended through CF conventions - bounds_k = var.attrs.get('bounds') - if bounds_k is not None and bounds_k in dataset: - bounds_vars.add(bounds_k) - - # Bounds variable by naming convention, - # e.g. "lon_bnds" or "lat_bounds" - k_splits = str(k).rsplit("_", maxsplit=1) - if len(k_splits) == 2: - k_base, k_suffix = k_splits - if k_suffix in ('bnds', 'bounds') and k_base in dataset: - bounds_vars.add(k) - - potential_coord_vars = [] - - # First consider any CF global attribute "coordinates" - coordinates = dataset.attrs.get('coordinates') - if coordinates is not None: - for var_name in coordinates.split(): - if _is_potential_coord_var(dataset, bounds_vars, var_name): - potential_coord_vars.append(var_name) - - # Then consider any other 1D/2D variables - for var_name in dataset.variables: - if var_name not in potential_coord_vars \ - and _is_potential_coord_var(dataset, bounds_vars, var_name): - potential_coord_vars.append(var_name) - - return potential_coord_vars - - -def _is_potential_coord_var(dataset: xr.Dataset, - bounds_var_names: Set[str], - var_name: Hashable) -> bool: - if var_name in dataset: - var = dataset[var_name] - return var.ndim in (1, 2) and var_name not in bounds_var_names - return False - - -def _find_dataset_tile_size(dataset: xr.Dataset, - x_dim_name: Hashable, - y_dim_name: Hashable) \ - -> Optional[Tuple[int, int]]: - """Find the most likely tile size in *dataset*""" - dataset_chunks = get_dataset_chunks(dataset) - tile_width = dataset_chunks.get(x_dim_name) - tile_height = dataset_chunks.get(y_dim_name) - if tile_width is not None and tile_height is not None: - return tile_width, tile_height + if xy_coords is not None: + return xy_coords + + # Check: also try _find_2d_coord_var_by_common_names()? + + if force: + raise ValueError(f"cannot determine grid mapping" + f" coordinates for variable {var.name!r}" + f" with dimensions {var.dims!r}") + return None + +def _find_1d_coord_var_by_common_names( + coords: Sequence[xr.DataArray], + common_xy_names: Sequence[Tuple[str, str]], +) -> Optional[Tuple[xr.DataArray, xr.DataArray]]: + + # Priority 1: find var by "axis" attribute + x_coords = None + y_coords = None + for var in coords: + if var.attrs.get("axis") == "X": + x_coords = var + if var.attrs.get("axis") == "Y": + y_coords = var + if x_coords is not None and y_coords is not None: + return x_coords, y_coords + + # Priority 2: find var where dim name == common name == var name + x_coords = None + y_coords = None + for var in coords: + for x_name, y_name in common_xy_names: + if var.dims[0] == x_name and var.name == x_name: + x_coords = var + if var.dims[0] == y_name and var.name == y_name: + y_coords = var + if x_coords is not None and y_coords is not None: + return x_coords, y_coords + + # Priority 3: find var where dim name == common name + x_coords = None + y_coords = None + for var in coords: + for x_name, y_name in common_xy_names: + if var.dims[0] == x_name: + x_coords = var + if var.dims[0] == y_name: + y_coords = var + if x_coords is not None and y_coords is not None: + return x_coords, y_coords + + return None + + +def _find_coord_var_by_standard_name( + coords: Sequence[xr.DataArray], + standard_name: str +) -> Optional[xr.DataArray]: + for var in coords: + if var.attrs.get("standard_name") == standard_name: + return var return None -def add_spatial_ref(dataset_store: zarr.convenience.StoreLike, - crs: pyproj.CRS, - crs_var_name: Optional[str] = 'spatial_ref', - xy_dim_names: Optional[Tuple[str, str]] = None): - """ - Helper function that allows adding a spatial reference to an - existing Zarr dataset. - - :param dataset_store: The dataset's existing Zarr store or path. - :param crs: The spatial coordinate reference system. - :param crs_var_name: The name of the variable that will hold the - spatial reference. Defaults to "spatial_ref". - :param xy_dim_names: The names of the x and y dimensions. - Defaults to ("x", "y"). - """ - assert_instance(dataset_store, (MutableMapping, str), name='group_store') - assert_instance(crs_var_name, str, name='crs_var_name') - x_dim_name, y_dim_name = xy_dim_names or ('x', 'y') - - spatial_attrs = crs.to_cf() - spatial_attrs['_ARRAY_DIMENSIONS'] = [] # Required by xarray - group = zarr.open(dataset_store, mode='r+') - spatial_ref = group.array(crs_var_name, - 0, - shape=(), - dtype=np.uint8, - fill_value=0) - spatial_ref.attrs.update(**spatial_attrs) - - for item_name, item in group.items(): - if item_name != crs_var_name: - dims = item.attrs.get('_ARRAY_DIMENSIONS') - if dims and len(dims) >= 2 \ - and dims[-2] == y_dim_name \ - and dims[-1] == x_dim_name: - item.attrs['grid_mapping'] = crs_var_name - - zarr.convenience.consolidate_metadata(dataset_store) +def _parse_crs(dataset: xr.Dataset, + gm_var_name: str) -> Tuple[pyproj.CRS, Optional[str]]: + if gm_var_name not in dataset: + raise ValueError(f"grid mapping variable {gm_var_name!r}" + f" not found in dataset") + gm_var = dataset[gm_var_name] + try: + return (pyproj.CRS.from_cf(gm_var.attrs), + gm_var.attrs.get("grid_mapping_name")) + except pyproj.exceptions.CRSError as e: + raise ValueError(f"variable {gm_var_name!r}" + f" is not a valid grid mapping") from e + + +def _is_valid_1d_coord_var(data_var: xr.DataArray, + coord_var: Optional[xr.DataArray]) -> bool: + return (coord_var is not None + and coord_var.ndim == 1 + and coord_var.dims[0] in data_var.dims) + + +def _is_valid_2d_coord_var(data_var: xr.DataArray, + coord_var: Optional[xr.DataArray]) -> bool: + return (coord_var is not None + and coord_var.ndim == 2 + and coord_var.dims[0] in data_var.dims + and coord_var.dims[1] in data_var.dims) + + + diff --git a/xcube/core/gridmapping/dataset.py b/xcube/core/gridmapping/dataset.py index 2567b1bf7..c54be83bb 100644 --- a/xcube/core/gridmapping/dataset.py +++ b/xcube/core/gridmapping/dataset.py @@ -20,14 +20,13 @@ # SOFTWARE. from typing import Optional, Union, Tuple -import warnings import pyproj import xarray as xr from .base import DEFAULT_TOLERANCE from .base import GridMapping -from .cfconv import get_dataset_grid_mapping_proxies +from .cfconv import find_grid_mappings_for_dataset from .coords import new_grid_mapping_from_coords from .helpers import _normalize_crs @@ -54,21 +53,14 @@ def new_grid_mapping_from_dataset( else: prefer_crs = crs - grid_mapping_proxies = get_dataset_grid_mapping_proxies( - dataset, - emit_warnings=emit_warnings, - missing_projected_crs=crs, - missing_rotated_latitude_longitude_crs=crs, - missing_latitude_longitude_crs=crs, - ).values() - + grid_mapping_tuples = find_grid_mappings_for_dataset(dataset) grid_mappings = [ - new_grid_mapping_from_coords(x_coords=gmp.coords.x, - y_coords=gmp.coords.y, - crs=gmp.crs, - tile_size=tile_size or gmp.tile_size, + new_grid_mapping_from_coords(x_coords=x_coords, + y_coords=y_coords, + crs=crs, + tile_size=tile_size, tolerance=tolerance) - for gmp in grid_mapping_proxies + for crs, _, (x_coords, y_coords) in grid_mapping_tuples ] if len(grid_mappings) > 1: diff --git a/xcube/core/gridmapping/regular.py b/xcube/core/gridmapping/regular.py index 68869a10e..2fcfa5d36 100644 --- a/xcube/core/gridmapping/regular.py +++ b/xcube/core/gridmapping/regular.py @@ -1,23 +1,23 @@ # The MIT License (MIT) -# Copyright (c) 2021 by the xcube development team and contributors +# Copyright (c) 2023 by the xcube development team and contributors # -# Permission is hereby granted, free of charge, to any person obtaining a copy of -# this software and associated documentation files (the "Software"), to deal in -# the Software without restriction, including without limitation the rights to -# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -# of the Software, and to permit persons to whom the Software is furnished to do -# so, subject to the following conditions: +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: # -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. from typing import Tuple, Union