Skip to content

Commit 6682b44

Browse files
sjordan29webb-ben
andauthored
CRS handling in xarray provider properties (geopython#1641)
* update crs handling * fix epsg code * config parsing, lean on pyproj * consolidate code and leverage prior crs work * update crs handling * fix epsg code * config parsing, lean on pyproj * consolidate code and leverage prior crs work * fix function call * bug and flake8 fixes * documentation updates * flake8 * Update ogcapi-coverages.rst * update crs handling * fix epsg code * config parsing, lean on pyproj * consolidate code and leverage prior crs work * update crs handling * fix epsg code * config parsing, lean on pyproj * consolidate code and leverage prior crs work * fix function call * bug and flake8 fixes * documentation updates * flake8 * Update ogcapi-coverages.rst * flake8 fix * rebase issues * update import formatting Co-authored-by: Benjamin Webb <40066515+webb-ben@users.noreply.github.com> * update conditional logic Co-authored-by: Benjamin Webb <40066515+webb-ben@users.noreply.github.com> * update error handling Co-authored-by: Benjamin Webb <40066515+webb-ben@users.noreply.github.com> * parse storage crs in init --------- Co-authored-by: Benjamin Webb <40066515+webb-ben@users.noreply.github.com>
1 parent deb043f commit 6682b44

File tree

3 files changed

+104
-10
lines changed

3 files changed

+104
-10
lines changed

docs/source/data-publishing/ogcapi-coverages.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ The `Xarray`_ provider plugin reads and extracts `NetCDF`_ and `Zarr`_ data.
7575
x_field: lon
7676
y_field: lat
7777
time_field: time
78+
# optionally specify the coordinate reference system of your dataset
79+
# else pygeoapi assumes it is WGS84 (EPSG:4326).
80+
storage_crs: 4326
7881
format:
7982
name: netcdf
8083
mimetype: application/x-netcdf
@@ -96,6 +99,11 @@ The `Xarray`_ provider plugin reads and extracts `NetCDF`_ and `Zarr`_ data.
9699
be sure to provide the full S3 URL. Any parameters required to open the dataset
97100
using fsspec can be added to the config file under `options` and `s3`.
98101

102+
.. note::
103+
When providing a `storage_crs` value in the xarray configuration, specify the
104+
coordinate reference system using any valid input for
105+
`pyproj.CRS.from_user_input`_.
106+
99107
Data access examples
100108
--------------------
101109

@@ -146,3 +154,4 @@ Data access examples
146154
.. _`NetCDF`: https://en.wikipedia.org/wiki/NetCDF
147155
.. _`Zarr`: https://zarr.readthedocs.io/en/stable
148156
.. _`GDAL raster driver short name`: https://gdal.org/drivers/raster/index.html
157+
.. _`pyproj.CRS.from_user_input`: https://pyproj4.github.io/pyproj/stable/api/crs/coordinate_system.html#pyproj.crs.CoordinateSystem.from_user_input

docs/source/data-publishing/ogcapi-edr.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ The `xarray-edr`_ provider plugin reads and extracts `NetCDF`_ and `Zarr`_ data
4747
x_field: lon
4848
y_field: lat
4949
time_field: time
50+
# optionally specify the coordinate reference system of your dataset
51+
# else pygeoapi assumes it is WGS84 (EPSG:4326).
52+
storage_crs: 4326
5053
format:
5154
name: netcdf
5255
mimetype: application/x-netcdf
@@ -81,6 +84,11 @@ The `xarray-edr`_ provider plugin reads and extracts `NetCDF`_ and `Zarr`_ data
8184
S3 URL. Any parameters required to open the dataset using fsspec can be added
8285
to the config file under `options` and `s3`, as shown above.
8386

87+
.. note::
88+
When providing a `storage_crs` value in the EDR configuration, specify the
89+
coordinate reference system using any valid input for
90+
`pyproj.CRS.from_user_input`_.
91+
8492

8593
Data access examples
8694
--------------------
@@ -105,6 +113,7 @@ Data access examples
105113
.. _`xarray`: https://docs.xarray.dev/en/stable/
106114
.. _`NetCDF`: https://en.wikipedia.org/wiki/NetCDF
107115
.. _`Zarr`: https://zarr.readthedocs.io/en/stable
116+
.. _`pyproj.CRS.from_user_input`: https://pyproj4.github.io/pyproj/stable/api/crs/coordinate_system.html#pyproj.crs.CoordinateSystem.from_user_input
108117

109118

110119
.. _`OGC Environmental Data Retrieval (EDR) (API)`: https://github.com/opengeospatial/ogcapi-coverages

pygeoapi/provider/xarray_.py

Lines changed: 86 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,16 @@
3737
import xarray
3838
import fsspec
3939
import numpy as np
40+
import pyproj
41+
from pyproj.exceptions import CRSError
42+
43+
from pygeoapi.api import DEFAULT_STORAGE_CRS
4044

4145
from pygeoapi.provider.base import (BaseProvider,
4246
ProviderConnectionError,
4347
ProviderNoDataError,
4448
ProviderQueryError)
45-
from pygeoapi.util import read_data
49+
from pygeoapi.util import get_crs_from_uri, read_data
4650

4751
LOGGER = logging.getLogger(__name__)
4852

@@ -82,6 +86,7 @@ def __init__(self, provider_def):
8286
data_to_open = self.data
8387

8488
self._data = open_func(data_to_open)
89+
self.storage_crs = self._parse_storage_crs(provider_def)
8590
self._coverage_properties = self._get_coverage_properties()
8691

8792
self.axes = [self._coverage_properties['x_axis_label'],
@@ -341,6 +346,7 @@ def gen_covjson(self, metadata, data, fields):
341346
def _get_coverage_properties(self):
342347
"""
343348
Helper function to normalize coverage properties
349+
:param provider_def: provider definition
344350
345351
:returns: `dict` of coverage properties
346352
"""
@@ -401,16 +407,21 @@ def _get_coverage_properties(self):
401407
'restime': self.get_time_resolution()
402408
}
403409

404-
if 'crs' in self._data.variables.keys():
405-
try:
406-
properties['bbox_crs'] = f'http://www.opengis.net/def/crs/OGC/1.3/{self._data.crs.epsg_code}' # noqa
407-
408-
properties['inverse_flattening'] = self._data.crs.\
409-
inverse_flattening
410-
410+
# Update properties based on the xarray's CRS
411+
epsg_code = self.storage_crs.to_epsg()
412+
LOGGER.debug(f'{epsg_code}')
413+
if epsg_code == 4326 or self.storage_crs == 'OGC:CRS84':
414+
pass
415+
LOGGER.debug('Confirmed default of WGS 84')
416+
else:
417+
properties['bbox_crs'] = \
418+
f'https://www.opengis.net/def/crs/EPSG/0/{epsg_code}'
419+
properties['inverse_flattening'] = \
420+
self.storage_crs.ellipsoid.inverse_flattening
421+
if self.storage_crs.is_projected:
411422
properties['crs_type'] = 'ProjectedCRS'
412-
except AttributeError:
413-
pass
423+
424+
LOGGER.debug(f'properties: {properties}')
414425

415426
properties['axes'] = [
416427
properties['x_axis_label'],
@@ -476,6 +487,71 @@ def get_time_coverage_duration(self):
476487

477488
return ', '.join(times)
478489

490+
def _parse_grid_mapping(self):
491+
"""
492+
Identifies grid_mapping.
493+
494+
:returns: name of xarray data variable that contains CRS information.
495+
"""
496+
LOGGER.debug('Parsing grid mapping...')
497+
spatiotemporal_dims = (self.time_field, self.y_field, self.x_field)
498+
LOGGER.debug(spatiotemporal_dims)
499+
grid_mapping_name = None
500+
for var_name, var in self._data.variables.items():
501+
if all(dim in var.dims for dim in spatiotemporal_dims):
502+
try:
503+
grid_mapping_name = self._data[var_name].attrs['grid_mapping'] # noqa
504+
LOGGER.debug(f'Grid mapping: {grid_mapping_name}')
505+
except KeyError as err:
506+
LOGGER.debug(err)
507+
LOGGER.debug('No grid mapping information found.')
508+
return grid_mapping_name
509+
510+
def _parse_storage_crs(
511+
self,
512+
provider_def: dict
513+
) -> pyproj.CRS:
514+
"""
515+
Parse the storage CRS from an xarray dataset.
516+
517+
:param provider_def: provider definition
518+
519+
:returns: `pyproj.CRS` instance parsed from dataset
520+
"""
521+
storage_crs = None
522+
523+
try:
524+
storage_crs = provider_def['storage_crs']
525+
crs_function = pyproj.CRS.from_user_input
526+
except KeyError as err:
527+
LOGGER.debug(err)
528+
LOGGER.debug('No storage_crs found. Attempting to parse the CRS.')
529+
530+
if storage_crs is None:
531+
grid_mapping = self._parse_grid_mapping()
532+
if grid_mapping is not None:
533+
storage_crs = self._data[grid_mapping].attrs
534+
crs_function = pyproj.CRS.from_cf
535+
elif 'crs' in self._data.variables.keys():
536+
storage_crs = self._data['crs'].attrs
537+
crs_function = pyproj.CRS.from_dict
538+
else:
539+
storage_crs = DEFAULT_STORAGE_CRS
540+
crs_function = get_crs_from_uri
541+
LOGGER.debug('Failed to parse dataset CRS. Assuming WGS84.')
542+
543+
LOGGER.debug(f'Parsing CRS {storage_crs} with {crs_function}')
544+
try:
545+
crs = crs_function(storage_crs)
546+
except CRSError as err:
547+
LOGGER.debug(f'Unable to parse projection with pyproj: {err}')
548+
LOGGER.debug('Assuming default WGS84.')
549+
crs = get_crs_from_uri(DEFAULT_STORAGE_CRS)
550+
551+
LOGGER.debug(crs)
552+
553+
return crs
554+
479555

480556
def _to_datetime_string(datetime_obj):
481557
"""

0 commit comments

Comments
 (0)