Skip to content

Commit b33698a

Browse files
committed
Support multiple grid mappings on a DataArray
1 parent 6b18b75 commit b33698a

File tree

3 files changed

+224
-22
lines changed

3 files changed

+224
-22
lines changed

cf_xarray/accessor.py

Lines changed: 132 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,32 @@ def _get_bounds(obj: DataArray | Dataset, key: Hashable) -> list[Hashable]:
440440
return list(results)
441441

442442

443+
def _parse_grid_mapping_attribute(grid_mapping_attr: str) -> list[str]:
444+
"""
445+
Parse a grid_mapping attribute that may contain multiple grid mappings.
446+
447+
The attribute has the format: "grid_mapping_variable_name: optional_coordinate_names_space_separated"
448+
Multiple sections are separated by colons.
449+
450+
Examples:
451+
- Single: "spatial_ref"
452+
- Multiple: "spatial_ref: crs_4326: latitude longitude crs_27700: x27700 y27700"
453+
454+
Returns a list of grid mapping variable names.
455+
"""
456+
# Check if there are colons indicating multiple mappings
457+
if ":" not in grid_mapping_attr:
458+
return [grid_mapping_attr.strip()]
459+
460+
# Use regex to find grid mapping variable names
461+
# Pattern matches: word at start OR word that comes after some coordinate names and before ":"
462+
# This handles cases like "spatial_ref: crs_4326: latitude longitude crs_27700: x27700 y27700"
463+
pattern = r"(?:^|\s)([a-zA-Z_][a-zA-Z0-9_]*)(?=\s*:)"
464+
matches = re.findall(pattern, grid_mapping_attr)
465+
466+
return matches if matches else [grid_mapping_attr.strip()]
467+
468+
443469
def _get_grid_mapping_name(obj: DataArray | Dataset, key: str) -> list[str]:
444470
"""
445471
Translate from grid mapping name attribute to appropriate variable name.
@@ -467,13 +493,17 @@ def _get_grid_mapping_name(obj: DataArray | Dataset, key: str) -> list[str]:
467493
for var in variables.values():
468494
attrs_or_encoding = ChainMap(var.attrs, var.encoding)
469495
if "grid_mapping" in attrs_or_encoding:
470-
grid_mapping_var_name = attrs_or_encoding["grid_mapping"]
471-
if grid_mapping_var_name not in variables:
472-
raise ValueError(
473-
f"{var} defines non-existing grid_mapping variable {grid_mapping_var_name}."
474-
)
475-
if key == variables[grid_mapping_var_name].attrs["grid_mapping_name"]:
476-
results.update([grid_mapping_var_name])
496+
grid_mapping_attr = attrs_or_encoding["grid_mapping"]
497+
# Parse potentially multiple grid mappings
498+
grid_mapping_var_names = _parse_grid_mapping_attribute(grid_mapping_attr)
499+
500+
for grid_mapping_var_name in grid_mapping_var_names:
501+
if grid_mapping_var_name not in variables:
502+
raise ValueError(
503+
f"{var} defines non-existing grid_mapping variable {grid_mapping_var_name}."
504+
)
505+
if key == variables[grid_mapping_var_name].attrs["grid_mapping_name"]:
506+
results.update([grid_mapping_var_name])
477507
return list(results)
478508

479509

@@ -1943,9 +1973,34 @@ def get_associated_variable_names(
19431973
if dbounds := self._obj[dim].attrs.get("bounds", None):
19441974
coords["bounds"].append(dbounds)
19451975

1946-
for attrname in ["grid", "grid_mapping"]:
1947-
if maybe := attrs_or_encoding.get(attrname, None):
1948-
coords[attrname] = [maybe]
1976+
if grid := attrs_or_encoding.get("grid", None):
1977+
coords["grid"] = [grid]
1978+
1979+
if grid_mapping_attr := attrs_or_encoding.get("grid_mapping", None):
1980+
# Parse grid mapping variables using the same function
1981+
grid_mapping_vars = _parse_grid_mapping_attribute(grid_mapping_attr)
1982+
coords["grid_mapping"] = grid_mapping_vars
1983+
1984+
# Extract coordinate variables using regex
1985+
if ":" in grid_mapping_attr:
1986+
# Pattern to find coordinate variables: words that come after ":" but before next grid mapping variable
1987+
# This captures coordinate variables between grid mapping sections
1988+
coord_pattern = r":\s+([^:]+?)(?=\s+[a-zA-Z_][a-zA-Z0-9_]*\s*:|$)"
1989+
coord_matches = re.findall(coord_pattern, grid_mapping_attr)
1990+
1991+
for coord_section in coord_matches:
1992+
# Split each coordinate section and add valid coordinate names
1993+
coord_vars = coord_section.split()
1994+
# Filter out grid mapping variable names that might have been captured
1995+
coord_vars = [
1996+
var
1997+
for var in coord_vars
1998+
if not (
1999+
var.startswith(("crs_", "spatial_", "proj_"))
2000+
and var in grid_mapping_vars
2001+
)
2002+
]
2003+
coords["coordinates"].extend(coord_vars)
19492004

19502005
more: Sequence[Hashable] = ()
19512006
if geometry_var := attrs_or_encoding.get("geometry", None):
@@ -2899,6 +2954,60 @@ def formula_terms(self) -> dict[str, str]: # numpydoc ignore=SS06
28992954
terms[key] = value
29002955
return terms
29012956

2957+
@property
2958+
def grid_mapping_names(self) -> dict[str, list[str]]:
2959+
"""
2960+
Mapping the CF grid mapping name to the grid mapping variable name.
2961+
2962+
Returns
2963+
-------
2964+
dict
2965+
Dictionary mapping the CF grid mapping name to the variable name containing
2966+
the grid mapping attributes.
2967+
2968+
See Also
2969+
--------
2970+
DataArray.cf.grid_mapping_name
2971+
Dataset.cf.grid_mapping_names
2972+
2973+
References
2974+
----------
2975+
https://cfconventions.org/Data/cf-conventions/cf-conventions-1.10/cf-conventions.html#appendix-grid-mappings
2976+
2977+
Examples
2978+
--------
2979+
>>> from cf_xarray.datasets import hrrrds
2980+
>>> hrrrds.foo.cf.grid_mapping_names
2981+
{'latitude_longitude': ['crs_4326'], 'lambert_azimuthal_equal_area': ['spatial_ref']}
2982+
"""
2983+
da = self._obj
2984+
attrs_or_encoding = ChainMap(da.attrs, da.encoding)
2985+
grid_mapping_attr = attrs_or_encoding.get("grid_mapping", None)
2986+
2987+
if not grid_mapping_attr:
2988+
return {}
2989+
2990+
# Parse potentially multiple grid mappings
2991+
grid_mapping_var_names = _parse_grid_mapping_attribute(grid_mapping_attr)
2992+
2993+
results = {}
2994+
for grid_mapping_var_name in grid_mapping_var_names:
2995+
# First check if it's in the DataArray's coords (for multiple grid mappings
2996+
# that are coordinates of the DataArray)
2997+
if grid_mapping_var_name in da.coords:
2998+
grid_mapping_var = da.coords[grid_mapping_var_name]
2999+
if "grid_mapping_name" in grid_mapping_var.attrs:
3000+
gmn = grid_mapping_var.attrs["grid_mapping_name"]
3001+
if gmn not in results:
3002+
results[gmn] = [grid_mapping_var_name]
3003+
else:
3004+
results[gmn].append(grid_mapping_var_name)
3005+
# For standalone DataArrays, the grid mapping variables may not be available
3006+
# This is a limitation of the xarray data model - when you extract a DataArray
3007+
# from a Dataset, it doesn't carry over non-coordinate variables
3008+
3009+
return results
3010+
29023011
@property
29033012
def grid_mapping_name(self) -> str:
29043013
"""
@@ -2911,6 +3020,7 @@ def grid_mapping_name(self) -> str:
29113020
29123021
See Also
29133022
--------
3023+
DataArray.cf.grid_mapping_names
29143024
Dataset.cf.grid_mapping_names
29153025
29163026
Examples
@@ -2920,19 +3030,22 @@ def grid_mapping_name(self) -> str:
29203030
'rotated_latitude_longitude'
29213031
"""
29223032

2923-
da = self._obj
3033+
# Use grid_mapping_names under the hood
3034+
grid_mapping_names = self.grid_mapping_names
29243035

2925-
attrs_or_encoding = ChainMap(da.attrs, da.encoding)
2926-
grid_mapping = attrs_or_encoding.get("grid_mapping", None)
2927-
if not grid_mapping:
3036+
if not grid_mapping_names:
29283037
raise ValueError("No 'grid_mapping' attribute present.")
29293038

2930-
if grid_mapping not in da._coords:
2931-
raise ValueError(f"Grid Mapping variable {grid_mapping} not present.")
2932-
2933-
grid_mapping_var = da[grid_mapping]
3039+
if len(grid_mapping_names) > 1:
3040+
# Get the variable names for error message
3041+
all_vars = list(itertools.chain.from_iterable(grid_mapping_names.values()))
3042+
raise ValueError(
3043+
f"Multiple grid mappings found: {all_vars}. "
3044+
"Please use DataArray.cf.grid_mapping_names instead."
3045+
)
29343046

2935-
return grid_mapping_var.attrs["grid_mapping_name"]
3047+
# Return the single grid mapping name
3048+
return next(iter(grid_mapping_names.keys()))
29363049

29373050
def __getitem__(self, key: Hashable | Iterable[Hashable]) -> DataArray:
29383051
"""

cf_xarray/datasets.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import numpy as np
22
import xarray as xr
3+
from pyproj import CRS
34

45
airds = xr.tutorial.open_dataset("air_temperature").isel(time=slice(4), lon=slice(50))
56
airds.air.attrs["cell_measures"] = "area: cell_area"
@@ -750,6 +751,39 @@ def _create_inexact_bounds():
750751
)
751752

752753

754+
hrrrds = xr.Dataset()
755+
hrrrds["foo"] = (
756+
("x", "y"),
757+
np.arange(200).reshape((10, 20)),
758+
{
759+
"grid_mapping": "spatial_ref: crs_4326: latitude longitude crs_27700: x27700 y27700"
760+
},
761+
)
762+
hrrrds.coords["spatial_ref"] = ((), 0, CRS.from_epsg(3035).to_cf())
763+
hrrrds.coords["crs_4326"] = ((), 0, CRS.from_epsg(4326).to_cf())
764+
hrrrds.coords["crs_27700"] = ((), 0, CRS.from_epsg(27700).to_cf())
765+
hrrrds.coords["latitude"] = (
766+
("x", "y"),
767+
np.ones((10, 20)),
768+
{"standard_name": "latitude"},
769+
)
770+
hrrrds.coords["longitude"] = (
771+
("x", "y"),
772+
np.zeros((10, 20)),
773+
{"standard_name": "longitude"},
774+
)
775+
hrrrds.coords["y27700"] = (
776+
("x", "y"),
777+
np.ones((10, 20)),
778+
{"standard_name": "projected_x_coordinate"},
779+
)
780+
hrrrds.coords["x27700"] = (
781+
("x", "y"),
782+
np.zeros((10, 20)),
783+
{"standard_name": "projected_y_coordinate"},
784+
)
785+
786+
753787
def point_dataset():
754788
from shapely.geometry import MultiPoint, Point
755789

cf_xarray/tests/test_accessor.py

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
flag_indep_uint16,
3030
flag_mix,
3131
forecast,
32+
hrrrds,
3233
mollwds,
3334
multiple,
3435
popds,
@@ -1018,9 +1019,7 @@ def test_grid_mappings():
10181019
assert_identical(actual, expected)
10191020

10201021
# not properly propagated if grid mapping variable not in coords
1021-
with pytest.raises(
1022-
ValueError, match="Grid Mapping variable rotated_pole not present."
1023-
):
1022+
with pytest.raises(ValueError, match="No 'grid_mapping' attribute present."):
10241023
ds.temp.cf.grid_mapping_name
10251024

10261025
# check for https://github.com/xarray-contrib/cf-xarray/issues/448
@@ -1068,6 +1067,62 @@ def test_grid_mappings():
10681067
assert "rotated_pole" in ds.coords
10691068

10701069

1070+
def test_multiple_grid_mapping_attribute():
1071+
ds = hrrrds
1072+
1073+
# Test Dataset grid_mapping_names
1074+
# Now includes British National Grid (EPSG:27700) which has grid_mapping_name
1075+
assert ds.cf.grid_mapping_names == {
1076+
"latitude_longitude": ["crs_4326"],
1077+
"lambert_azimuthal_equal_area": ["spatial_ref"],
1078+
"transverse_mercator": ["crs_27700"],
1079+
}
1080+
1081+
# Test DataArray grid_mapping_names
1082+
da = ds.foo
1083+
# Now with improved regex parsing, all 3 grid mappings should be detected
1084+
assert da.cf.grid_mapping_names == {
1085+
"latitude_longitude": ["crs_4326"],
1086+
"lambert_azimuthal_equal_area": ["spatial_ref"],
1087+
"transverse_mercator": ["crs_27700"],
1088+
}
1089+
1090+
# Test that grid_mapping_name raises an error with multiple mappings
1091+
with pytest.raises(
1092+
ValueError,
1093+
match="Multiple grid mappings found.*Please use DataArray.cf.grid_mapping_names",
1094+
):
1095+
da.cf.grid_mapping_name
1096+
1097+
assert "crs_4326" in ds.cf["foo"].coords
1098+
assert "spatial_ref" in ds.cf["foo"].coords
1099+
assert "crs_27700" in ds.cf["foo"].coords
1100+
# Also check that coordinate variables are included
1101+
assert "latitude" in ds.cf["foo"].coords
1102+
assert "longitude" in ds.cf["foo"].coords
1103+
assert "x27700" in ds.cf["foo"].coords
1104+
assert "y27700" in ds.cf["foo"].coords
1105+
1106+
# Test that accessing grid_mapping with cf indexing raises an error for multiple mappings
1107+
with pytest.raises(
1108+
KeyError, match="Receive multiple variables for key 'grid_mapping'"
1109+
):
1110+
da.cf["grid_mapping"]
1111+
1112+
# Test that DataArrays don't support list indexing
1113+
with pytest.raises(
1114+
KeyError, match="Cannot use an Iterable of keys with DataArrays"
1115+
):
1116+
da.cf[["grid_mapping"]]
1117+
1118+
# But Dataset should support list indexing and return all grid mappings and coordinates
1119+
result = ds.cf[["foo", "grid_mapping"]]
1120+
assert "crs_4326" in result.coords
1121+
assert "spatial_ref" in result.coords
1122+
assert "crs_27700" in result.coords
1123+
assert "foo" in result.data_vars
1124+
1125+
10711126
def test_bad_grid_mapping_attribute():
10721127
ds = rotds.copy(deep=False)
10731128
ds.temp.attrs["grid_mapping"] = "foo"

0 commit comments

Comments
 (0)