Skip to content

Commit d6ae4b1

Browse files
committed
fixes
1 parent c5dfc7a commit d6ae4b1

File tree

2 files changed

+59
-22
lines changed

2 files changed

+59
-22
lines changed

cf_xarray/accessor.py

Lines changed: 55 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -440,30 +440,60 @@ def _get_bounds(obj: DataArray | Dataset, key: Hashable) -> list[Hashable]:
440440
return list(results)
441441

442442

443-
def _parse_grid_mapping_attribute(grid_mapping_attr: str) -> list[str]:
443+
def _parse_grid_mapping_attribute(grid_mapping_attr: str) -> dict[str, list[str]]:
444444
"""
445445
Parse a grid_mapping attribute that may contain multiple grid mappings.
446446
447447
The attribute has the format: "grid_mapping_variable_name: optional_coordinate_names_space_separated"
448448
Multiple sections are separated by colons.
449449
450450
Examples:
451-
- Single: "spatial_ref"
451+
- Single: "spatial_ref" -> {"spatial_ref": []}
452452
- Multiple: "spatial_ref: crs_4326: latitude longitude crs_27700: x27700 y27700"
453+
-> {"spatial_ref": [], "crs_4326": ["latitude", "longitude"], "crs_27700": ["x27700", "y27700"]}
453454
454-
Returns a list of grid mapping variable names.
455+
Returns a dictionary mapping grid mapping variable names to their associated coordinate variables.
455456
"""
456457
# Check if there are colons indicating multiple mappings
457458
if ":" not in grid_mapping_attr:
458-
return [grid_mapping_attr.strip()]
459-
460-
# Use regex to find grid mapping variable names
461-
# Pattern matches: word at start OR word that comes after some coordinate names and before ":"
462-
# This handles cases like "spatial_ref: crs_4326: latitude longitude crs_27700: x27700 y27700"
463-
pattern = r"(?:^|\s)([a-zA-Z_][a-zA-Z0-9_]*)(?=\s*:)"
464-
matches = re.findall(pattern, grid_mapping_attr)
459+
return {grid_mapping_attr.strip(): []}
460+
461+
# Use regex to parse the format
462+
# First, find all grid mapping variables (words before colons)
463+
grid_pattern = r"(?:^|\s)([a-zA-Z_][a-zA-Z0-9_]*)(?=\s*:)"
464+
grid_mappings = re.findall(grid_pattern, grid_mapping_attr)
465+
466+
if not grid_mappings:
467+
return {grid_mapping_attr.strip(): []}
468+
469+
result = {}
470+
471+
# Now extract coordinates for each grid mapping
472+
# Split the string to find what comes after each grid mapping variable
473+
for i, gm in enumerate(grid_mappings):
474+
# Pattern to capture everything after this grid mapping until the next one or end
475+
if i < len(grid_mappings) - 1:
476+
next_gm = grid_mappings[i + 1]
477+
# Capture everything between current grid mapping and next one
478+
coord_pattern = (
479+
rf"{re.escape(gm)}\s*:\s*([^:]*?)(?=\s+{re.escape(next_gm)}\s*:)"
480+
)
481+
else:
482+
# Last grid mapping - capture everything after it
483+
coord_pattern = rf"{re.escape(gm)}\s*:\s*(.*)$"
484+
485+
coord_match = re.search(coord_pattern, grid_mapping_attr)
486+
if coord_match:
487+
coord_text = coord_match.group(1).strip()
488+
# Split coordinates and filter out any grid mapping names that might have been captured
489+
coords = coord_text.split() if coord_text else []
490+
# Filter out the next grid mapping variable if it got captured
491+
coords = [c for c in coords if c not in grid_mappings]
492+
result[gm] = coords
493+
else:
494+
result[gm] = []
465495

466-
return matches if matches else [grid_mapping_attr.strip()]
496+
return result
467497

468498

469499
def _get_grid_mapping_name(obj: DataArray | Dataset, key: str) -> list[str]:
@@ -494,8 +524,8 @@ def _get_grid_mapping_name(obj: DataArray | Dataset, key: str) -> list[str]:
494524
attrs_or_encoding = ChainMap(var.attrs, var.encoding)
495525
if grid_mapping_attr := attrs_or_encoding.get("grid_mapping"):
496526
# Parse potentially multiple grid mappings
497-
grid_mapping_var_names = _parse_grid_mapping_attribute(grid_mapping_attr)
498-
for grid_mapping_var_name in grid_mapping_var_names:
527+
grid_mapping_dict = _parse_grid_mapping_attribute(grid_mapping_attr)
528+
for grid_mapping_var_name in grid_mapping_dict.keys():
499529
if grid_mapping_var_name not in variables:
500530
raise ValueError(
501531
f"{var} defines non-existing grid_mapping variable {grid_mapping_var_name}."
@@ -1975,9 +2005,16 @@ def get_associated_variable_names(
19752005
coords["grid"] = [grid]
19762006

19772007
if grid_mapping_attr := attrs_or_encoding.get("grid_mapping", None):
1978-
# Parse grid mapping variables using the same function
1979-
grid_mapping_vars = _parse_grid_mapping_attribute(grid_mapping_attr)
1980-
coords["grid_mapping"] = cast(list[Hashable], grid_mapping_vars)
2008+
# Parse grid mapping variables and their coordinates
2009+
grid_mapping_dict = _parse_grid_mapping_attribute(grid_mapping_attr)
2010+
coords["grid_mapping"] = cast(
2011+
list[Hashable], list(grid_mapping_dict.keys())
2012+
)
2013+
2014+
# Add coordinate variables from the grid mapping
2015+
for coord_vars in grid_mapping_dict.values():
2016+
if coord_vars:
2017+
coords["coordinates"].extend(coord_vars)
19812018

19822019
more: Sequence[Hashable] = ()
19832020
if geometry_var := attrs_or_encoding.get("geometry", None):
@@ -2965,10 +3002,10 @@ def grid_mapping_names(self) -> dict[str, list[str]]:
29653002
return {}
29663003

29673004
# Parse potentially multiple grid mappings
2968-
grid_mapping_var_names = _parse_grid_mapping_attribute(grid_mapping_attr)
3005+
grid_mapping_dict = _parse_grid_mapping_attribute(grid_mapping_attr)
29693006

29703007
results = defaultdict(list)
2971-
for grid_mapping_var_name in grid_mapping_var_names and set(da.coords):
3008+
for grid_mapping_var_name in grid_mapping_dict.keys() & set(da.coords):
29723009
grid_mapping_var = da.coords[grid_mapping_var_name]
29733010
if gmn := grid_mapping_var.attrs.get("grid_mapping_name"):
29743011
results[gmn].append(grid_mapping_var_name)

cf_xarray/tests/test_accessor.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1088,10 +1088,10 @@ def test_multiple_grid_mapping_attribute():
10881088
"transverse_mercator": ["crs_27700"],
10891089
}
10901090

1091-
assert da.cf.get_associated_variable_names()["grid_mapping"] == [
1092-
"latitude_longitude",
1093-
"lambert_azimuthal_equal_area",
1094-
"transverse_mercator",
1091+
assert ds.cf.get_associated_variable_names("foo")["grid_mapping"] == [
1092+
"spatial_ref",
1093+
"crs_4326",
1094+
"crs_27700",
10951095
]
10961096

10971097
# Test that grid_mapping_name raises an error with multiple mappings

0 commit comments

Comments
 (0)