@@ -440,30 +440,60 @@ def _get_bounds(obj: DataArray | Dataset, key: Hashable) -> list[Hashable]:
440
440
return list (results )
441
441
442
442
443
- def _parse_grid_mapping_attribute (grid_mapping_attr : str ) -> list [str ]:
443
+ def _parse_grid_mapping_attribute (grid_mapping_attr : str ) -> dict [ str , list [str ] ]:
444
444
"""
445
445
Parse a grid_mapping attribute that may contain multiple grid mappings.
446
446
447
447
The attribute has the format: "grid_mapping_variable_name: optional_coordinate_names_space_separated"
448
448
Multiple sections are separated by colons.
449
449
450
450
Examples:
451
- - Single: "spatial_ref"
451
+ - Single: "spatial_ref" -> {"spatial_ref": []}
452
452
- Multiple: "spatial_ref: crs_4326: latitude longitude crs_27700: x27700 y27700"
453
+ -> {"spatial_ref": [], "crs_4326": ["latitude", "longitude"], "crs_27700": ["x27700", "y27700"]}
453
454
454
- Returns a list of grid mapping variable names.
455
+ Returns a dictionary mapping grid mapping variable names to their associated coordinate variables .
455
456
"""
456
457
# Check if there are colons indicating multiple mappings
457
458
if ":" not in grid_mapping_attr :
458
- return [grid_mapping_attr .strip ()]
459
-
460
- # Use regex to find grid mapping variable names
461
- # Pattern matches: word at start OR word that comes after some coordinate names and before ":"
462
- # This handles cases like "spatial_ref: crs_4326: latitude longitude crs_27700: x27700 y27700"
463
- pattern = r"(?:^|\s)([a-zA-Z_][a-zA-Z0-9_]*)(?=\s*:)"
464
- matches = re .findall (pattern , grid_mapping_attr )
459
+ return {grid_mapping_attr .strip (): []}
460
+
461
+ # Use regex to parse the format
462
+ # First, find all grid mapping variables (words before colons)
463
+ grid_pattern = r"(?:^|\s)([a-zA-Z_][a-zA-Z0-9_]*)(?=\s*:)"
464
+ grid_mappings = re .findall (grid_pattern , grid_mapping_attr )
465
+
466
+ if not grid_mappings :
467
+ return {grid_mapping_attr .strip (): []}
468
+
469
+ result = {}
470
+
471
+ # Now extract coordinates for each grid mapping
472
+ # Split the string to find what comes after each grid mapping variable
473
+ for i , gm in enumerate (grid_mappings ):
474
+ # Pattern to capture everything after this grid mapping until the next one or end
475
+ if i < len (grid_mappings ) - 1 :
476
+ next_gm = grid_mappings [i + 1 ]
477
+ # Capture everything between current grid mapping and next one
478
+ coord_pattern = (
479
+ rf"{ re .escape (gm )} \s*:\s*([^:]*?)(?=\s+{ re .escape (next_gm )} \s*:)"
480
+ )
481
+ else :
482
+ # Last grid mapping - capture everything after it
483
+ coord_pattern = rf"{ re .escape (gm )} \s*:\s*(.*)$"
484
+
485
+ coord_match = re .search (coord_pattern , grid_mapping_attr )
486
+ if coord_match :
487
+ coord_text = coord_match .group (1 ).strip ()
488
+ # Split coordinates and filter out any grid mapping names that might have been captured
489
+ coords = coord_text .split () if coord_text else []
490
+ # Filter out the next grid mapping variable if it got captured
491
+ coords = [c for c in coords if c not in grid_mappings ]
492
+ result [gm ] = coords
493
+ else :
494
+ result [gm ] = []
465
495
466
- return matches if matches else [ grid_mapping_attr . strip ()]
496
+ return result
467
497
468
498
469
499
def _get_grid_mapping_name (obj : DataArray | Dataset , key : str ) -> list [str ]:
@@ -494,8 +524,8 @@ def _get_grid_mapping_name(obj: DataArray | Dataset, key: str) -> list[str]:
494
524
attrs_or_encoding = ChainMap (var .attrs , var .encoding )
495
525
if grid_mapping_attr := attrs_or_encoding .get ("grid_mapping" ):
496
526
# Parse potentially multiple grid mappings
497
- grid_mapping_var_names = _parse_grid_mapping_attribute (grid_mapping_attr )
498
- for grid_mapping_var_name in grid_mapping_var_names :
527
+ grid_mapping_dict = _parse_grid_mapping_attribute (grid_mapping_attr )
528
+ for grid_mapping_var_name in grid_mapping_dict . keys () :
499
529
if grid_mapping_var_name not in variables :
500
530
raise ValueError (
501
531
f"{ var } defines non-existing grid_mapping variable { grid_mapping_var_name } ."
@@ -1975,9 +2005,16 @@ def get_associated_variable_names(
1975
2005
coords ["grid" ] = [grid ]
1976
2006
1977
2007
if grid_mapping_attr := attrs_or_encoding .get ("grid_mapping" , None ):
1978
- # Parse grid mapping variables using the same function
1979
- grid_mapping_vars = _parse_grid_mapping_attribute (grid_mapping_attr )
1980
- coords ["grid_mapping" ] = cast (list [Hashable ], grid_mapping_vars )
2008
+ # Parse grid mapping variables and their coordinates
2009
+ grid_mapping_dict = _parse_grid_mapping_attribute (grid_mapping_attr )
2010
+ coords ["grid_mapping" ] = cast (
2011
+ list [Hashable ], list (grid_mapping_dict .keys ())
2012
+ )
2013
+
2014
+ # Add coordinate variables from the grid mapping
2015
+ for coord_vars in grid_mapping_dict .values ():
2016
+ if coord_vars :
2017
+ coords ["coordinates" ].extend (coord_vars )
1981
2018
1982
2019
more : Sequence [Hashable ] = ()
1983
2020
if geometry_var := attrs_or_encoding .get ("geometry" , None ):
@@ -2965,10 +3002,10 @@ def grid_mapping_names(self) -> dict[str, list[str]]:
2965
3002
return {}
2966
3003
2967
3004
# Parse potentially multiple grid mappings
2968
- grid_mapping_var_names = _parse_grid_mapping_attribute (grid_mapping_attr )
3005
+ grid_mapping_dict = _parse_grid_mapping_attribute (grid_mapping_attr )
2969
3006
2970
3007
results = defaultdict (list )
2971
- for grid_mapping_var_name in grid_mapping_var_names and set (da .coords ):
3008
+ for grid_mapping_var_name in grid_mapping_dict . keys () & set (da .coords ):
2972
3009
grid_mapping_var = da .coords [grid_mapping_var_name ]
2973
3010
if gmn := grid_mapping_var .attrs .get ("grid_mapping_name" ):
2974
3011
results [gmn ].append (grid_mapping_var_name )
0 commit comments