Skip to content

Commit f3cc5d5

Browse files
committed
Refactor whitespace and formatting in atmosphere.py, calc_atmos.py, ACCESS-ESM1.6_mappings.json, and vocabulary_processors.py for improved readability
1 parent c3444ce commit f3cc5d5

File tree

4 files changed

+79
-55
lines changed

4 files changed

+79
-55
lines changed

src/access_moppy/atmosphere.py

Lines changed: 34 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1-
import warnings
21
import re
2+
import warnings
33

44
import numpy as np
55
import xarray as xr
@@ -72,66 +72,72 @@ def calculate_missing_bounds_variables(self, bnds_required):
7272
def remove_spurious_time_dimensions(self, required_vars):
7373
"""
7474
Remove spurious time dimensions from coordinate and auxiliary variables.
75-
75+
7676
This method addresses a common issue in xarray when combining datasets:
77-
spatial bounds (lat_bnds, lon_bnds) and other coordinate variables can incorrectly
77+
spatial bounds (lat_bnds, lon_bnds) and other coordinate variables can incorrectly
7878
gain time dimensions during multi-file dataset operations, even though they are time-invariant.
79-
79+
8080
Why this is necessary:
81-
- When using xr.open_mfdataset() with combine_coords="time", xarray
81+
- When using xr.open_mfdataset() with combine_coords="time", xarray
8282
conservatively assumes all coordinate-linked variables might vary with time
8383
- This causes spatial bounds and coordinates to be broadcasted along the time dimension
8484
- Results in redundant data storage and non-CF-compliant files
85-
85+
8686
Why this is reasonable for ACCESS Models:
8787
- ACCESS Models use static grids throughout model runs
8888
- Latitude, longitude coordinates (and their bounds) are time-invariant
8989
- The grid definition remains constant across all timesteps
9090
- Only time_bnds and data variables should legitimately have a time dimension
9191
- This optimization is safe and improves storage efficiency
92-
92+
9393
Args:
9494
required_vars (list): Variables that should keep their time dimension
9595
"""
9696
# Identify all variables that have gained spurious time dimensions
9797
# Include bounds variables and any other coordinate variables
9898
problematic_vars = [
99-
name for name in self.ds.variables
99+
name
100+
for name in self.ds.variables
100101
if "time" not in name # Don't touch time_bnds or time coordinate
101102
and name not in required_vars # Don't touch required data variables
102-
and name in self.ds
103+
and name in self.ds
103104
and "time" in self.ds[name].coords
104105
and self.ds[name].dims != ("time",) # Skip pure time variables
105106
]
106-
107+
107108
if problematic_vars:
108109
# Process all problematic variables efficiently in a single operation
109110
corrections = {
110-
name: self.ds[name].isel(time=0).drop_vars("time")
111+
name: self.ds[name].isel(time=0).drop_vars("time")
111112
for name in problematic_vars
112113
}
113114
self.ds = self.ds.assign(corrections)
114115

115116
def select_and_process_variables(self):
116-
117117
# Select input variables required for the CMOR variable
118118
required_vars = self.mapping[self.cmor_name]["model_variables"]
119-
120-
required_axes, axes_rename_map = self.vocab._get_axes(self.mapping)
121-
required_bounds, bounds_rename_map = self.vocab._get_required_bounds_variables(self.mapping)
122119

123-
required = set(required_vars + list(axes_rename_map.keys()) + list(bounds_rename_map.keys()))
120+
required_axes, axes_rename_map = self.vocab._get_axes(self.mapping)
121+
required_bounds, bounds_rename_map = self.vocab._get_required_bounds_variables(
122+
self.mapping
123+
)
124+
125+
required = set(
126+
required_vars
127+
+ list(axes_rename_map.keys())
128+
+ list(bounds_rename_map.keys())
129+
)
124130
self.load_dataset(required_vars=required)
125131

126132
# Remove spurious time dimensions from spatial bounds and coordinates
127133
self.remove_spurious_time_dimensions(required_vars)
128134

129135
# Ensure time dimension is sorted
130136
self.sort_time_dimension()
131-
137+
132138
## Calculate missing bounds variables
133139
##self.calculate_missing_bounds_variables(required_bounds)
134-
140+
135141
calc = self.mapping[self.cmor_name]["calculation"]
136142

137143
# Handle the calculation type
@@ -161,12 +167,18 @@ def select_and_process_variables(self):
161167
raise ValueError(f"Unsupported calculation type: {calc['type']}")
162168

163169
# Rename axes and bounds variables
164-
rename_map = {k: v for k, v in {**bounds_rename_map, **axes_rename_map}.items() if k in self.ds}
170+
rename_map = {
171+
k: v
172+
for k, v in {**bounds_rename_map, **axes_rename_map}.items()
173+
if k in self.ds
174+
}
165175
self.ds = self.ds.rename(rename_map)
166176

167177
# Transpose the data variable according to the CMOR dimensions
168-
cmor_dims = re.sub(r'\w*level', 'lev', self.vocab.variable["dimensions"]).split()
169-
178+
cmor_dims = re.sub(
179+
r"\w*level", "lev", self.vocab.variable["dimensions"]
180+
).split()
181+
170182
transpose_order = [
171183
self.vocab.axes[dim]["out_name"]
172184
for dim in cmor_dims
@@ -194,7 +206,7 @@ def update_attributes(self):
194206
)
195207

196208
cmor_attrs = self.vocab.variable
197-
#self._check_units(self.cmor_name, cmor_attrs.get("units"))
209+
# self._check_units(self.cmor_name, cmor_attrs.get("units"))
198210

199211
self.ds[self.cmor_name].attrs.update(
200212
{k: v for k, v in cmor_attrs.items() if v not in (None, "")}

src/access_moppy/derivations/calc_atmos.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -250,8 +250,11 @@
250250
def cli_level_to_height(ds):
251251
# Handle level coordinate transformation
252252
if "theta_level_height" in ds:
253-
ds = (ds.assign_coords({"lev": ds["theta_level_height"]})
254-
.swap_dims({"model_theta_level_number": "lev"})
255-
.drop_vars(["theta_level_height", "model_theta_level_number"], errors="ignore"))
253+
ds = (
254+
ds.assign_coords({"lev": ds["theta_level_height"]})
255+
.swap_dims({"model_theta_level_number": "lev"})
256+
.drop_vars(
257+
["theta_level_height", "model_theta_level_number"], errors="ignore"
258+
)
259+
)
256260
return ds
257-

src/access_moppy/mappings/ACCESS-ESM1.6_mappings.json

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,7 @@
7777
"type": "hybrid_height",
7878
"coordinate_variables": {
7979
"sigma_theta": "b",
80-
"surface_altitude": "orog",
80+
"surface_altitude": "orog",
8181
"theta_level_height": "lev"
8282
},
8383
"formula": "z = lev + b*orog"
@@ -86,7 +86,7 @@
8686
"type": "formula",
8787
"operation": "swap_dimensions",
8888
"operands": [
89-
"fld_s02i261",
89+
"fld_s02i261",
9090
{"literal": "model_level_number"},
9191
{"literal": "lev"}
9292
]
@@ -109,7 +109,7 @@
109109
"type": "hybrid_height",
110110
"coordinate_variables": {
111111
"sigma_theta": "b",
112-
"surface_altitude": "orog",
112+
"surface_altitude": "orog",
113113
"theta_level_height": "lev"
114114
},
115115
"formula": "z = a + b*orog"

src/access_moppy/vocabulary_processors.py

Lines changed: 35 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -258,7 +258,6 @@ def _get_variable_suggestions(self) -> List[str]:
258258
return suggestions
259259

260260
def _get_axes(self, mapping) -> Dict[str, Any]:
261-
262261
# Resolve resource inside the module path
263262
coord_entry = files(self.table_dir) / "CMIP6_coordinate.json"
264263

@@ -268,48 +267,54 @@ def _get_axes(self, mapping) -> Dict[str, Any]:
268267

269268
dims = self.variable["dimensions"].split()
270269
vars_required = {}
271-
270+
272271
for dim in dims:
273272
if dim in axes and dim not in ["alevel"]:
274273
coord = axes[dim]
275274
vars_required[dim] = {k: v for k, v in coord.items() if v != ""}
276-
275+
277276
# Add z-axis coordinate variables if applicable
278277
if "zaxis" in mapping[self.cmor_name]:
279-
280278
# Get z-axis type from mapping
281-
zaxis_type = mapping[self.cmor_name]["zaxis"].get(
282-
"type", {}
283-
)
279+
zaxis_type = mapping[self.cmor_name]["zaxis"].get("type", {})
284280

285281
# Process main z-axis coordinate
286282
zcoord = axes.get(zaxis_type, {})["out_name"]
287-
vars_required[zcoord] = {k: v for k, v in axes[zaxis_type].items() if v != ""}
288-
283+
vars_required[zcoord] = {
284+
k: v for k, v in axes[zaxis_type].items() if v != ""
285+
}
286+
289287
# Process z_factors
290288
zfactors_str = axes.get(zaxis_type, {}).get("z_factors", "")
291-
289+
292290
zfactors = {}
293291
if zfactors_str:
294292
parts = zfactors_str.split()
295-
zfactors = {parts[i].rstrip(':'): parts[i+1]
296-
for i in range(0, len(parts), 2) if i+1 < len(parts)}
297-
293+
zfactors = {
294+
parts[i].rstrip(":"): parts[i + 1]
295+
for i in range(0, len(parts), 2)
296+
if i + 1 < len(parts)
297+
}
298+
298299
formula_entry = files(self.table_dir) / "CMIP6_formula_terms.json"
299300
with as_file(formula_entry) as fpath:
300301
with open(fpath, "r", encoding="utf-8") as ff:
301-
formula_terms = json.load(ff)["formula_entry"]
302+
formula_terms = json.load(ff)["formula_entry"]
302303

303304
for factor_name, _ in zfactors.items():
304305
if factor_name in formula_terms:
305306
zcoord = formula_terms[factor_name]
306-
vars_required[factor_name] = {k: v for k, v in zcoord.items() if v != ""}
307+
vars_required[factor_name] = {
308+
k: v for k, v in zcoord.items() if v != ""
309+
}
307310

308311
# Let's map the axis and formula terms to the inputs
309312
vars_rename_map = {}
310-
extended_mapping = mapping[self.cmor_name]["dimensions"] | mapping[self.cmor_name].get("zaxis", {}).get("coordinate_variables", {})
311-
inverted_extended_mapping = {v: k for k, v in extended_mapping.items()}
312-
313+
extended_mapping = mapping[self.cmor_name]["dimensions"] | mapping[
314+
self.cmor_name
315+
].get("zaxis", {}).get("coordinate_variables", {})
316+
inverted_extended_mapping = {v: k for k, v in extended_mapping.items()}
317+
313318
for _, v in vars_required.items():
314319
input_dim = inverted_extended_mapping.get(v["out_name"])
315320
if input_dim:
@@ -322,21 +327,23 @@ def _get_axes(self, mapping) -> Dict[str, Any]:
322327
def _get_required_bounds_variables(self, mapping: Dict[str, Any]) -> tuple:
323328
"""
324329
Get required bounds variables based on CMOR vocabulary axes.
325-
330+
326331
Args:
327332
mapping: Variable mapping dictionary containing dimensions
328-
333+
329334
Returns:
330335
tuple: (bnds_required, bounds_rename_map) where
331336
- bnds_required: list of required bounds variable names
332337
- bounds_rename_map: dict mapping input bounds names to output bounds names
333338
"""
334339
bnds_required = {}
335340
bounds_rename_map = {}
336-
337-
extended_mapping = mapping[self.cmor_name]["dimensions"] | mapping[self.cmor_name].get("zaxis", {}).get("coordinate_variables", {})
338-
inverted_extended_mapping = {v: k for k, v in extended_mapping.items()}
339-
341+
342+
extended_mapping = mapping[self.cmor_name]["dimensions"] | mapping[
343+
self.cmor_name
344+
].get("zaxis", {}).get("coordinate_variables", {})
345+
inverted_extended_mapping = {v: k for k, v in extended_mapping.items()}
346+
340347
axes, _ = self._get_axes(mapping)
341348
for _, v in axes.items():
342349
if v.get("must_have_bounds") == "yes":
@@ -345,8 +352,10 @@ def _get_required_bounds_variables(self, mapping: Dict[str, Any]) -> tuple:
345352
input_bounds = input_dim + "_bnds"
346353
output_bounds = v["out_name"] + "_bnds"
347354
bounds_rename_map[input_bounds] = output_bounds
348-
bnds_required[output_bounds] = {key: val for key, val in v.items() if val != ""}
349-
355+
bnds_required[output_bounds] = {
356+
key: val for key, val in v.items() if val != ""
357+
}
358+
350359
return bnds_required, bounds_rename_map
351360

352361
def get_variant_components(self) -> Dict[str, int]:

0 commit comments

Comments
 (0)