Skip to content

Commit dd7c770

Browse files
committed
Bleh
1 parent 03b2b28 commit dd7c770

File tree

1 file changed

+108
-0
lines changed

1 file changed

+108
-0
lines changed

regional_mom6/validate.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
"""
2+
MOM6 requires NetCDF files to be in a very specific format to pass validation, including fill value and missing value attributes. This module accepts input files and warns users of potential issues with them.
3+
When adding a check, please cite the exact lines of Fortran code that impose the requirement, if you can!
4+
5+
"""
6+
7+
import math
from pathlib import Path

import xarray as xr

from .utils import setup_logger
10+
11+
logger = setup_logger(__name__)
12+
13+
14+
def get_file(file: Path | xr.Dataset) -> xr.Dataset:
    """Return an xarray dataset for ``file``.

    Args:
        file: Either an ``xr.Dataset`` (returned unchanged) or a value that
            is handed to ``xr.open_dataset``, such as a file path.

    Returns:
        The dataset that was passed in, or the one opened from ``file``.
    """
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of xr.Dataset instead of trying to open them as a path.
    if isinstance(file, xr.Dataset):
        return file
    return xr.open_dataset(file)
20+
21+
22+
def check(condition, warning):
    """Log ``warning`` through the module logger when ``condition`` is falsy.

    Args:
        condition: Outcome of a validation rule; any falsy value counts as a
            failure.
        warning: Message to log when the rule fails.
    """
    if not condition:
        # Logger.warn is a deprecated alias; warning() is the supported name.
        # NOTE(review): presumes setup_logger returns a standard
        # logging.Logger - confirm against .utils.
        logger.warning(warning)
24+
25+
26+
# Individual validation rule functions
27+
def _check_fill_value(da: xr.DataArray):
    """Warn if ``da`` has no ``_FillValue`` attribute or if that value is NaN.

    Args:
        da: Data array whose ``attrs`` are inspected; ``da.name`` identifies
            the variable in the warning messages.
    """
    var_name = da.name

    has_fill_value = "_FillValue" in da.attrs
    check(has_fill_value, f"{var_name} does not have a FillValue attribute")
    if not has_fill_value:
        # Already warned above; indexing attrs here would raise KeyError.
        return

    fill_value = da.attrs["_FillValue"]
    try:
        fill_is_nan = math.isnan(fill_value)
    except (TypeError, ValueError):
        # A fill value that cannot be read as a float (e.g. a string)
        # is by definition not NaN.
        fill_is_nan = False

    check(
        not fill_is_nan,
        f"Fill Value for variable {var_name} is NaN (normally not wanted)",
    )
35+
36+
37+
def _check_coordinates(ds: xr.Dataset, var_name: str):
    """Warn if ``var_name`` lacks a ``coordinates`` attribute or names unknown coordinates.

    The ``coordinates`` attribute is read as a whitespace-separated list of
    names, and each name is expected to be present in ``ds``.

    Args:
        ds: Dataset containing the variable and its coordinates.
        var_name: Name of the variable whose ``coordinates`` attribute is
            checked.

    Raises:
        AssertionError: If ``var_name`` is not in ``ds``.
    """
    assert var_name in ds, f"Variable {var_name} is not in the dataset"

    attrs = ds[var_name].attrs
    has_coordinates = "coordinates" in attrs
    check(has_coordinates, f"{var_name} does not have a coordinates attribute")
    if not has_coordinates:
        # Already warned above; indexing attrs here would raise KeyError.
        return

    # split() yields whole coordinate names; iterating the raw string would
    # test every single character against the dataset instead.
    # NOTE(review): assumes the attribute is a string - confirm no writer
    # stores it as a list.
    for coord in attrs["coordinates"].split():
        check(coord in ds, f"Coordinate {coord} for variable {var_name} does not exist")
50+
51+
52+
def _check_required_dimensions(da: xr.DataArray, surface=False):
    """Warn when ``da`` does not have the expected number of dimensions.

    Args:
        da: Data array whose ``dims`` are counted.
        surface: When truthy, three dimensions are expected; otherwise four.
    """
    if surface:
        check(
            len(da.dims) == 3, f"Surface Variable {da.name} does not have 3 dimensions"
        )
        return

    check(len(da.dims) == 4, f"Variable {da.name} does not have 4 dimensions")
60+
61+
62+
def validate_obc_file(
    file: Path | xr.Dataset, variable_names: list, encoding_dict=None, surface_var="eta"
):
    """Validate a boundary condition file, logging a warning for every failed rule.

    For each name in ``variable_names`` this checks the name format (ends
    with three digits, contains "segment"), the fill value, the
    ``coordinates`` attribute, the number of dimensions and, for
    non-surface variables, the presence of a ``dz_<name>`` thickness
    variable.

    Args:
        file: Dataset to validate, or a path to open with ``get_file``.
        variable_names: Names of the data variables to validate.
        encoding_dict: Optional mapping of variable name to a dict of
            attributes; these are copied onto the variable's ``attrs``
            before the checks run. Defaults to no extra attributes.
        surface_var: Name of the variable validated as a surface variable
            (three dimensions, no thickness variable).
    """
    # A None default avoids sharing one mutable dict between calls.
    if encoding_dict is None:
        encoding_dict = {}

    # Only close the dataset when it was opened here; a dataset handed in by
    # the caller stays under the caller's control.
    opened_here = not isinstance(file, xr.Dataset)
    ds = get_file(file)

    try:
        # NOTE(review): this message describes name-based discovery, but the
        # loop below validates exactly the names given in variable_names.
        print(
            "This function identifies variables by if they have the word 'segment' in the name and don't start with nz,dz,lon,lat."
        )

        for var in variable_names:

            # Check variable name format
            check(
                ends_with_3_digits(var),
                f"Variable {var} does not end with a 3 digit number. OBC file variables must end with a number",
            )
            check(
                "segment" in var,
                f"Variable {var} does not have 'segment' in its name",
            )

            # Warn instead of raising KeyError on ds[var] below.
            if var not in ds:
                check(False, f"Variable {var} was not found in the file")
                continue

            # Add encodings
            if var in encoding_dict:
                for key, value in encoding_dict[var].items():
                    ds[var].attrs[key] = value

            # Check if there is a non-NaN fill value
            _check_fill_value(ds[var])

            # Check coordinates
            _check_coordinates(ds, var_name=var)

            # Check the correct number of dimensions
            is_surface = var == surface_var
            _check_required_dimensions(ds[var], surface=is_surface)

            # Check for thickness variable
            if not is_surface:
                check(
                    f"dz_{var}" in ds,
                    f"Cannot find thickness variable for var {var}, it should be of the form dz_{var}",
                )
    finally:
        if opened_here:
            ds.close()
105+
106+
107+
def ends_with_3_digits(s: str) -> bool:
    """Return True if the last three characters of ``s`` are ASCII digits.

    Args:
        s: Name to test.

    Returns:
        True when ``s`` has at least three characters and its final three
        are all in ``0-9``; False otherwise (including for a trailing
        newline or non-ASCII digit characters).
    """
    suffix = s[-3:]
    # isascii() rules out Unicode digit characters that isdigit() accepts.
    return len(suffix) == 3 and suffix.isascii() and suffix.isdigit()

0 commit comments

Comments
 (0)