|
| 1 | +""" |
| 2 | +MOM6 requires NetCDF files to be in a very specific format to pass validation, including fill value and missing value attributes. This module accepts input files and warns users of potential issues with them. |
| 3 | +Contributors: where you can, leave proof of each requirement in the form of the exact lines of Fortran code where it is enforced. |
| 4 | +
|
| 5 | +""" |
| 6 | + |
import re
from pathlib import Path

import numpy as np
import xarray as xr

from .utils import setup_logger
| 10 | + |
| 11 | +logger = setup_logger(__name__) |
| 12 | + |
| 13 | + |
def get_file(file: Path | xr.Dataset):
    """Return ``file`` as an xarray dataset.

    Args:
        file: Either an already-open ``xr.Dataset``, which is returned
            unchanged, or any other value (typically a path), which is
            passed to ``xr.open_dataset``.

    Returns:
        The dataset that was passed in, or the newly opened one.
    """
    # isinstance rather than an exact type comparison, so that Dataset
    # subclasses are passed through instead of being handed to open_dataset.
    if isinstance(file, xr.Dataset):
        return file
    return xr.open_dataset(file)
| 20 | + |
| 21 | + |
def check(condition, warning):
    """Log ``warning`` through the module logger when ``condition`` is falsy.

    Args:
        condition: Outcome of a validation test. Any truthy value means the
            test passed and nothing is logged.
        warning: Message to log when the test failed.
    """
    if not condition:
        # ``warn`` is a deprecated alias of ``warning`` on standard-library
        # loggers (assumes setup_logger returns one -- TODO confirm).
        logger.warning(warning)
| 24 | + |
| 25 | + |
| 26 | +# Individual validation rule functions |
def _check_fill_value(da: xr.DataArray):
    """Warn if ``da`` has no ``_FillValue`` attribute or if that value is NaN.

    Args:
        da: Variable to inspect. Only ``da.name`` and ``da.attrs`` are read.
    """
    # NOTE(review): xarray's default CF decoding appears to move _FillValue
    # from ``attrs`` into ``encoding`` when a file is opened, so this check
    # may always warn for path inputs unless the attribute is re-added --
    # confirm against the xarray decoding documentation.
    var_name = da.name

    if "_FillValue" not in da.attrs:
        check(False, f"{var_name} does not have a FillValue attribute")
        # Nothing left to inspect; returning avoids a KeyError below.
        return

    fill_value = da.attrs["_FillValue"]
    try:
        fill_is_nan = bool(np.isnan(fill_value))
    except (TypeError, ValueError):
        # Non-numeric (e.g. string) or non-scalar fill values cannot be
        # classified as NaN here, so they are not reported.
        fill_is_nan = False

    check(
        not fill_is_nan,
        f"Fill Value for variable {var_name} is NaN (normally not wanted)",
    )
| 35 | + |
| 36 | + |
def _check_coordinates(ds: xr.Dataset, var_name: str):
    """Warn if a variable's ``coordinates`` attribute is missing or dangling.

    The attribute is read as a whitespace-separated list of names; a warning
    is logged for every listed name that is not present in ``ds``.

    Args:
        ds: Dataset that contains the variable and its coordinates.
        var_name: Name of the variable whose attribute is inspected.

    Raises:
        AssertionError: If ``var_name`` is not in ``ds``.
    """
    assert var_name in ds, f"{var_name} is not in the dataset"

    attrs = ds[var_name].attrs
    if "coordinates" not in attrs:
        check(False, f"{var_name} does not have a coordinates attribute")
        # Without the attribute there are no names to look up.
        return

    # split() yields the individual names; iterating the stripped string
    # would walk it one character at a time.
    for coord in attrs["coordinates"].split():
        check(coord in ds, f"Coordinate {coord} for variable {var_name} does not exist")
| 50 | + |
| 51 | + |
def _check_required_dimensions(da: xr.DataArray, surface=False):
    """Warn when ``da`` has an unexpected number of dimensions.

    Args:
        da: Variable to inspect; its ``dims`` and ``name`` are read.
        surface: When true the variable is expected to have 3 dimensions,
            otherwise 4.
    """
    n_dims = len(da.dims)

    if surface:
        check(n_dims == 3, f"Surface Variable {da.name} does not have 3 dimensions")
        return

    check(n_dims == 4, f"Variable {da.name} does not have 4 dimensions")
| 60 | + |
| 61 | + |
def validate_obc_file(
    file: Path | xr.Dataset, variable_names: list, encoding_dict=None, surface_var="eta"
):
    """Warn about problems in an open boundary condition (OBC) file.

    For every name in ``variable_names`` this checks the name format and then,
    if the variable exists in the file, its fill value, its ``coordinates``
    attribute, its number of dimensions and (for non-surface variables) the
    presence of a ``dz_<name>`` thickness variable. Problems are logged as
    warnings; nothing is returned.

    Args:
        file: Path to the file, or an already-open dataset.
        variable_names: Names of the data variables to validate.
        encoding_dict: Optional mapping ``{variable name: {attribute: value}}``.
            Its entries are written onto the variable's ``attrs`` before the
            checks run. ``None`` (the default) means no extra attributes.
        surface_var: Name of the variable that is validated as a surface
            variable (3 dimensions, no thickness variable).
    """
    if encoding_dict is None:
        encoding_dict = {}

    # Only close the dataset afterwards if this call opened it; a dataset
    # supplied by the caller stays open and under the caller's control.
    opened_here = not isinstance(file, xr.Dataset)
    ds = get_file(file)

    try:
        # Check individual data variable specifications (nothing that starts with dz)
        # NOTE(review): the message below describes name-based detection, but
        # the variables actually come from ``variable_names`` -- confirm wording.
        print(
            "This function identifies variables by if they have the word 'segment' in the name and don't start with nz,dz,lon,lat."
        )

        for var in variable_names:
            # Check variable name format
            check(
                ends_with_3_digits(var),
                f"Variable {var} does not end with a 3 digit number. OBC file variables must end with a number",
            )
            check(
                "segment" in var,
                f"Variable {var} does not contain the word 'segment'. OBC file variable names must contain 'segment'",
            )

            # The remaining checks need the variable itself; warn and move on
            # instead of failing with a KeyError.
            if var not in ds:
                check(False, f"Variable {var} does not exist in the file")
                continue

            # Add encodings.
            # NOTE(review): when a Dataset is passed in, this modifies the
            # caller's object -- confirm that is intended.
            if var in encoding_dict:
                for key, value in encoding_dict[var].items():
                    ds[var].attrs[key] = value

            # Check if there is a non-NaN fill value
            _check_fill_value(ds[var])

            # Check coordinates
            _check_coordinates(ds, var_name=var)

            # Check the correct number of dimensions.
            # NOTE(review): this is an exact-name comparison, so the default
            # "eta" never equals a name ending in a segment number -- confirm
            # callers pass the full surface variable name.
            is_surface = var == surface_var
            _check_required_dimensions(ds[var], surface=is_surface)

            # Check for thickness variable
            if not is_surface:
                check(
                    f"dz_{var}" in ds,
                    f"Cannot find thickness variable for var {var}, it should be of the form dz_{var}",
                )
    finally:
        if opened_here:
            ds.close()
| 105 | + |
| 106 | + |
def ends_with_3_digits(s: str) -> bool:
    """Return True when the last three characters of ``s`` are ASCII digits.

    Args:
        s: Name to test, e.g. a variable name.

    Returns:
        True if ``s`` ends with at least three digits ``0``-``9``.
    """
    # [0-9] instead of \d: \d also accepts non-ASCII Unicode digits.
    # \Z instead of $: $ also matches just before a trailing newline.
    return re.search(r"[0-9]{3}\Z", s) is not None
0 commit comments