diff --git a/climate_toolbox/__init__.py b/climate_toolbox/__init__.py
index 060088e..ccccd5d 100644
--- a/climate_toolbox/__init__.py
+++ b/climate_toolbox/__init__.py
@@ -1,7 +1,22 @@
 # -*- coding: utf-8 -*-
-
 """Top-level package for climate_toolbox."""
+from __future__ import absolute_import
+
 
 __author__ = """Justin Simcock"""
 __email__ = 'jsimcock@rhg.com'
 __version__ = '0.1.5'
+
+import climate_toolbox.aggregations
+import climate_toolbox.geo
+import climate_toolbox.io
+import climate_toolbox.transformations
+import climate_toolbox.utils
+
+__all__ = [
+    'aggregations',
+    'geo',
+    'io',
+    'transformations',
+    'utils',
+]
diff --git a/climate_toolbox/aggregations/aggregations.py b/climate_toolbox/aggregations/aggregations.py
index 7ca995b..1c11b5b 100644
--- a/climate_toolbox/aggregations/aggregations.py
+++ b/climate_toolbox/aggregations/aggregations.py
@@ -1,3 +1,6 @@
+
+from __future__ import absolute_import
+
 import xarray as xr
 import numpy as np
 import pandas as pd
diff --git a/climate_toolbox/climate_toolbox.py b/climate_toolbox/climate_toolbox.py
index 68af849..4b1ee27 100644
--- a/climate_toolbox/climate_toolbox.py
+++ b/climate_toolbox/climate_toolbox.py
@@ -2,3 +2,92 @@
 This file describes the process for computing weighted climate data
 """
+from __future__ import absolute_import
+
+import climate_toolbox.io.load
+import logging
+
+logger = logging.getLogger('climate_toolbox')
+
+
+def transform_and_weighted_aggregate_climate_data(
+        climate_data_loader=None,
+        loader_kwargs=None,
+        transform=None,
+        extra_transform_kwargs=None,
+        aggregator=None,
+        extra_aggregation_kwargs=None,
+        validate=None,
+        extra_validation_kwargs=None,
+        writers=None,
+        extra_writer_kwargs=None,
+        metadata=None,
+        assign_attrs_list=None,
+        interactive=False,
+        logger=None,
+        **iteration_kwargs):
+    '''
+    Load, transform, aggregate, validate, and write one climate dataset
+    '''
+
+    if logger is None:
+        logger = logging.getLogger('climate_toolbox')
+
+    logger.info('starting')
+
+    if climate_data_loader is None:
+        climate_data_loader = (
+            climate_toolbox.io.load
+            .load_and_standardize_climate_dataset_from_pattern)
+
+    if loader_kwargs is None:
+        loader_kwargs = {}
+
+    if transform is None:
+        transform = lambda x, *args, **kwargs: x
+
+    if extra_transform_kwargs is None:
+        extra_transform_kwargs = {}
+
+    if aggregator is None:
+        aggregator = lambda x, *args, **kwargs: x
+
+    if extra_aggregation_kwargs is None:
+        extra_aggregation_kwargs = {}
+
+    if validate is None:
+        validate = lambda x, *args, **kwargs: None
+
+    if extra_validation_kwargs is None:
+        extra_validation_kwargs = {}
+
+    if writers is None:
+        writers = [lambda x, *args, **kwargs: None]
+
+    if extra_writer_kwargs is None:
+        extra_writer_kwargs = [{} for _ in writers]
+
+    if assign_attrs_list is None:
+        assign_attrs_list = []
+
+    logger.debug('loading climate data')
+    data = climate_data_loader(**loader_kwargs, **iteration_kwargs)
+
+    logger.debug('transforming climate data')
+    data = transform(data, **extra_transform_kwargs, **iteration_kwargs)
+
+    logger.debug('aggregating transformed data')
+    data = aggregator(data, **extra_aggregation_kwargs, **iteration_kwargs)
+
+    logger.debug('validating results')
+    validate(data, **extra_validation_kwargs, **iteration_kwargs)
+
+    logger.debug('updating metadata')
+    if metadata is not None:
+        data.attrs.update({
+            k: v
+            for k, v in dict(**metadata, **iteration_kwargs).items()
+            if k in assign_attrs_list})
+
+    if interactive:
+        return data
+
+    logger.debug('writing results')
+    for wi, writer in enumerate(writers):
+        writer(data, **extra_writer_kwargs[wi], **iteration_kwargs)
+
+    
logger.info('done') diff --git a/climate_toolbox/geo/distance.py b/climate_toolbox/geo/distance.py index 51600bd..9150ef1 100644 --- a/climate_toolbox/geo/distance.py +++ b/climate_toolbox/geo/distance.py @@ -1,3 +1,6 @@ + +from __future__ import absolute_import + import numpy as np # model major (km) minor (km) flattening ELLIPSOIDS = {'WGS-84': (6378.137, 6356.7523142, 1 / 298.257223563), diff --git a/climate_toolbox/io/__init__.py b/climate_toolbox/io/__init__.py index 3671680..4f64c17 100644 --- a/climate_toolbox/io/__init__.py +++ b/climate_toolbox/io/__init__.py @@ -1 +1,6 @@ -from .io import standardize_climate_data, load_bcsd + +from __future__ import absolute_import + +from climate_toolbox.io.load import load_and_standardize_climate_dataset +import climate_toolbox.io.sources as sources +import climate_toolbox.io.write as write diff --git a/climate_toolbox/io/io.py b/climate_toolbox/io/io.py deleted file mode 100644 index c7698d4..0000000 --- a/climate_toolbox/io/io.py +++ /dev/null @@ -1,71 +0,0 @@ -import xarray as xr - -from climate_toolbox.utils.utils import * - - -def standardize_climate_data(ds): - """ - Read climate data and standardize units to: - - lon and lat, - - lon to -180 to 180 and - - Parameters - ---------- - ds: xr.Dataset - - Returns - ------- - xr.Dataset - """ - - ds = rename_coords_to_lon_and_lat(ds) - ds = convert_lons_split(ds, lon_name='lon') - - return ds - - -def load_bcsd(fp, varname, lon_name='lon', broadcast_dims=('time',)): - """ - Read and prepare climate data - - After reading data, this method also fills NA values using linear - interpolation, and standardizes longitude to -180:180 - - Parameters - ---------- - fp: str - File path or dataset - - varname: str - Variable name to be read - - lon_name : str, optional - Name of the longitude dimension (defualt selects from ['lon' or - 'longitude']) - - Returns - ------- - xr.Dataset - xarray dataset loaded into memory - """ - - if lon_name is not None: - lon_names = [lon_name] - - if hasattr(fp, 'sel_points'): - ds = fp - - else: - with xr.open_dataset(fp) as ds: - ds.load() - - return standardize_climate_data(ds) - - -def load_gmfd(fp, varname, lon_name='lon', broadcast_dims=('time',)): - pass - - -def load_best(fp, varname, lon_name='lon', broadcast_dims=('time',)): - pass - diff --git a/climate_toolbox/io/load.py b/climate_toolbox/io/load.py new file mode 100644 index 0000000..2179da9 --- /dev/null +++ b/climate_toolbox/io/load.py @@ -0,0 +1,210 @@ + +from __future__ import absolute_import + +import xarray as xr + +from climate_toolbox.utils.utils import ( + rename_coords_to_lon_and_lat, + convert_lons_split, +) + +import climate_toolbox.io.sources as sources + +def standardize_climate_data( + ds, lon_name='lon', lat_name='lat', source=None): + """ + Standardize dimensions and units for common surface climate data variables + + Standardizes: + - dimension names: lon and lat + - lon to -180 to 180 and + - for known sources, drops length 0 or 1 z/alt/pres dimension + - for known sources, standardizes variable names and units for tavg, + tmin, tmax, & precip + + Parameters + ---------- + ds: xr.Dataset + lon_name + + Returns + ------- + xr.Dataset + """ + + if source is not None: + ds = sources.standardize_source_data(ds, source=source) + else: + ds = rename_coords_to_lon_and_lat(ds) + ds = convert_lons_split(ds, lon_name=lon_name) + + return ds + + +def load_and_standardize_climate_dataset( + ds, lon_name=None, lat_name=None, source=None): + """ + Read and prepare climate data + + After reading data, 
this method also standardizes dimension names and
+    variable names for datasets from known sources.
+
+    Parameters
+    ----------
+    ds : str or xr.Dataset
+        File path or dataset
+    lon_name : str, optional
+        Name of the longitude dimension, which will be standardized to -180 to
+        180 and renamed "lon" (default selects from ['lon', 'lng', 'long', or
+        'longitude'])
+    lat_name : str, optional
+        Name of the latitude dimension, which will be renamed "lat" (default
+        selects from ['lat' or 'latitude'])
+    source: str, optional
+        Source name, used to standardize variable names for data from known
+        sources. See climate_toolbox.io.sources.KNOWN_SOURCES for a list of
+        available sources.
+
+    Returns
+    -------
+    xr.Dataset
+        xarray dataset loaded into memory
+    """
+
+    if not isinstance(ds, (xr.Dataset, xr.DataArray)):
+        with xr.open_dataset(ds) as opened:
+            ds = opened.load()
+
+    return standardize_climate_data(
+        ds, lon_name=lon_name, lat_name=lat_name, source=source)
+
+def load_and_standardize_multiple_climate_datasets(
+        file_specs,
+        lon_name=None,
+        lat_name=None,
+        source=None):
+    """
+    Read and prepare multiple climate datasets
+
+    After reading data, this method also standardizes dimension names and
+    variable names for datasets from known sources.
+
+    Parameters
+    ----------
+    file_specs: dict
+        Dictionary of {variable name: filepath or Dataset} pairs that will be
+        read in (if a path) and standardized.
+    lon_name : str, optional
+        Name of the longitude dimension, which will be standardized to -180 to
+        180 and renamed "lon" (default selects from ['lon', 'lng', 'long', or
+        'longitude'])
+    lat_name : str, optional
+        Name of the latitude dimension, which will be renamed "lat" (default
+        selects from ['lat' or 'latitude'])
+    source: str, optional
+        Source name, used to standardize variable names for data from known
+        sources. See climate_toolbox.io.sources.KNOWN_SOURCES for a list of
+        available sources.
+
+    Returns
+    -------
+    dict
+        Dictionary with variable name, Dataset pairs, with the Datasets loaded
+        into memory and reformatted.
+
+    Examples
+    --------
+
+    >>> load_and_standardize_multiple_climate_datasets(
+    ...     {
+    ...         'tasmin': '/shares/data/tasmin.nc',
+    ...         'tasmax': '/shares/data/tasmax.nc'},
+    ...     source='NASA-NEX/GDDP')  # doctest: +SKIP
+    ...
+    {'tasmin': , 'tasmax': }
+    """
+
+    res = {}
+
+    for varname, path in file_specs.items():
+        if isinstance(path, (xr.Dataset, xr.DataArray)):
+            ds = path
+
+        else:
+            with xr.open_dataset(path) as ds:
+                ds.load()
+
+        res[varname] = standardize_climate_data(
+            ds, lon_name=lon_name, lat_name=lat_name, source=source)
+
+    return res
+
+def load_and_standardize_climate_dataset_from_pattern(
+        pattern, lon_name=None, lat_name=None, source=None, **kwargs):
+
+    return load_and_standardize_climate_dataset(
+        pattern.format(**kwargs),
+        lon_name=lon_name,
+        lat_name=lat_name,
+        source=source)
+
+def load_and_standardize_multiple_climate_datasets_from_patterns(
+        patterns, lon_name=None, lat_name=None, source=None, **kwargs):
+    """
+    Read and prepare multiple climate datasets
+
+    After reading data, this method also standardizes dimension names and
+    variable names for datasets from known sources.
+
+    Parameters
+    ----------
+    patterns: dict
+        Dictionary of {variable name: string file pattern} pairs that will be
+        populated with keyword arguments and read in, then standardized.
+    lon_name : str, optional
+        Name of the longitude dimension, which will be standardized to -180 to
+        180 and renamed "lon" (default selects from ['lon', 'lng', 'long', or
+        'longitude'])
+    lat_name : str, optional
+        Name of the latitude dimension, which will be renamed "lat" (default
+        selects from ['lat' or 'latitude'])
+    source: str, optional
+        Source name, used to standardize variable names for data from known
+        sources. See climate_toolbox.io.sources.KNOWN_SOURCES for a list of
+        available sources.
+    **kwargs
+        Used to populate file patterns
+
+    Returns
+    -------
+    dict
+        Dictionary with variable name, Dataset pairs, with the Datasets loaded
+        into memory and reformatted.
+
+    Examples
+    --------
+
+    >>> load_and_standardize_multiple_climate_datasets_from_patterns(
+    ...     {
+    ...         'tasmin': '/shares/data/tasmin/{rcp}/{model}/{year}.nc',
+    ...         'tasmax': '/shares/data/tasmax/{rcp}/{model}/{year}.nc'},
+    ...     source='NASA-NEX/GDDP',
+    ...     rcp='rcp85',
+    ...     model='CCSM4',
+    ...     year=2005)  # doctest: +SKIP
+    ...
+    {'tasmin': , 'tasmax': }
+    """
+
+    res = {
+        k: load_and_standardize_climate_dataset(
+            pattern.format(**kwargs),
+            lon_name=lon_name,
+            lat_name=lat_name,
+            source=source)
+        for k, pattern in patterns.items()}
+
+    return res
diff --git a/climate_toolbox/io/sources.py b/climate_toolbox/io/sources.py
new file mode 100644
index 0000000..b7dc221
--- /dev/null
+++ b/climate_toolbox/io/sources.py
@@ -0,0 +1,128 @@
+
+from __future__ import absolute_import
+
+from climate_toolbox.utils.utils import (
+    rename_coords_to_lon_and_lat,
+    convert_lons_split,
+    convert_kelvin_to_celsius,
+)
+
+__all__ = ['KNOWN_SOURCES', 'standardize_source_data']
+
+def standardize_berkeley_earth_data(ds):
+    raise NotImplementedError(
+        "needs work to combine climatology and temperature variables")
+
+def standardize_kelvin_temperature_data(ds):
+    '''
+    Convert Kelvin to degrees C and assert reasonable surface temperature bounds
+    '''
+
+    if 'tas' in ds.data_vars:
+        all_values_null_or_valid = (
+            ((ds['tas'] > -55 + 273.15) & (ds['tas'] < 50 + 273.15))
+            | (ds['tas'].isnull())).all()
+
+        assert all_values_null_or_valid, "values outside range [-55, 50] C"
+
+        # convert to Celsius
+        convert_kelvin_to_celsius(ds, 'tas')
+
+    if 'tasmin' in ds.data_vars:
+        all_values_null_or_valid = (
+            ((ds['tasmin'] > -75 + 273.15) & (ds['tasmin'] < 40 + 273.15))
+            | (ds['tasmin'].isnull())).all()
+
+        assert all_values_null_or_valid, "values outside range [-75, 40] C"
+
+        # convert to Celsius
+        convert_kelvin_to_celsius(ds, 'tasmin')
+
+    if 'tasmax' in ds.data_vars:
+        all_values_null_or_valid = (
+            ((ds['tasmax'] > -45 + 273.15) & (ds['tasmax'] < 55 + 273.15))
+            | (ds['tasmax'].isnull())).all()
+
+        assert all_values_null_or_valid, "values outside range [-45, 55] C"
+
+        # convert to Celsius
+        convert_kelvin_to_celsius(ds, 'tasmax')
+
+def standardize_global_meterological_forcing_dataset_v1(ds):
+
+    # standardize dimension names
+    ds = ds.rename({'longitude': 'lon', 'latitude': 'lat'})
+
+    # remove length-1 vertical dimension
+    assert len(ds.z) <= 1, 'expected length-1 z dimension'
+    ds = ds.isel(z=0, drop=True)
+
+    # standardize variable names
+    ds = ds.rename({
+        'tmax': 'tasmax',
+        'tmin': 'tasmin',
+        'tavg': 'tas',
+        'prcp': 'precip'})
+
+    standardize_kelvin_temperature_data(ds)
+
+    return ds
+
+def standardize_global_meterological_forcing_dataset_v3(ds):
+
+    assert len(ds.dims) == 3, 'expected 3 dimensions: (lon, lat, time)'
+
+    for dim in ['lon', 'lat', 'time']:
+        assert dim in ds.dims, "dimension {} not found".format(dim)
+
+    # standardize variable names
+    ds = ds.rename({
+        'tmax': 'tasmax',
+        'tmin': 'tasmin',
+        'tavg': 'tas',
+        'prcp': 'precip'})
+
+    standardize_kelvin_temperature_data(ds)
+
+    return ds
+
+def standardize_nasa_nex_gddp_dataset(ds):
+    raise NotImplementedError(
+        "Mike couldn't find these on sacagawea and got lazy")
+
+def standardize_climate_impact_lab_smme_pattern_scaled_nasa_nex_dataset(ds):
+    raise NotImplementedError(
+        "Mike couldn't find these on sacagawea and got lazy")
+
+_source_standardizer_lookup = {
+    'BerkeleyEarth': standardize_berkeley_earth_data,
+    'GMFDv1': standardize_global_meterological_forcing_dataset_v1,
+    'GMFDv3': standardize_global_meterological_forcing_dataset_v3,
+    'NASA-NEX/GDDP': standardize_nasa_nex_gddp_dataset,
+    'CIL-SMME-NASA/NEX-GDDP': (
+        standardize_climate_impact_lab_smme_pattern_scaled_nasa_nex_dataset),
+}
+
+KNOWN_SOURCES = list(_source_standardizer_lookup.keys())
+
+def standardize_source_data(ds, source):
+    '''
+    Calls source-specific functions which reformat climate data to a standard format
+
+    Parameters
+    ----------
+    ds : Dataset
+        :py:class:`xarray.Dataset` to standardize
+    source: str
+        Known source with an associated standardization function. See
+        climate_toolbox.io.sources.KNOWN_SOURCES for a list of available
+        sources.
+
+    Returns
+    -------
+    ds : Dataset
+        Standardized dataset
+    '''
+
+    ds = _source_standardizer_lookup[source](ds)
+    return ds
diff --git a/climate_toolbox/io/write.py b/climate_toolbox/io/write.py
new file mode 100644
index 0000000..c405b7a
--- /dev/null
+++ b/climate_toolbox/io/write.py
@@ -0,0 +1,78 @@
+
+from __future__ import absolute_import
+
+import os
+
+import metacsv
+
+def netcdf_writer(ds, path, **unused_kwargs):
+    ''' Write a Dataset to a NetCDF file '''
+
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+
+    # Coerce attributes to string (required by NetCDF spec)
+    ds = ds.copy(deep=False)
+    ds.attrs.update({k: str(v) for k, v in ds.attrs.items()})
+    for var in ds.data_vars.values():
+        var.attrs.update({k: str(v) for k, v in var.attrs.items()})
+
+    ds.to_netcdf(path)
+
+
+def netcdf_write_to_pattern(ds, pattern, **pattern_kwargs):
+    netcdf_writer(ds, pattern.format(**pattern_kwargs))
+
+
+def atomic_netcdf_writer(ds, path, temp_path=None, **unused_kwargs):
+    '''
+    Atomic write to a NetCDF file
+
+    Safe version of climate_toolbox.io.write.netcdf_writer, in which the
+    Dataset will be written to a temporary location and then moved into the
+    final destination on write completion. This ensures a complete write
+    when working in unstable or preemptible computing environments.
+
+    Note that the user is responsible for checking to ensure all writes were
+    successful and for deleting any incomplete writes at the temp_path
+    location.
+
+    Parameters
+    ----------
+    ds : Dataset
+        :py:class:`xarray.Dataset` object to write
+    path : str
+        Filepath of write destination
+    temp_path : str, optional
+        Filepath of temporary write destination. Default uses `path + '~'`.
+
+    '''
+
+    if temp_path is None:
+        temp_path = path + '~'
+
+    # Coerce attributes to string (required by NetCDF spec)
+    ds = ds.copy(deep=False)
+    ds.attrs.update({k: str(v) for k, v in ds.attrs.items()})
+    for var in ds.data_vars.values():
+        var.attrs.update({k: str(v) for k, v in var.attrs.items()})
+
+    ds.to_netcdf(temp_path)
+    os.rename(temp_path, path)
+
+
+def atomic_netcdf_write_to_pattern(ds, pattern, **pattern_kwargs):
+    atomic_netcdf_writer(ds, pattern.format(**pattern_kwargs))
+
+
+def fgh_header_writer(ds, path, **unused_kwargs):
+
+    metacsv.to_header(
+        path,
+        attrs=dict(ds.attrs),
+        variables={v: dict(ds[v].attrs) for v in ds.data_vars.keys()})
+
+
+def fgh_header_write_to_pattern(ds, pattern, **pattern_kwargs):
+    fgh_header_writer(ds, pattern.format(**pattern_kwargs))
+
+
+def csv_writer(ds, path, metadata, **unused_kwargs):
+    raise NotImplementedError
diff --git a/climate_toolbox/transformations/transformations.py b/climate_toolbox/transformations/transformations.py
index 1a40a21..64738c7 100644
--- a/climate_toolbox/transformations/transformations.py
+++ b/climate_toolbox/transformations/transformations.py
@@ -1,3 +1,6 @@
+
+from __future__ import absolute_import
+
 import xarray as xr
 import numpy as np
 
@@ -5,7 +8,7 @@
     remove_leap_days, convert_kelvin_to_celsius
 
 
-def snyder_edd(tasmin, tasmax, threshold):
+def snyder_edd(tasmin_tasmax, threshold, **unused_kwargs):
     r"""
     Snyder exceedance degree days/cooling degree days
 
@@ -43,11 +46,12 @@
 
     Parameters
     ----------
-    tasmin : xarray.DataArray
-        Daily minimum temperature (degrees C)
-
-    tasmax : xarray.DataArray
-        Daily maximum temperature (degrees C)
+    tasmin_tasmax : tuple of xarray.Datasets
+        tuple containing two :py:class:`xarray.Dataset` objects, in the order
+        ``(tasmin, tasmax)``. tasmin should contain a variable ``tasmin`` with
+        daily minimum surface air temperature and tasmax should contain a
+        variable ``tasmax`` with daily maximum surface air temperature. The
+        units should be the same as the threshold.
 
     threshold : int, float, xarray.DataArray
         Threshold (degrees C)
 
@@ -60,6 +64,9 @@
 
     """
 
+    tasmin = tasmin_tasmax[0].tasmin
+    tasmax = tasmin_tasmax[1].tasmax
+
     # Check for unit agreement
     assert tasmin.units == tasmax.units
 
@@ -69,7 +76,7 @@
     # compute useful quantities for use in the transformation
     snyder_mean = ((tasmax + tasmin)/2)
     snyder_width = ((tasmax - tasmin)/2)
-    snyder_theta = xr.ufuncs.arcsin((threshold - snyder_mean)/snyder_width)
+    snyder_theta = np.arcsin((threshold - snyder_mean)/snyder_width)
 
     # the trasnformation is computed using numpy arrays, taking advantage of
     # numpy's second where clause. Note that in the current dev build of
@@ -85,12 +92,23 @@
         snyder_mean - threshold)
 
     res.attrs['units'] = (
-        'degreedays_{}{}'.format(threshold, tasmax.attrs['units']))
+        'degreedays_{}{}'.format(threshold, tasmax.attrs.get('units', '')))
+
+    out_ds = xr.Dataset({'edd': res})
+
+    out_ds['edd'].attrs['long_title'] = 'Exceedance degree days'
+    out_ds['edd'].attrs['description'] = (
+        'Exceedance degree days computed using the Snyder diurnal cycle '
+        'interpolation method, computed above a threshold of '
+        '{threshold}{units}.'
+        .format(
+            threshold=threshold,
+            units=tasmax.attrs.get('units', '')))
 
-    return res
+    return out_ds
 
 
-def snyder_gdd(tasmin, tasmax, threshold_low, threshold_high):
+def snyder_gdd(tasmin_tasmax, threshold_low, threshold_high, **unused_kwargs):
     r"""
     Snyder growing degree days
 
@@ -110,17 +128,17 @@
 
     Parameters
     ----------
-    tasmin : xarray.DataArray
-        Daily minimum temperature (degrees C)
-
-    tasmax : xarray.DataArray
-        Daily maximum temperature (degrees C)
+    tasmin_tasmax : tuple of xarray.Datasets
+        tuple containing two :py:class:`xarray.Dataset` objects, in the order
+        ``(tasmin, tasmax)``. tasmin should contain daily minimum surface air
+        temperature and tasmax should contain daily maximum surface air
+        temperature. The units should be the same as the thresholds.
 
     threshold_low : int, float, xarray.DataArray
-        Lower threshold (degrees C)
+        Lower threshold (same units as datasets in tasmin_tasmax)
 
     threshold_high : int, float, xarray.DataArray
-        Upper threshold (degrees C)
+        Upper threshold (same units as datasets in tasmin_tasmax)
 
     Returns
     -------
@@ -130,31 +148,56 @@
 
     """
 
+    tasmin, tasmax = tasmin_tasmax
+
     # Check for unit agreement
-    assert tasmin.units == tasmax.units
+    assert tasmin.tasmin.units == tasmax.tasmax.units
 
     res = (
-        snyder_edd(tasmin, tasmax, threshold_low)
-        - snyder_edd(tasmin, tasmax, threshold_high))
+        snyder_edd((tasmin, tasmax), threshold_low).edd
+        - snyder_edd((tasmin, tasmax), threshold_high).edd)
 
     res.attrs['units'] = (
-        'degreedays_{}-{}{}'.format(threshold_low, threshold_high, tasmax.attrs['units']))
+        'degreedays_{}-{}{}'
+        .format(threshold_low, threshold_high, tasmax.tasmax.attrs.get('units', '')))
 
+    out_ds = xr.Dataset({'gdd': res})
 
-    return res
+    out_ds['gdd'].attrs['long_title'] = 'Growing degree days'
+    out_ds['gdd'].attrs['description'] = (
+        'Growing degree days computed using the Snyder diurnal cycle '
+        'interpolation method, computed between thresholds of '
+        '{threshold_low}{units} and {threshold_high}{units}.'
+        .format(
+            threshold_high=threshold_high,
+            threshold_low=threshold_low,
+            units=tasmax.tasmax.attrs.get('units', '')))
 
+    return out_ds
+
+
+def validate_snyder_edd(ds, thresholds, assert_no_nans=False, **unused_kwargs):
+    '''
+    TODO:
+    This is a CIL-specific validation function. We need a way of
+    standardizing this.
+    '''
-def validate_edd_snyder_agriculture(ds, thresholds):
     msg_null = 'hierid dims do not match 24378'
 
     assert ds.hierid.shape == (24378,), msg_null
 
     for threshold in thresholds:
         assert threshold in list(ds.refTemp)
+
+    if assert_no_nans:
+        for v in ds.data_vars.keys():
+            assert ds[v].notnull().all(), "NaNs encountered in {}".format(v)
+
     return
 
 
-def tas_poly(ds, power, varname):
+def tas_poly(ds, power, variable, **unused_kwargs):
     """
     Daily average temperature (degrees C), raised to a power
 
@@ -162,8 +205,6 @@
     calendar).
 
""" - powername = ordinal(power) - description = (''' Daily average temperature (degrees C){raised} @@ -174,33 +215,53 @@ def tas_poly(ds, power, varname): ' raised to the {powername} power' .format(powername=powername)))).strip() - ds1 = xr.Dataset() + out_ds = xr.Dataset() # remove leap years ds = remove_leap_days(ds) # do transformation - ds1[varname] = (ds.tas - 273.15)**power + out_ds[variable] = (ds.tas)**power + + # ======================== MOVE THE FOLLOWING TO CIL WRAPPER ============== + # This is the worst and is impactlab specific until brewster can update the + # projection system to handle datetime objects. we should put this in + # climate_transform_specs as a custom writer. # Replace datetime64[ns] 'time' with YYYYDDD int 'day' if ds.dims['time'] > 365: raise ValueError - - ds1.coords['day'] = ds['time.year']*1000 + np.arange(1, len(ds.time)+1) - ds1 = ds1.swap_dims({'time': 'day'}) - ds1 = ds1.drop('time') - - ds1 = ds1.rename({'day': 'time'}) + out_ds.coords['day'] = ds['time.year']*1000 + np.arange(1, len(ds.time)+1) + out_ds = out_ds.swap_dims({'time': 'day'}) + out_ds = out_ds.drop('time') + out_ds = out_ds.rename({'day': 'time'}) + # ======================== MOVE THE ABOVE TO CIL WRAPPER ================== # document variable - ds1[varname].attrs['units'] = ( + out_ds[variable].attrs['units'] = ( 'C^{}'.format(power) if power > 1 else 'C') - ds1[varname].attrs['long_title'] = description.splitlines()[0] - ds1[varname].attrs['description'] = description - ds1[varname].attrs['variable'] = varname + out_ds[variable].attrs['long_title'] = description.splitlines()[0] + out_ds[variable].attrs['description'] = description + out_ds[variable].attrs['variable'] = variable + + return out_ds + + +def validate_tas_poly( + ds, power, valid_tas_range=(-20, 55), assert_no_nans=True): + + mint = ds.tas.sel(lat=slice(-60, 70)).min() + maxt = ds.tas.sel(lat=slice(-60, 70)).max() + + min_msg = "min value {} outside allowed range".format(mint) + assert mint >= min(0, valid_tas_range[0]**power), min_msg + + max_msg = "max value {} outside allowed range".format(mint) + assert maxt <= (valid_tas_range[1]**power), max_msg - return ds1 + if assert_no_nans: + assert ds.tas.notnull().all(), "NaNs found in the data" def ordinal(n): diff --git a/climate_toolbox/utils/utils.py b/climate_toolbox/utils/utils.py index a6e09e5..86cec0d 100644 --- a/climate_toolbox/utils/utils.py +++ b/climate_toolbox/utils/utils.py @@ -2,28 +2,29 @@ Handy functions for standardizing the format of climate data """ +from __future__ import absolute_import + import xarray as xr import numpy as np -def convert_kelvin_to_celsius(df, temp_name): +def convert_kelvin_to_celsius(ds, temp_name): """ Convert Kelvin to Celsius """ - df_attrs = df[temp_name].attrs - df[temp_name] = df[temp_name] - 273.15 + + ds_attrs = ds[temp_name].attrs + ds[temp_name] = ds[temp_name] - 273.15 + # update attrs & unit information - df[temp_name].attrs.update(df_attrs) - df[temp_name].attrs['units'] = 'C' - df[temp_name].attrs['valid_min'] = -108.78788 - df[temp_name].attrs['valid_max'] = 62.02828 + ds[temp_name].attrs.update(ds_attrs) + ds[temp_name].attrs['units'] = 'C' - return df + return ds def convert_lons_mono(ds, lon_name='longitude'): """ Convert longitude from -180-180 to 0-360 """ - ds[lon_name].values = np.where( - ds[lon_name].values < 0, 360 + ds[lon_name].values, ds[lon_name].values - ) + ds[lon_name].values = xr.where( + ds[lon_name] < 0, 360 + ds[lon_name], ds[lon_name]).values # sort the dataset by the new lon values ds = 
ds.sel(**{lon_name: np.sort(ds[lon_name].values)}) @@ -34,7 +35,7 @@ def convert_lons_mono(ds, lon_name='longitude'): def convert_lons_split(ds, lon_name='longitude'): """ Convert longitude from 0-360 to -180-180 """ ds[lon_name].values = xr.where( - ds[lon_name] > 180, ds[lon_name] - 360, ds[lon_name]) + ds[lon_name] > 180, ds[lon_name] - 360, ds[lon_name]).values # sort the dataset by the new lon values ds = ds.sel(**{lon_name: np.sort(ds[lon_name].values)}) @@ -42,19 +43,30 @@ def convert_lons_split(ds, lon_name='longitude'): return ds -def rename_coords_to_lon_and_lat(ds): - """ Rename Dataset spatial coord names to: - lat, lon +def rename_coords_to_lon_and_lat(ds, lon_name=None, lat_name=None): + """ Rename Dataset spatial coord names to lat, lon """ - if 'latitude' in ds.coords: - ds = ds.rename({'latitude': 'lat'}) - if 'longitude' in ds.coords: - ds = ds.rename({'longitude': 'lon'}) - elif 'long' in ds.coords: - ds = ds.rename({'long': 'lon'}) - - if 'z' in ds.coords: - ds = ds.drop('z').squeeze() + if lat_name is None: + lat_names = ['latitude'] + else: + lat_names = [lat_name] + + for candidate in lat_names: + if candidate == 'lat': + continue + if candidate in ds.coords: + ds = ds.rename({candidate: 'lat'}) + + if lon_name is None: + lon_names = ['lng', 'long', 'longitude'] + else: + lon_names = [lon_name] + + for candidate in lon_names: + if candidate == 'lon': + continue + if candidate in ds.coords: + ds = ds.rename({candidate: 'lon'}) return ds @@ -87,11 +99,6 @@ def season_boundaries(growing_days): """ Returns the sorted start and end date of growing season """ - # the longitude values of the data is off, we need to scale it - growing_days.longitude.values = growing_days.longitude.values - 180 - # we then sort by longitude - growing_days = growing_days.sortby('longitude') - # construct the ds gdd_sorted = xr.DataArray( # xarray has no method to sort along an axis @@ -121,10 +128,10 @@ def get_daily_growing_season_mask(lat, lon, time, growing_days_path): Parameters ---------- - lat: xr.DataArray coords object - lon: xr.DataArray coords object - time: xr.DataArray coords object - growing_days_path: str + lat : DataArray + lon : DataArray + time : DataArray + growing_days_path : str Returns ------- @@ -139,9 +146,10 @@ def get_daily_growing_season_mask(lat, lon, time, growing_days_path): min_day, max_day = season_boundaries(growing_days) data = np.ones((lat.shape[0], lon.shape[0], time.shape[0])) + # create an array of ones in the shape of the data ones = xr.DataArray( - data, coords=[lat, lon, time], dims=['lat', 'lon', 'time']) + data, coords=[lat, lon, time], dims=['lat', 'lon', 'time']) # mask the array around the within calendar year start and end times # of growing season diff --git a/tests/test_climate_toolbox.py b/tests/test_climate_toolbox.py index 39be377..ef39100 100644 --- a/tests/test_climate_toolbox.py +++ b/tests/test_climate_toolbox.py @@ -9,7 +9,7 @@ from climate_toolbox.aggregations.aggregations import \ _reindex_spatial_data_to_regions, _aggregate_reindexed_data_to_regions from climate_toolbox.transformations.transformations import snyder_edd, snyder_gdd -from climate_toolbox.io import * +from climate_toolbox.io.load import standardize_climate_data import numpy as np import pandas as pd @@ -46,7 +46,7 @@ def clim_data(lat, lon, time): np.random.seed(42) temp = np.random.rand(len(lat), len(lon), len(time))*100 - ds = xr.Dataset({'temperature': (['lat', 'lon', 'time'], temp)}, + ds = xr.Dataset({'tas': (['lat', 'lon', 'time'], temp)}, coords={'lon': lon, 'lat': 
lat, 'time': time}) @@ -56,7 +56,7 @@ def clim_data(lat, lon, time): def test_clim_data(clim_data): - assert not clim_data.temperature.isnull().any() + assert not clim_data.tas.isnull().any() @pytest.fixture @@ -64,11 +64,11 @@ def make_holes(clim_data, lat): N = len(lat) - 1 - tmp = clim_data['temperature'].values + tmp = clim_data['tas'].values array = np.random.randint(0, N, 1) tmp[array] = np.nan - clim_data['temperature'].values = tmp + clim_data['tas'].values = tmp return clim_data @@ -99,11 +99,11 @@ def weights(lat, lon): def test_reindex_spatial_weights(clim_data, weights): - assert not clim_data.temperature.isnull().any() + assert not clim_data.tas.isnull().any() ds = _reindex_spatial_data_to_regions(clim_data, weights) - assert ds.temperature.shape == (len(ds['lon']), len(ds['time'])) + assert ds.tas.shape == (len(ds['lon']), len(ds['time'])) assert 'reshape_index' in ds.dims @@ -111,17 +111,17 @@ def test_weighting(clim_data, weights): assert np.isnan(weights['popwt'].values).any() ds = _reindex_spatial_data_to_regions(clim_data, weights) - assert not ds.temperature.isnull().any() + assert not ds.tas.isnull().any() wtd = _aggregate_reindexed_data_to_regions( - ds, 'temperature', 'popwt', 'ISO', weights) + ds, 'tas', 'popwt', 'ISO', weights) - assert not wtd.temperature.isnull().any() + assert not wtd.tas.isnull().any() wtd = _aggregate_reindexed_data_to_regions( - ds, 'temperature', 'areawt', 'ISO', weights) + ds, 'tas', 'areawt', 'ISO', weights) - assert not wtd.temperature.isnull().any() + assert not wtd.tas.isnull().any() def test_rename_coords_to_lon_and_lat(): @@ -204,9 +204,9 @@ def test_remove_leap_days_with_clim_data(clim_data): def test_convert_kelvin_to_celsius(clim_data): - ds = convert_kelvin_to_celsius(clim_data, 'temperature') + ds = convert_kelvin_to_celsius(clim_data, 'tas') - assert 'C' in ds.data_vars['temperature'].units + assert 'C' in ds.data_vars['tas'].units def test_standardize_climate_data(clim_data): @@ -216,45 +216,43 @@ def test_standardize_climate_data(clim_data): assert 'lat' in coordinates and 'latitude' not in coordinates assert 'lon' in coordinates and 'longitude' not in coordinates - - + + def test_snyder_edd(): ds_tasmax = xr.Dataset( - data_vars={'tmax': ( + data_vars={'tasmax': ( '(latitude, longitude)', [280.4963, 280.7887], {'units': 'K'})}, coords={'latitude': [-33.625, -33.375], 'longitude': [286.125, 286.375]}) ds_tasmin = xr.Dataset( - data_vars={'tmin': ( + data_vars={'tasmin': ( '(latitude, longitude)', [278.902, 278.23163], {'units': 'K'})}, coords={'latitude': [-33.625, -33.375], 'longitude': [286.125, 286.375]}) threshold = 8 res = snyder_edd( - ds_tasmin.tmin, - ds_tasmax.tmax, + (ds_tasmin, ds_tasmax), threshold=273.15 + threshold) - - assert res.units == 'degreedays_281.15K' - assert res.sum().item(0) == 0.0 - + assert res.edd.units == 'degreedays_281.15K' + assert res.edd.sum().item(0) == 0.0 + + def test_snyder_gdd(): ds_tasmax = xr.Dataset( - data_vars={'tmax': ('(latitude, longitude)', [280.4963, 280.7887], {'units': 'K'})}, + data_vars={'tasmax': ('(latitude, longitude)', [280.4963, 280.7887], {'units': 'K'})}, coords={'latitude': [-33.625, -33.375], 'longitude': [286.125, 286.375]}) ds_tasmin = xr.Dataset( - data_vars={'tmin': ('(latitude, longitude)', [278.902, 278.23163], {'units': 'K'})}, + data_vars={'tasmin': ('(latitude, longitude)', [278.902, 278.23163], {'units': 'K'})}, coords={'latitude': [-33.625, -33.375], 'longitude': [286.125, 286.375]}) - + res = snyder_gdd( - ds_tasmin.tmin, - ds_tasmax.tmax, + (ds_tasmin, 
ds_tasmax), threshold_low=273.15 + 1, threshold_high=273.15 + 8) - - assert not res.units == 'degreedays_281.15K' - assert res.units == 'degreedays_274.15-281.15K' - assert res.sum().item(0) == pytest.approx(11, 0.1) + + assert not res.gdd.units == 'degreedays_281.15K' + assert res.gdd.units == 'degreedays_274.15-281.15K' + assert res.gdd.sum().item(0) == pytest.approx(11, 0.1)
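
Usage sketch (not part of the patch): the example below illustrates how the new pipeline entry point composes the loader, transformation, and writer helpers added above. It is a minimal sketch under stated assumptions: the input and output file patterns, the year value, and the output variable name 'tas2' are hypothetical placeholders; 'GMFDv3' is one of the KNOWN_SOURCES defined in climate_toolbox/io/sources.py, and tas_poly and netcdf_write_to_pattern are the functions introduced in this changeset.

    # Minimal, illustrative sketch -- paths, year, and the 'tas2' variable
    # name are hypothetical placeholders.
    from climate_toolbox.climate_toolbox import (
        transform_and_weighted_aggregate_climate_data)
    from climate_toolbox.transformations.transformations import tas_poly
    from climate_toolbox.io.write import netcdf_write_to_pattern

    transform_and_weighted_aggregate_climate_data(
        # the default loader formats this pattern with the iteration kwargs,
        # opens the file, and standardizes dimension and variable names
        loader_kwargs={
            'pattern': '/shares/data/tas/{year}.nc',
            'source': 'GMFDv3'},
        # the transform receives the loaded Dataset plus these kwargs
        transform=tas_poly,
        extra_transform_kwargs={'power': 2, 'variable': 'tas2'},
        # one kwargs dict per writer, matched by position
        writers=[netcdf_write_to_pattern],
        extra_writer_kwargs=[{'pattern': '/shares/outputs/tas2/{year}.nc'}],
        # iteration kwargs are forwarded to the loader, transform, and writers
        year=2006)

Passing interactive=True instead returns the transformed Dataset without writing, which is convenient for inspecting a single iteration.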