diff --git a/climate_toolbox/io/io.py b/climate_toolbox/io/io.py
index c7698d4..df6c1d3 100644
--- a/climate_toolbox/io/io.py
+++ b/climate_toolbox/io/io.py
@@ -3,69 +3,113 @@ from climate_toolbox.utils.utils import *
 
 
 
-def standardize_climate_data(ds):
-    """
-    Read climate data and standardize units to:
-        - lon and lat,
-        - lon to -180 to 180 and
+def load_climate_data(data_type, file_path):
+    """ load_climate_data(data_type, file_path)
+    Read and prepare climate data
+
+    :param data_type: str
+        datatype to be read, supported types:
+        bcsd, gmfd, best, era5
 
-    Parameters
-    ----------
-    ds: xr.Dataset
+    :param file_path: str
+        File path
 
-    Returns
-    -------
-    xr.Dataset
+    :return: ds: xr.Dataset
+        xarray dataset loaded in memory
     """
 
-    ds = rename_coords_to_lon_and_lat(ds)
-    ds = convert_lons_split(ds, lon_name='lon')
+    return _load_climate_data(
+        _find_loader(data_type),
+        file_path
+    )
+
+
+def _load_climate_data(loader, file_path):
+    with xr.open_dataset(file_path) as ds:
+        ds.load()
+
+    return loader(ds)
 
-    return ds
 
+def load_min_max_temperatures(data_type, file_path_tmin, file_path_tmax):
+    """ load_min_max_temperatures(data_type, file_path_tmin, file_path_tmax)
 
-def load_bcsd(fp, varname, lon_name='lon', broadcast_dims=('time',)):
+    :param data_type: str
+        datatype to be read, supported types:
+        bcsd, gmfd, best, era5
+
+    :param file_path_tmin: path for min temperature
+    :param file_path_tmax: path for max temperature
+    :return:
+        ds_tasmin: xr.Dataset, ds_tasmax: xr.Dataset
     """
-    Read and prepare climate data
 
-    After reading data, this method also fills NA values using linear
-    interpolation, and standardizes longitude to -180:180
+    return _load_min_max_temperatures(
+        _find_loader(data_type),
+        file_path_tmin,
+        file_path_tmax
+    )
 
-    Parameters
-    ----------
-    fp: str
-        File path or dataset
 
-    varname: str
-        Variable name to be read
+def _load_min_max_temperatures(loader, file_path_tmin, file_path_tmax):
+    with xr.open_dataset(file_path_tmin) as ds_tasmin:
+        ds_tasmin.load()
+    with xr.open_dataset(file_path_tmax) as ds_tasmax:
+        ds_tasmax.load()
 
-    lon_name : str, optional
-        Name of the longitude dimension (defualt selects from ['lon' or
-        'longitude'])
+    return loader(ds_tasmin), loader(ds_tasmax)
 
-    Returns
-    -------
-    xr.Dataset
-        xarray dataset loaded into memory
-    """
 
-    if lon_name is not None:
-        lon_names = [lon_name]
+def _find_loader(data_type):
+    """ Helper function to find climate data loader """
 
-    if hasattr(fp, 'sel_points'):
-        ds = fp
+    data_type = data_type.lower()
+    if 'bcsd' in data_type:
+        loader = load_bcsd
+    elif 'gmfd' in data_type:
+        loader = load_gmfd
+    elif 'best' in data_type:
+        loader = load_best
+    elif 'era' in data_type:
+        loader = load_era5
 
     else:
-        with xr.open_dataset(fp) as ds:
-            ds.load()
+        raise TypeError("'" + data_type + "' not supported. Supported data "
+                        "types are: NASA BCSD, GMFD, BEST, ERA5.")
+    return loader
+
 
-    return standardize_climate_data(ds)
 
+def standardize_climate_data(ds):
+    """ standardize_climate_data(ds)
+    Standardize climate data units to:
+        - lon and lat,
+        - lon to -180 to 180
+
+    :param ds: xr.Dataset
+    :return: ds: xr.Dataset
+    """
+    ds = rename_coords_to_lon_and_lat(ds)
+    ds = convert_lons_split(ds, lon_name='lon')
 
-def load_gmfd(fp, varname, lon_name='lon', broadcast_dims=('time',)):
-    pass
+    return ds
+
+
+def load_bcsd(ds):
+    return ds
 
 
-def load_best(fp, varname, lon_name='lon', broadcast_dims=('time',)):
-    pass
+def load_gmfd(ds):
+    if 'tmin' in ds.data_vars or 'tmax' in ds.data_vars:
+        return standardize_climate_data(ds)
+    if 'lat' not in ds.coords or 'lon' not in ds.coords:
+        ds = rename_coords_to_lon_and_lat(ds)
+    return convert_lons_split(ds, lon_name="lon")
+
+
+def load_best(ds):
+    raise NotImplementedError("BEST data loader is not implemented yet.")
 
+
+def load_era5(ds):
+    raise NotImplementedError("ERA5 data loader is not implemented yet.")