|
| 1 | +# qsub -I -q express -P xv83 -l ncpus=48 -l mem=190GB -l jobfs=4GB -l walltime=0:20:00 -l storage=gdata/xv83+gdata/dk92+gdata/hh5+gdata/xp65+gdata/p73 |
| 2 | + |
| 3 | +# import sys to access script arguments (experiment, ensemble, first_year, last_year) |
| 4 | +import sys |
| 5 | + |
| 6 | +# These are fixed (correspond to my AA simulation) |
| 7 | +model='ACCESS-ESM1-5' |
| 8 | +experiment='historical' |
| 9 | +year_start=1850 |
| 10 | +num_years=10 |
| 11 | +lumpby="month" |
| 12 | + |
| 13 | +# 1. Load packages |
| 14 | + |
| 15 | +# Import os for makedirs/isfile/environ |
| 16 | +import os |
| 17 | +os.environ["PYTHONWARNINGS"] = "ignore" |
| 18 | + |
| 19 | +# Load dask |
| 20 | +from dask.distributed import Client |
| 21 | + |
| 22 | +# Load xarray for N-dimensional arrays |
| 23 | +import xarray as xr |
| 24 | + |
| 25 | +# Load datetime to deal with time formats |
| 26 | +import datetime |
| 27 | + |
| 28 | +# Load traceback to print exceptions |
| 29 | +import traceback |
| 30 | + |
| 31 | +# Import numpy |
| 32 | +import numpy as np |
| 33 | + |
| 34 | +# Load pandas for data manipulation |
| 35 | +import pandas as pd |
| 36 | + |
| 37 | +# 2. Define some functions |
| 38 | +# (to avoid too much boilerplate code) |
| 39 | +print("Defining functions") |
| 40 | + |
| 41 | +def open_my_dataset(paths): |
| 42 | + ds = xr.open_mfdataset( |
| 43 | + paths, |
| 44 | + chunks={'time':-1, 'st_ocean':-1}, |
| 45 | + concat_dim="time", |
| 46 | + compat='override', |
| 47 | + preprocess=None, |
| 48 | + engine='netcdf4', |
| 49 | + data_vars='minimal', |
| 50 | + coords='minimal', |
| 51 | + combine='nested', |
| 52 | + parallel=True, |
| 53 | + join='outer', |
| 54 | + attrs_file=None, |
| 55 | + combine_attrs='override', |
| 56 | + ) |
| 57 | + return ds |
| 58 | + |
| 59 | +def season_climatology(ds): |
| 60 | + # Make a DataArray with the number of days in each month, size = len(time) |
| 61 | + month_length = ds.time.dt.days_in_month |
| 62 | + # Calculate the weights by grouping by 'time.season' |
| 63 | + weights = month_length.groupby("time.season") / month_length.groupby("time.season").sum() |
| 64 | + # Test that the sum of the weights for each season is 1.0 |
| 65 | + np.testing.assert_allclose(weights.groupby("time.season").sum().values, np.ones(4)) |
| 66 | + # Calculate the weighted average |
| 67 | + return (ds * weights).groupby("time.season").sum(dim="time") |
| 68 | + |
| 69 | +def month_climatology(ds): |
| 70 | + # Make a DataArray with the number of days in each month, size = len(time) |
| 71 | + month_length = ds.time.dt.days_in_month |
| 72 | + # Calculate the weights by grouping by 'time.season' |
| 73 | + weights = month_length.groupby("time.month") / month_length.groupby("time.month").sum() |
| 74 | + # Test that the sum of the weights for each month is 1.0 |
| 75 | + np.testing.assert_allclose(weights.groupby("time.month").sum().values, np.ones(12)) |
| 76 | + # Calculate the weighted average |
| 77 | + ds_out = (ds * weights).groupby("time.month").sum(dim="time") |
| 78 | + # Keep track of mean number of days per month |
| 79 | + mean_days_in_month = month_length.groupby("time.month").mean() |
| 80 | + # And assign it to new coordinate |
| 81 | + ds_out = ds_out.assign_coords(mean_days_in_month=('month', mean_days_in_month.data)) |
| 82 | + return ds_out |
| 83 | + |
| 84 | +def climatology(ds, lumpby): |
| 85 | + if lumpby == "month": |
| 86 | + return month_climatology(ds) |
| 87 | + elif lumpby == "season": |
| 88 | + return season_climatology(ds) |
| 89 | + else: |
| 90 | + raise ValueError(f"lumpby has to be month or season") |
| 91 | + |
| 92 | +def time_window_strings(year_start, num_years): |
| 93 | + """ |
| 94 | + return strings for start_time and end_time |
| 95 | + """ |
| 96 | + # start_time is first second of year_start |
| 97 | + start_time = f'{year_start}' |
| 98 | + # end_time is last second of last_year |
| 99 | + end_time = f'{year_start + num_years - 1}' |
| 100 | + # Return the weighted average |
| 101 | + return start_time, end_time |
| 102 | + |
| 103 | +# Create directory on scratch to save the data |
| 104 | +scratchdatadir = '/scratch/xv83/TMIP/data' |
| 105 | +AAdatadir = '/scratch/xv83/bp3051/access-esm/archive/andersonacceleration_test-n10-5415f621/' |
| 106 | +gdatadatadir = '/g/data/xv83/TMIP/data' |
| 107 | + |
| 108 | +# Depends on time window |
| 109 | +start_time, end_time = time_window_strings(year_start, num_years) |
| 110 | +start_time_str = f'Jan{start_time}' |
| 111 | +end_time_str = f'Dec{end_time}' |
| 112 | + |
| 113 | +# Simulation years from AA output |
| 114 | +simyears = range(10) # AA cycles of 10 years |
| 115 | + |
| 116 | +# 4. Load data, preprocess it, and save it to NetCDF |
| 117 | + |
| 118 | +print("Starting client") |
| 119 | + |
| 120 | +# This `if` statement is required in scripts (not required in Jupyter) |
| 121 | +if __name__ == '__main__': |
| 122 | + client = Client(n_workers=44, threads_per_worker=1)#, memory_limit='16GB') # Note: with 1thread/worker cannot plot thetao. Maybe I need to understand why? |
| 123 | + |
| 124 | + # print ensemble/member |
| 125 | + print(f"\nProcessing AA output") |
| 126 | + |
| 127 | + # directory to save the data to (as NetCDF) |
| 128 | + outputdir = f'{scratchdatadir}/{model}/{experiment}/AA/{start_time_str}-{end_time_str}/cyclo{lumpby}' |
| 129 | + print("Creating directory: ", outputdir) |
| 130 | + os.makedirs(outputdir, exist_ok=True) |
| 131 | + print(" averaging data from: ", AAdatadir) |
| 132 | + print(" to be saved in: ", outputdir) |
| 133 | + |
| 134 | + # # umo / tx_trans |
| 135 | + # paths = [f'{AAdatadir}/output00{simyear}/ocean/ocean-3d-tx_trans-1monthly-mean-ym_185{simyear}_01.nc' for simyear in simyears] |
| 136 | + # try: |
| 137 | + # print(" Loading tx_trans") |
| 138 | + # tx_trans_ds = open_my_dataset(paths) |
| 139 | + # print(" selecting time window") |
| 140 | + # tx_trans_ds_sel = tx_trans_ds.sel(time=slice(start_time, end_time)) |
| 141 | + # print(f"Averaging tx_trans over each {lumpby}") |
| 142 | + # tx_trans = climatology(tx_trans_ds_sel["tx_trans"], lumpby) |
| 143 | + # print("\ntx_trans: ", tx_trans) |
| 144 | + # print("Saving tx_trans to: ", f'{outputdir}/umo.nc') |
| 145 | + # tx_trans.to_dataset(name='umo').to_netcdf(f'{outputdir}/umo.nc', compute=True) |
| 146 | + # except Exception: |
| 147 | + # print(f'Error processing AA tx_trans') |
| 148 | + # print(traceback.format_exc()) |
| 149 | + # # vmo / ty_trans |
| 150 | + # paths = [f'{AAdatadir}/output00{simyear}/ocean/ocean-3d-ty_trans-1monthly-mean-ym_185{simyear}_01.nc' for simyear in simyears] |
| 151 | + # try: |
| 152 | + # print(" Loading ty_trans") |
| 153 | + # ty_trans_ds = open_my_dataset(paths) |
| 154 | + # print(" selecting time window") |
| 155 | + # ty_trans_ds_sel = ty_trans_ds.sel(time=slice(start_time, end_time)) |
| 156 | + # print(f"Averaging ty_trans over each {lumpby}") |
| 157 | + # ty_trans = climatology(ty_trans_ds_sel["ty_trans"], lumpby) |
| 158 | + # print("\nty_trans: ", ty_trans) |
| 159 | + # print("Saving ty_trans to: ", f'{outputdir}/vmo.nc') |
| 160 | + # ty_trans.to_dataset(name='vmo').to_netcdf(f'{outputdir}/vmo.nc', compute=True) |
| 161 | + # except Exception: |
| 162 | + # print(f'Error processing AA ty_trans') |
| 163 | + # print(traceback.format_exc()) |
| 164 | + |
| 165 | + # # tx_trans_gm |
| 166 | + # paths = [f'{AAdatadir}/output00{simyear}/ocean/ocean-3d-tx_trans_gm-1monthly-mean-ym_185{simyear}_01.nc' for simyear in simyears] |
| 167 | + # try: |
| 168 | + # print(" Loading tx_trans_gm") |
| 169 | + # tx_trans_gm_ds = open_my_dataset(paths) |
| 170 | + # print(" selecting time window") |
| 171 | + # tx_trans_gm_ds_sel = tx_trans_gm_ds.sel(time=slice(start_time, end_time)) |
| 172 | + # print(f"Averaging tx_trans_gm over each {lumpby}") |
| 173 | + # tx_trans_gm = climatology(tx_trans_gm_ds_sel["tx_trans_gm"], lumpby) |
| 174 | + # print("\ntx_trans_gm: ", tx_trans_gm) |
| 175 | + # print("Saving tx_trans_gm to: ", f'{outputdir}/tx_trans_gm.nc') |
| 176 | + # tx_trans_gm.to_dataset(name='tx_trans_gm').to_netcdf(f'{outputdir}/tx_trans_gm.nc', compute=True) |
| 177 | + # except Exception: |
| 178 | + # print(f'Error processing AA tx_trans_gm') |
| 179 | + # print(traceback.format_exc()) |
| 180 | + # # ty_trans_gm |
| 181 | + # paths = [f'{AAdatadir}/output00{simyear}/ocean/ocean-3d-ty_trans_gm-1monthly-mean-ym_185{simyear}_01.nc' for simyear in simyears] |
| 182 | + # try: |
| 183 | + # print(" Loading ty_trans_gm") |
| 184 | + # ty_trans_gm_ds = open_my_dataset(paths) |
| 185 | + # print(" selecting time window") |
| 186 | + # ty_trans_gm_ds_sel = ty_trans_gm_ds.sel(time=slice(start_time, end_time)) |
| 187 | + # print(f"Averaging ty_trans_gm over each {lumpby}") |
| 188 | + # ty_trans_gm = climatology(ty_trans_gm_ds_sel["ty_trans_gm"], lumpby) |
| 189 | + # print("\nty_trans_gm: ", ty_trans_gm) |
| 190 | + # print("Saving ty_trans_gm to: ", f'{outputdir}/ty_trans_gm.nc') |
| 191 | + # ty_trans_gm.to_dataset(name='ty_trans_gm').to_netcdf(f'{outputdir}/ty_trans_gm.nc', compute=True) |
| 192 | + # except Exception: |
| 193 | + # print(f'Error processing AA ty_trans_gm') |
| 194 | + # print(traceback.format_exc()) |
| 195 | + |
| 196 | + # # tx_trans_submeso |
| 197 | + # paths = [f'{AAdatadir}/output00{simyear}/ocean/ocean-3d-tx_trans_submeso-1monthly-mean-ym_185{simyear}_01.nc' for simyear in simyears] |
| 198 | + # try: |
| 199 | + # print(" Loading tx_trans_submeso") |
| 200 | + # tx_trans_submeso_ds = open_my_dataset(paths) |
| 201 | + # print(" selecting time window") |
| 202 | + # tx_trans_submeso_ds_sel = tx_trans_submeso_ds.sel(time=slice(start_time, end_time)) |
| 203 | + # print(f"Averaging tx_trans_submeso over each {lumpby}") |
| 204 | + # tx_trans_submeso = climatology(tx_trans_submeso_ds_sel["tx_trans_submeso"], lumpby) |
| 205 | + # print("\ntx_trans_submeso: ", tx_trans_submeso) |
| 206 | + # print("Saving tx_trans_submeso to: ", f'{outputdir}/tx_trans_submeso.nc') |
| 207 | + # tx_trans_submeso.to_dataset(name='tx_trans_submeso').to_netcdf(f'{outputdir}/tx_trans_submeso.nc', compute=True) |
| 208 | + # except Exception: |
| 209 | + # print(f'Error processing AA tx_trans_submeso') |
| 210 | + # print(traceback.format_exc()) |
| 211 | + # # ty_trans_submeso |
| 212 | + # paths = [f'{AAdatadir}/output00{simyear}/ocean/ocean-3d-ty_trans_submeso-1monthly-mean-ym_185{simyear}_01.nc' for simyear in simyears] |
| 213 | + # try: |
| 214 | + # print(" Loading ty_trans_submeso") |
| 215 | + # ty_trans_submeso_ds = open_my_dataset(paths) |
| 216 | + # print(" selecting time window") |
| 217 | + # ty_trans_submeso_ds_sel = ty_trans_submeso_ds.sel(time=slice(start_time, end_time)) |
| 218 | + # print(f"Averaging ty_trans_submeso over each {lumpby}") |
| 219 | + # ty_trans_submeso = climatology(ty_trans_submeso_ds_sel["ty_trans_submeso"], lumpby) |
| 220 | + # print("\nty_trans_submeso: ", ty_trans_submeso) |
| 221 | + # print("Saving ty_trans_submeso to: ", f'{outputdir}/ty_trans_submeso.nc') |
| 222 | + # ty_trans_submeso.to_dataset(name='ty_trans_submeso').to_netcdf(f'{outputdir}/ty_trans_submeso.nc', compute=True) |
| 223 | + # except Exception: |
| 224 | + # print(f'Error processing AA ty_trans_submeso') |
| 225 | + # print(traceback.format_exc()) |
| 226 | + |
| 227 | + # mlotst dataset |
| 228 | + paths = [f'{AAdatadir}/output00{simyear}/ocean/ocean-2d-mld-1monthly-mean-ym_185{simyear}_01.nc' for simyear in simyears] |
| 229 | + try: |
| 230 | + print("Loading mlotst data") |
| 231 | + mlotst_ds = open_my_dataset(paths) |
| 232 | + print("\nmlotst_ds: ", mlotst_ds) |
| 233 | + print("Slicing mlotst for the time period") |
| 234 | + mlotst_ds_sel = mlotst_ds.sel(time=slice(start_time, end_time)) |
| 235 | + print(f"Averaging mlotst over each {lumpby}") |
| 236 | + mlotst = climatology(mlotst_ds_sel["mld"], lumpby) |
| 237 | + print("\nmlotst: ", mlotst) |
| 238 | + print("Saving mlotst to: ", f'{outputdir}/mlotst.nc') |
| 239 | + mlotst.to_dataset(name='mlotst').to_netcdf(f'{outputdir}/mlotst.nc', compute=True) |
| 240 | + except Exception: |
| 241 | + print(f'Error processing {model} {member} mlotst') |
| 242 | + print(traceback.format_exc()) |
| 243 | + |
| 244 | + client.close() |
| 245 | + |
| 246 | + |
| 247 | + |
| 248 | + |
0 commit comments