|
| 1 | + |
| 2 | + |
| 3 | +# import sys to access script arguments (experiment, ensemble, first_year, last_year) |
| 4 | +import sys |
| 5 | + |
| 6 | +# For interactive use only |
| 7 | +model = "ACCESS-ESM1-5" |
| 8 | +experiment = "historical" |
| 9 | +year_start = 1990 |
| 10 | +num_years = 10 |
| 11 | +lumpby = "month" |
| 12 | + |
| 13 | + |
| 14 | +# Model etc. defined from script input |
| 15 | +model = sys.argv[1] |
| 16 | +print("Model: ", model, " (type: ", type(model), ")") |
| 17 | +experiment = sys.argv[2] |
| 18 | +print("Experiment: ", experiment, " (type: ", type(experiment), ")") |
| 19 | +year_start = int(sys.argv[3]) |
| 20 | +num_years = int(sys.argv[4]) |
| 21 | +print("Time window: ", year_start, " to ", year_start + num_years - 1) |
| 22 | +lumpby = sys.argv[5] # "month" or "season" |
| 23 | +print("Lumping by", lumpby) |
| 24 | + |
| 25 | +seasons = ("DJF", "MAM", "JJA", "SON") |
| 26 | +months = range(1, 13) # Ugh! Zero-based indexing! |
| 27 | + |
| 28 | +# 1. Load packages |
| 29 | + |
| 30 | +# Import os for makedirs/isfile/environ |
| 31 | +import os |
| 32 | +os.environ["PYTHONWARNINGS"] = "ignore" |
| 33 | + |
| 34 | +# Load dask |
| 35 | +from dask.distributed import Client |
| 36 | + |
| 37 | +# import glob for searching directories |
| 38 | +from glob import glob |
| 39 | + |
| 40 | +# Load xarray for N-dimensional arrays |
| 41 | +import xarray as xr |
| 42 | + |
| 43 | +# Load Pandas to make axis of members |
| 44 | +import pandas as pd |
| 45 | + |
| 46 | +# Load traceback to print exceptions |
| 47 | +import traceback |
| 48 | + |
| 49 | +# Import numpy |
| 50 | +import numpy as np |
| 51 | + |
| 52 | +# # Load xmip for preprocessing (trying to get consistent metadata for making matrices down the road) |
| 53 | +# from xmip.preprocessing import combined_preprocessing |
| 54 | + |
| 55 | + |
| 56 | +# 2. Define some functions |
| 57 | +# (to avoid too much boilerplate code) |
| 58 | +print("Defining functions") |
| 59 | + |
| 60 | +def time_window_strings(year_start, num_years): |
| 61 | + """ |
| 62 | + return strings for start_time and end_time |
| 63 | + """ |
| 64 | + # start_time is first second of year_start |
| 65 | + start_time = f'{year_start}' |
| 66 | + # end_time is last second of last_year |
| 67 | + end_time = f'{year_start + num_years - 1}' |
| 68 | + # Return the weighted average |
| 69 | + return start_time, end_time |
| 70 | + |
| 71 | +def CMIP6_member(member): |
| 72 | + return f'r{member}i1p1f1' |
| 73 | + |
| 74 | +# Members to loop through |
| 75 | + |
| 76 | + |
| 77 | +# Create directory on scratch to save the data |
| 78 | +scratchdatadir = '/scratch/xv83/TMIP/data' |
| 79 | + |
| 80 | +# Depends on time window |
| 81 | +start_time, end_time = time_window_strings(year_start, num_years) |
| 82 | +start_time_str = f'Jan{start_time}' |
| 83 | +end_time_str = f'Dec{end_time}' |
| 84 | + |
| 85 | + |
| 86 | + |
| 87 | +def inputdirfun(member): |
| 88 | + return f'{scratchdatadir}/{model}/{experiment}/{CMIP6_member(member)}/{start_time_str}-{end_time_str}/cyclo{lumpby}' |
| 89 | + |
| 90 | +def inputfilepathfun(member): |
| 91 | + return f'{inputdirfun(member)}/reemergence_time.nc' |
| 92 | + |
| 93 | +def isvalidmember(member): |
| 94 | + return os.path.isfile(inputfilepathfun(member)) |
| 95 | + |
| 96 | +members = [m for m in range(1, 41) if isvalidmember(m)] |
| 97 | + |
| 98 | +# # For debugging only |
| 99 | +# members = members[0:2] |
| 100 | + |
| 101 | +paths = [inputfilepathfun(m) for m in members] |
| 102 | +members_axis = pd.Index(members, name="member") |
| 103 | + |
| 104 | + |
| 105 | +# function to open all files and combine them |
| 106 | +# TODO Figure out how to do this... !@#$%^&*xarray*&^%$#@! |
| 107 | +def open_my_dataset(paths): |
| 108 | + ds = xr.open_mfdataset( |
| 109 | + paths, |
| 110 | + chunks={'Ti':-1, 'lev':-1}, # TODO these dim names likely won't work for my Gammas |
| 111 | + concat_dim=[members_axis], # TODO these dim names likely won't work for my Gammas |
| 112 | + compat='override', |
| 113 | + preprocess=None, |
| 114 | + engine='netcdf4', |
| 115 | + # data_vars='minimal', # <- cannot have this option otherwise only one member is loaded it seems |
| 116 | + coords='minimal', |
| 117 | + combine='nested', |
| 118 | + parallel=True, |
| 119 | + join='outer', |
| 120 | + attrs_file=None, |
| 121 | + combine_attrs='override', |
| 122 | + ) |
| 123 | + return ds |
| 124 | + |
| 125 | + |
| 126 | + |
| 127 | + |
| 128 | +outputdir = f'{scratchdatadir}/{model}/{experiment}/all_members/{start_time_str}-{end_time_str}/cyclo{lumpby}' |
| 129 | +print("Creating directory: ", outputdir) |
| 130 | +os.makedirs(outputdir, exist_ok=True) |
| 131 | +print(" to be saved in: ", outputdir) |
| 132 | + |
| 133 | +print("Starting client") |
| 134 | + |
| 135 | +# This `if` statement is required in scripts (not required in Jupyter) |
| 136 | +if __name__ == '__main__': |
| 137 | + client = Client(n_workers=40, threads_per_worker=1) |
| 138 | + |
| 139 | + adjointage_ds = open_my_dataset(paths) |
| 140 | + print("\nadjointage_ds: ", adjointage_ds) |
| 141 | + adjointage = adjointage_ds.adjointage |
| 142 | + print("\nadjointage: ", adjointage) |
| 143 | + |
| 144 | + adjointage_mean = adjointage.mean(dim = ["Ti", "member"]) |
| 145 | + print("\nadjointage_mean: ", adjointage_mean) |
| 146 | + adjointage_mean.to_dataset(name = 'adjointage_mean').to_netcdf(f'{outputdir}/adjointage_mean.nc', compute = True) |
| 147 | + |
| 148 | + adjointage_std = adjointage.std(dim = ["Ti", "member"]) |
| 149 | + print("\nadjointage_std: ", adjointage_std) |
| 150 | + adjointage_std.to_dataset(name = 'adjointage_std').to_netcdf(f'{outputdir}/adjointage_std.nc', compute = True) |
| 151 | + |
| 152 | + adjointage_max = adjointage.max(dim = ["Ti", "member"]) |
| 153 | + print("\nadjointage_max: ", adjointage_max) |
| 154 | + adjointage_max.to_dataset(name = 'adjointage_max').to_netcdf(f'{outputdir}/adjointage_max.nc', compute = True) |
| 155 | + |
| 156 | + adjointage_min = adjointage.min(dim = ["Ti", "member"]) |
| 157 | + print("\nadjointage_min: ", adjointage_min) |
| 158 | + adjointage_min.to_dataset(name = 'adjointage_min').to_netcdf(f'{outputdir}/adjointage_min.nc', compute = True) |
| 159 | + |
| 160 | + |
| 161 | + |
| 162 | + client.close() |
| 163 | + |
| 164 | + |
| 165 | + |
| 166 | + |
| 167 | + |
0 commit comments