
Commit d644356

committed
Add AA output monthly-climatology averaging scripts (not just decadal average)
1 parent b2cea0e commit d644356

File tree

2 files changed: +278 −0 lines changed

Lines changed: 248 additions & 0 deletions
@@ -0,0 +1,248 @@
# qsub -I -q express -P xv83 -l ncpus=48 -l mem=190GB -l jobfs=4GB -l walltime=0:20:00 -l storage=gdata/xv83+gdata/dk92+gdata/hh5+gdata/xp65+gdata/p73

# import sys to access script arguments (experiment, ensemble, first_year, last_year)
import sys

# These are fixed (correspond to my AA simulation)
model='ACCESS-ESM1-5'
experiment='historical'
year_start=1850
num_years=10
lumpby="month"

# 1. Load packages

# Import os for makedirs/isfile/environ
import os
os.environ["PYTHONWARNINGS"] = "ignore"

# Load dask
from dask.distributed import Client

# Load xarray for N-dimensional arrays
import xarray as xr

# Load datetime to deal with time formats
import datetime

# Load traceback to print exceptions
import traceback

# Import numpy
import numpy as np

# Load pandas for data manipulation
import pandas as pd

# 2. Define some functions
# (to avoid too much boilerplate code)
print("Defining functions")

def open_my_dataset(paths):
    ds = xr.open_mfdataset(
        paths,
        chunks={'time': -1, 'st_ocean': -1},
        concat_dim="time",
        compat='override',
        preprocess=None,
        engine='netcdf4',
        data_vars='minimal',
        coords='minimal',
        combine='nested',
        parallel=True,
        join='outer',
        attrs_file=None,
        combine_attrs='override',
    )
    return ds

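# Illustrative use (a sketch only, mirroring the paths built further below for the mlotst files):
#   paths = [f'{AAdatadir}/output00{y}/ocean/ocean-2d-mld-1monthly-mean-ym_185{y}_01.nc' for y in range(10)]
#   mld_ds = open_my_dataset(paths)  # lazily concatenates the yearly files along time
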
def season_climatology(ds):
    # Make a DataArray with the number of days in each month, size = len(time)
    month_length = ds.time.dt.days_in_month
    # Calculate the weights by grouping by 'time.season'
    weights = month_length.groupby("time.season") / month_length.groupby("time.season").sum()
    # Test that the sum of the weights for each season is 1.0
    np.testing.assert_allclose(weights.groupby("time.season").sum().values, np.ones(4))
    # Calculate the weighted average
    return (ds * weights).groupby("time.season").sum(dim="time")

def month_climatology(ds):
    # Make a DataArray with the number of days in each month, size = len(time)
    month_length = ds.time.dt.days_in_month
    # Calculate the weights by grouping by 'time.month'
    weights = month_length.groupby("time.month") / month_length.groupby("time.month").sum()
    # Test that the sum of the weights for each month is 1.0
    np.testing.assert_allclose(weights.groupby("time.month").sum().values, np.ones(12))
    # Calculate the weighted average
    ds_out = (ds * weights).groupby("time.month").sum(dim="time")
    # Keep track of the mean number of days per month
    mean_days_in_month = month_length.groupby("time.month").mean()
    # And assign it to a new coordinate
    ds_out = ds_out.assign_coords(mean_days_in_month=('month', mean_days_in_month.data))
    return ds_out

def climatology(ds, lumpby):
    if lumpby == "month":
        return month_climatology(ds)
    elif lumpby == "season":
        return season_climatology(ds)
    else:
        raise ValueError(f"lumpby must be 'month' or 'season', got {lumpby!r}")

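# A quick sanity check of the day-weighted climatology above (an illustrative sketch,
# not part of this workflow) could look like:
#
#   times = xr.cftime_range("1850-01-01", periods=24, freq="MS", calendar="noleap")
#   toy = xr.Dataset({"x": ("time", np.arange(24.0))}, coords={"time": times})
#   toy_clim = month_climatology(toy)
#   # toy_clim.x has 12 values, each the days-in-month weighted mean of the two samples
#   # falling in that calendar month, and toy_clim.mean_days_in_month records the
#   # average month length used for the weights.
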
def time_window_strings(year_start, num_years):
    """
    Return strings for start_time and end_time
    """
    # start_time is the first year of the window
    start_time = f'{year_start}'
    # end_time is the last year of the window
    end_time = f'{year_start + num_years - 1}'
    # Return the strings
    return start_time, end_time

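# For the fixed values above (year_start=1850, num_years=10) this gives, for example:
#   time_window_strings(1850, 10)  ->  ("1850", "1859")
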
# Create directory on scratch to save the data
scratchdatadir = '/scratch/xv83/TMIP/data'
AAdatadir = '/scratch/xv83/bp3051/access-esm/archive/andersonacceleration_test-n10-5415f621/'
gdatadatadir = '/g/data/xv83/TMIP/data'

# Depends on time window
start_time, end_time = time_window_strings(year_start, num_years)
start_time_str = f'Jan{start_time}'
end_time_str = f'Dec{end_time}'

# Simulation years from AA output
simyears = range(10)  # AA cycles of 10 years

# 4. Load data, preprocess it, and save it to NetCDF

print("Starting client")

# This `if` statement is required in scripts (not required in Jupyter)
if __name__ == '__main__':
    client = Client(n_workers=44, threads_per_worker=1)  # , memory_limit='16GB')  # Note: with 1 thread/worker thetao cannot be plotted. Maybe I need to understand why?

    # print what is being processed
    print(f"\nProcessing AA output")

    # directory to save the data to (as NetCDF)
    outputdir = f'{scratchdatadir}/{model}/{experiment}/AA/{start_time_str}-{end_time_str}/cyclo{lumpby}'
    print("Creating directory: ", outputdir)
    os.makedirs(outputdir, exist_ok=True)
    print(" averaging data from: ", AAdatadir)
    print(" to be saved in: ", outputdir)

    # # umo / tx_trans
    # paths = [f'{AAdatadir}/output00{simyear}/ocean/ocean-3d-tx_trans-1monthly-mean-ym_185{simyear}_01.nc' for simyear in simyears]
    # try:
    #     print(" Loading tx_trans")
    #     tx_trans_ds = open_my_dataset(paths)
    #     print(" selecting time window")
    #     tx_trans_ds_sel = tx_trans_ds.sel(time=slice(start_time, end_time))
    #     print(f"Averaging tx_trans over each {lumpby}")
    #     tx_trans = climatology(tx_trans_ds_sel["tx_trans"], lumpby)
    #     print("\ntx_trans: ", tx_trans)
    #     print("Saving tx_trans to: ", f'{outputdir}/umo.nc')
    #     tx_trans.to_dataset(name='umo').to_netcdf(f'{outputdir}/umo.nc', compute=True)
    # except Exception:
    #     print(f'Error processing AA tx_trans')
    #     print(traceback.format_exc())
    # # vmo / ty_trans
    # paths = [f'{AAdatadir}/output00{simyear}/ocean/ocean-3d-ty_trans-1monthly-mean-ym_185{simyear}_01.nc' for simyear in simyears]
    # try:
    #     print(" Loading ty_trans")
    #     ty_trans_ds = open_my_dataset(paths)
    #     print(" selecting time window")
    #     ty_trans_ds_sel = ty_trans_ds.sel(time=slice(start_time, end_time))
    #     print(f"Averaging ty_trans over each {lumpby}")
    #     ty_trans = climatology(ty_trans_ds_sel["ty_trans"], lumpby)
    #     print("\nty_trans: ", ty_trans)
    #     print("Saving ty_trans to: ", f'{outputdir}/vmo.nc')
    #     ty_trans.to_dataset(name='vmo').to_netcdf(f'{outputdir}/vmo.nc', compute=True)
    # except Exception:
    #     print(f'Error processing AA ty_trans')
    #     print(traceback.format_exc())

    # # tx_trans_gm
    # paths = [f'{AAdatadir}/output00{simyear}/ocean/ocean-3d-tx_trans_gm-1monthly-mean-ym_185{simyear}_01.nc' for simyear in simyears]
    # try:
    #     print(" Loading tx_trans_gm")
    #     tx_trans_gm_ds = open_my_dataset(paths)
    #     print(" selecting time window")
    #     tx_trans_gm_ds_sel = tx_trans_gm_ds.sel(time=slice(start_time, end_time))
    #     print(f"Averaging tx_trans_gm over each {lumpby}")
    #     tx_trans_gm = climatology(tx_trans_gm_ds_sel["tx_trans_gm"], lumpby)
    #     print("\ntx_trans_gm: ", tx_trans_gm)
    #     print("Saving tx_trans_gm to: ", f'{outputdir}/tx_trans_gm.nc')
    #     tx_trans_gm.to_dataset(name='tx_trans_gm').to_netcdf(f'{outputdir}/tx_trans_gm.nc', compute=True)
    # except Exception:
    #     print(f'Error processing AA tx_trans_gm')
    #     print(traceback.format_exc())
    # # ty_trans_gm
    # paths = [f'{AAdatadir}/output00{simyear}/ocean/ocean-3d-ty_trans_gm-1monthly-mean-ym_185{simyear}_01.nc' for simyear in simyears]
    # try:
    #     print(" Loading ty_trans_gm")
    #     ty_trans_gm_ds = open_my_dataset(paths)
    #     print(" selecting time window")
    #     ty_trans_gm_ds_sel = ty_trans_gm_ds.sel(time=slice(start_time, end_time))
    #     print(f"Averaging ty_trans_gm over each {lumpby}")
    #     ty_trans_gm = climatology(ty_trans_gm_ds_sel["ty_trans_gm"], lumpby)
    #     print("\nty_trans_gm: ", ty_trans_gm)
    #     print("Saving ty_trans_gm to: ", f'{outputdir}/ty_trans_gm.nc')
    #     ty_trans_gm.to_dataset(name='ty_trans_gm').to_netcdf(f'{outputdir}/ty_trans_gm.nc', compute=True)
    # except Exception:
    #     print(f'Error processing AA ty_trans_gm')
    #     print(traceback.format_exc())

    # # tx_trans_submeso
    # paths = [f'{AAdatadir}/output00{simyear}/ocean/ocean-3d-tx_trans_submeso-1monthly-mean-ym_185{simyear}_01.nc' for simyear in simyears]
    # try:
    #     print(" Loading tx_trans_submeso")
    #     tx_trans_submeso_ds = open_my_dataset(paths)
    #     print(" selecting time window")
    #     tx_trans_submeso_ds_sel = tx_trans_submeso_ds.sel(time=slice(start_time, end_time))
    #     print(f"Averaging tx_trans_submeso over each {lumpby}")
    #     tx_trans_submeso = climatology(tx_trans_submeso_ds_sel["tx_trans_submeso"], lumpby)
    #     print("\ntx_trans_submeso: ", tx_trans_submeso)
    #     print("Saving tx_trans_submeso to: ", f'{outputdir}/tx_trans_submeso.nc')
    #     tx_trans_submeso.to_dataset(name='tx_trans_submeso').to_netcdf(f'{outputdir}/tx_trans_submeso.nc', compute=True)
    # except Exception:
    #     print(f'Error processing AA tx_trans_submeso')
    #     print(traceback.format_exc())
    # # ty_trans_submeso
    # paths = [f'{AAdatadir}/output00{simyear}/ocean/ocean-3d-ty_trans_submeso-1monthly-mean-ym_185{simyear}_01.nc' for simyear in simyears]
    # try:
    #     print(" Loading ty_trans_submeso")
    #     ty_trans_submeso_ds = open_my_dataset(paths)
    #     print(" selecting time window")
    #     ty_trans_submeso_ds_sel = ty_trans_submeso_ds.sel(time=slice(start_time, end_time))
    #     print(f"Averaging ty_trans_submeso over each {lumpby}")
    #     ty_trans_submeso = climatology(ty_trans_submeso_ds_sel["ty_trans_submeso"], lumpby)
    #     print("\nty_trans_submeso: ", ty_trans_submeso)
    #     print("Saving ty_trans_submeso to: ", f'{outputdir}/ty_trans_submeso.nc')
    #     ty_trans_submeso.to_dataset(name='ty_trans_submeso').to_netcdf(f'{outputdir}/ty_trans_submeso.nc', compute=True)
    # except Exception:
    #     print(f'Error processing AA ty_trans_submeso')
    #     print(traceback.format_exc())

    # mlotst dataset
    paths = [f'{AAdatadir}/output00{simyear}/ocean/ocean-2d-mld-1monthly-mean-ym_185{simyear}_01.nc' for simyear in simyears]
    try:
        print("Loading mlotst data")
        mlotst_ds = open_my_dataset(paths)
        print("\nmlotst_ds: ", mlotst_ds)
        print("Slicing mlotst for the time period")
        mlotst_ds_sel = mlotst_ds.sel(time=slice(start_time, end_time))
        print(f"Averaging mlotst over each {lumpby}")
        mlotst = climatology(mlotst_ds_sel["mld"], lumpby)
        print("\nmlotst: ", mlotst)
        print("Saving mlotst to: ", f'{outputdir}/mlotst.nc')
        mlotst.to_dataset(name='mlotst').to_netcdf(f'{outputdir}/mlotst.nc', compute=True)
    except Exception:
        print('Error processing AA mlotst')
        print(traceback.format_exc())

    client.close()

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
#!/bin/bash

#PBS -P xv83
#PBS -N cycloAAavg
#PBS -q express
#PBS -l ncpus=48
#PBS -l mem=190GB
#PBS -l jobfs=4GB
#PBS -l walltime=1:00:00
#PBS -l storage=gdata/xv83+gdata/dk92+gdata/hh5+gdata/xp65+gdata/p73
#PBS -l wd
#PBS -o output/PBS/
#PBS -j oe

echo "Going into TMIP notebooks directory"
cd ~/Projects/TMIP/notebooks

echo "Loading conda/analysis3-24.04 module"
module use /g/data/hh5/public/modules
module load conda/analysis3-24.04
conda activate conda/analysis3-24.04
conda info

echo "Loading python3/3.12.1"
module load python3/3.12.1


echo "Running cyclo-averaging script"
python scripts/cyclo_average_AA_variables.py &> output/cyclo_average_AA_variables.$PBS_JOBID.out
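Usage sketch (not part of the commit, and the PBS script's own file name is not shown in the diff): the job would be submitted with `qsub` pointing at this script; `#PBS -l wd` starts it in the submission directory, and the Python script's output is redirected to `output/cyclo_average_AA_variables.$PBS_JOBID.out`.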
