Skip to content

Commit f0845c5

Browse files
committed
Add script for SSP + archive all the runs I can
1 parent 26f8b26 commit f0845c5

14 files changed

+634
-89
lines changed

notebooks/ACCESS_output_checks.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2634,7 +2634,7 @@
26342634
"source": [
26352635
"umo_datadask = select_latest_data(searched_cat,\n",
26362636
" dict(\n",
2637-
" # chunks={'i': 60, 'j': 60, 'time': -1, 'lev':50}\n",
2637+
" chunks={'time': -1, 'lev':-1}\n",
26382638
" ),\n",
26392639
" variable_id = \"umo\",\n",
26402640
" member_id = \"r2i1p1f1\",\n",

scripts/archive_unarchived_CMIP6_ACCESS_GM_files.py

Lines changed: 35 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
# interactive use only
77
model="ACCESS-ESM1-5"
88
experiment="historical"
9-
# year_start = 1850
10-
year_start = 1940
11-
# year_end = 2015
12-
year_end = 1950
9+
# decade_start = 1850
10+
decade_start = 1940
11+
# decade_end = 2015
12+
decade_end = 1950
1313

1414

1515
# Model etc. defined from script input
@@ -19,10 +19,10 @@
1919
print("Experiment: ", experiment, " (type: ", type(experiment), ")")
2020
members = sys.argv[3].split(',')
2121
print("members: ", members, " (type: ", type(members), ")")
22-
year_start = int(sys.argv[4])
23-
print("year_start: ", year_start, " (type: ", type(year_start), ")")
24-
year_end = int(sys.argv[5])
25-
print("year_end: ", year_end, " (type: ", type(year_end), ")")
22+
decade_start = int(sys.argv[4])
23+
print("decade_start: ", decade_start, " (type: ", type(decade_start), ")")
24+
decade_end = int(sys.argv[5])
25+
print("decade_end: ", decade_end, " (type: ", type(decade_end), ")")
2626

2727
# 1. Load packages
2828

@@ -45,6 +45,20 @@
4545
# # Load xmip for preprocessing (trying to get consistent metadata for making matrices down the road)
4646
# from xmip.preprocessing import combined_preprocessing
4747

48+
decades = range(decade_start, decade_end, 10)
49+
print(f"\nDecades:\n")
50+
print(*decades)
51+
52+
# Historical runs go from 1850 to 2015, and future scenarios from 2015 to 2100.
53+
# I want to save data per decade, so I need some branching logic to deal with the 2010s.
54+
def decade_years(decade, experiment):
    """Return the range of years to process for ``decade`` in ``experiment``.

    Every decade spans ten years, ``range(decade, decade + 10)``, except the
    2010s, which are split between experiments: the ``"historical"``
    experiment gets 2010-2014 and every other experiment gets 2015-2019.

    Parameters
    ----------
    decade : int
        First year of the decade, e.g. 1990 or 2010.
    experiment : str
        Experiment name. Only ``"historical"`` is treated specially; any
        other value is assumed to be a future scenario starting in 2015.

    Returns
    -------
    range
        The years of that decade to process for that experiment.
    """
    # Common case first: a full ten-year decade.
    if decade != 2010:
        return range(decade, decade + 10)

    # The 2010s straddle the historical/scenario boundary at 2015.
    if experiment == "historical":
        return range(2010, 2015)
    return range(2015, 2020)
4862

4963

5064

@@ -57,13 +71,11 @@
5771
# members = ["HI-09", "HI-10", "HI-11", "HI-12"]
5872
# members = ["HI-05"]
5973

60-
61-
6274
print("Starting client")
6375

6476
# This `if` statement is required in scripts (not required in Jupyter)
6577
if __name__ == '__main__':
66-
client = Client(n_workers=24, threads_per_worker=1)
78+
client = Client(n_workers=40, threads_per_worker=1)
6779
#, threads_per_worker=1, memory_limit='16GB') # Note: with 1thread/worker cannot plot thetao. Maybe I need to understand why?
6880
# Added threads_per_worker=1 back again because I was possibly hitting some random unsafe multithreading issue:
6981
# https://forum.access-hive.org.au/t/netcdf-not-a-valid-id-errors/389
@@ -72,29 +84,31 @@
7284
for member in members:
7385

7486
# print ensemble/member
75-
inputdir = f'/scratch/p66/pbd562/petrichor/get/{member}/history/ocn'
76-
outputdir = f'{gdatadatadir}/{model}/{member}'
87+
if experiment == "historical":
88+
inputdir = f'/scratch/p66/pbd562/petrichor/get/{member}/history/ocn'
89+
else:
90+
inputdir = f'/scratch/p66/pbd562/petrichor/get/{experiment}/{member}/history/ocn'
91+
92+
outputdir = f'{gdatadatadir}/{model}/{experiment}/{member}'
7793
print(f"\nProcessing {member}")
7894

7995
# directory to save the data to (as NetCDF)
8096
print("Creating directory: ", outputdir)
8197
os.makedirs(outputdir, exist_ok=True)
8298

83-
for decade in range(year_start, year_end, 10):
99+
for decade in decades:
84100

85-
print(f'\nDecade {decade}')
101+
print(f'\nDecade {decade}:\n')
86102

87-
if decade == 2010:
88-
num_years = 5
89-
else:
90-
num_years = 10
103+
years = decade_years(decade, experiment)
104+
print(*years)
91105

92106
# subset of the files required
93-
paths = [f'{inputdir}/ocean_month.nc-{year}1231' for year in range(decade, decade + num_years)]
107+
paths = [f'{inputdir}/ocean_month.nc-{year}1231' for year in years]
94108

95109
# Exit early if cannot find all files
96110
if not all([os.path.isfile(fname) for fname in paths]):
97-
print(f'Missing files for {member} {decade}-{decade + num_years}')
111+
print(f'Missing files for {member} {decade}s')
98112
continue
99113

100114
# load the data

scripts/archive_unarchived_CMIP6_ACCESS_GM_files.sh

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
#PBS -P xv83
44
#PBS -N archive_GM
5-
#PBS -l ncpus=28
5+
#PBS -l ncpus=48
66
#PBS -l mem=180GB
77
#PBS -l jobfs=4GB
8-
#PBS -l walltime=1:00:00
8+
#PBS -l walltime=24:00:00
99
#PBS -l storage=gdata/xv83+gdata/dk92+gdata/hh5+gdata/xp65+gdata/p73+scratch/p66
1010
#PBS -l wd
1111
#PBS -o output/PBS/
@@ -23,20 +23,22 @@ conda info
2323
echo "Loading python3/3.12.1"
2424
module load python3/3.12.1
2525

26-
# CHANGE HERE the model, experiment, ensemble, etc.
26+
# CHANGE HERE the model, experiment, members, decades.
2727
model=ACCESS-ESM1-5
2828
# model=ACCESS-CM2
29+
2930
experiment=historical
30-
# year_start=1850
31-
year_start=1960
32-
# year_end=2015
33-
year_end=1970
34-
# members=("HI-05","HI-06")
35-
members=("HI-12")
31+
members=("HI-37","HI-38","HI-39","HI-40","HI-41","HI-42","HI-43","HI-44")
32+
decade_start=1850
33+
decade_end=2020
34+
# experiment=ssp370
35+
# members=("SSP-370-39","SSP-370-40","SSP-370-41","SSP-370-42","SSP-370-43","SSP-370-44")
36+
# decade_start=2010
37+
# decade_end=2100
3638

3739

3840
echo "Running transport-state script"
39-
python scripts/archive_unarchived_CMIP6_ACCESS_GM_files.py $model $experiment $members $year_start $year_end \
41+
python scripts/archive_unarchived_CMIP6_ACCESS_GM_files.py $model $experiment $members $decade_start $decade_end \
4042
&> output/$PBS_JOBID.$model.monthly.datafromTilo.out
4143

4244

scripts/build_average_CMIP5_ACCESS_transport_state_on_Gadi.py renamed to scripts/average_CMIP5_ACCESS_variables.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ def sort_ensembles(ensembles):
227227
print("Loading volcello data")
228228
volcello_datadask = select_latest_data(searched_cat,
229229
dict(
230-
chunks={'i': 60, 'j': 60, 'lev':50}
230+
chunks={'time': -1, 'lev':-1}
231231
),
232232
variable = "volcello",
233233
ensemble = "r0i0p0", # <- in the CMIP5 ACCESS catalog, the fixed data is in ensemble r0i0p0 (not in any other ensemble)
@@ -247,7 +247,7 @@ def sort_ensembles(ensembles):
247247
print("Loading areacello data")
248248
areacello_datadask = select_latest_data(searched_cat,
249249
dict(
250-
chunks={'i': 60, 'j': 60}
250+
chunks={'time': -1, 'lev':-1}
251251
),
252252
variable = "areacello",
253253
ensemble = "r0i0p0", # <- in the CMIP5 ACCESS catalog, the fixed data is in ensemble r0i0p0 (not in any other ensemble)
@@ -267,7 +267,7 @@ def sort_ensembles(ensembles):
267267
print("Loading umo data")
268268
umo_datadask = select_latest_data(searched_cat,
269269
dict(
270-
# chunks={'i': 60, 'j': 60, 'time': -1, 'lev':50}
270+
chunks={'time': -1, 'lev':-1}
271271
),
272272
variable = "umo",
273273
ensemble = ensemble,
@@ -290,7 +290,7 @@ def sort_ensembles(ensembles):
290290
print("Loading vmo data")
291291
vmo_datadask = select_latest_data(searched_cat,
292292
dict(
293-
# chunks={'i': 60, 'j': 60, 'time': -1, 'lev':50}
293+
chunks={'time': -1, 'lev':-1}
294294
),
295295
variable = "vmo",
296296
ensemble = ensemble,
@@ -313,7 +313,7 @@ def sort_ensembles(ensembles):
313313
print("Loading uo data")
314314
uo_datadask = select_latest_data(searched_cat,
315315
dict(
316-
# chunks={'i': 60, 'j': 60, 'time': -1, 'lev':50}
316+
chunks={'time': -1, 'lev':-1}
317317
),
318318
variable = "uo",
319319
ensemble = ensemble,
@@ -336,7 +336,7 @@ def sort_ensembles(ensembles):
336336
print("Loading vo data")
337337
vo_datadask = select_latest_data(searched_cat,
338338
dict(
339-
# chunks={'i': 60, 'j': 60, 'time': -1, 'lev':50}
339+
chunks={'time': -1, 'lev':-1}
340340
),
341341
variable = "vo",
342342
ensemble = ensemble,
@@ -359,7 +359,7 @@ def sort_ensembles(ensembles):
359359
print("Loading mlotst data")
360360
mlotst_datadask = select_latest_data(searched_cat,
361361
dict(
362-
# chunks={'i': 60, 'j': 60, 'time': -1, 'lev':50}
362+
chunks={'time': -1, 'lev':-1}
363363
),
364364
variable = "mlotst",
365365
ensemble = ensemble,
@@ -384,7 +384,7 @@ def sort_ensembles(ensembles):
384384
print("Loading thetao data")
385385
thetao_datadask = select_latest_data(searched_cat,
386386
dict(
387-
# chunks={'i': 60, 'j': 60, 'time': -1, 'lev':50}
387+
chunks={'time': -1, 'lev':-1}
388388
),
389389
variable = "thetao",
390390
ensemble = ensemble,
@@ -407,7 +407,7 @@ def sort_ensembles(ensembles):
407407
print("Loading so data")
408408
so_datadask = select_latest_data(searched_cat,
409409
dict(
410-
# chunks={'i': 60, 'j': 60, 'time': -1, 'lev':50}
410+
chunks={'time': -1, 'lev':-1}
411411
),
412412
variable = "so",
413413
ensemble = ensemble,
@@ -430,7 +430,7 @@ def sort_ensembles(ensembles):
430430
print("Loading agessc data")
431431
agessc_datadask = select_latest_data(searched_cat,
432432
dict(
433-
# chunks={'i': 60, 'j': 60, 'time': -1, 'lev':50}
433+
chunks={'time': -1, 'lev':-1}
434434
),
435435
variable = "agessc",
436436
ensemble = ensemble,

scripts/build_average_CMIP5_ACCESS_transport_state_on_Gadi.sh renamed to scripts/average_CMIP5_ACCESS_variables.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
#PBS -P xv83
44
#PBS -N CMIP5_ACCESS_preprocessing
5-
#PBS -l ncpus=5
5+
#PBS -l ncpus=28
66
#PBS -l mem=180GB
77
#PBS -l jobfs=4GB
88
#PBS -l walltime=3:00:00
@@ -17,7 +17,7 @@ cd ~/Projects/TMIP/notebooks
1717
echo "Loading conda/analysis3-24.04 module"
1818
module use /g/data/hh5/public/modules
1919
module load conda/analysis3-24.04
20-
conda init
20+
2121
conda activate conda/analysis3-24.04
2222
conda info
2323

@@ -32,7 +32,7 @@ year_start=1990
3232
num_years=10
3333

3434
echo "Running transport-state script"
35-
python scripts/build_average_CMIP5_ACCESS_transport_state_on_Gadi.py $model $experiment $ensemble $year_start $num_years \
35+
python scripts/average_CMIP5_ACCESS_variables.py $model $experiment $ensemble $year_start $num_years \
3636
&> output/$model.$experiment.allensembles.$year_start.$num_years.$PBS_JOBID.out
3737

3838

scripts/average_CMIP6_ACCESS_variables.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ def sort_members(members):
214214

215215
# This `if` statement is required in scripts (not required in Jupyter)
216216
if __name__ == '__main__':
217-
client = Client(n_workers=4)#, threads_per_worker=1, memory_limit='16GB') # Note: with 1thread/worker cannot plot thetao. Maybe I need to understand why?
217+
client = Client(n_workers=4, threads_per_worker=1) #, memory_limit='16GB') # Note: with 1thread/worker cannot plot thetao. Maybe I need to understand why?
218218

219219
for member in sorted_members:
220220

@@ -231,7 +231,7 @@ def sort_members(members):
231231
print("Loading volcello data")
232232
volcello_datadask = select_latest_data(searched_cat,
233233
dict(
234-
# chunks={'i': 60, 'j': 60, 'lev':50}
234+
chunks={'time': -1, 'lev':-1}
235235
),
236236
variable_id = "volcello",
237237
member_id = member,
@@ -251,7 +251,7 @@ def sort_members(members):
251251
print("Loading areacello data")
252252
areacello_datadask = select_latest_data(searched_cat,
253253
dict(
254-
# chunks={'i': 60, 'j': 60}
254+
chunks={'time': -1, 'lev':-1}
255255
),
256256
variable_id = "areacello",
257257
member_id = member,
@@ -271,7 +271,7 @@ def sort_members(members):
271271
print("Loading umo data")
272272
umo_datadask = select_latest_data(searched_cat,
273273
dict(
274-
# chunks={'i': 60, 'j': 60, 'time': -1, 'lev':50}
274+
chunks={'time': -1, 'lev':-1}
275275
),
276276
variable_id = "umo",
277277
member_id = member,
@@ -294,7 +294,7 @@ def sort_members(members):
294294
print("Loading vmo data")
295295
vmo_datadask = select_latest_data(searched_cat,
296296
dict(
297-
# chunks={'i': 60, 'j': 60, 'time': -1, 'lev':50}
297+
chunks={'time': -1, 'lev':-1}
298298
),
299299
variable_id = "vmo",
300300
member_id = member,
@@ -317,7 +317,7 @@ def sort_members(members):
317317
print("Loading uo data")
318318
uo_datadask = select_latest_data(searched_cat,
319319
dict(
320-
# chunks={'i': 60, 'j': 60, 'time': -1, 'lev':50}
320+
chunks={'time': -1, 'lev':-1}
321321
),
322322
variable_id = "uo",
323323
member_id = member,
@@ -340,7 +340,7 @@ def sort_members(members):
340340
print("Loading vo data")
341341
vo_datadask = select_latest_data(searched_cat,
342342
dict(
343-
# chunks={'i': 60, 'j': 60, 'time': -1, 'lev':50}
343+
chunks={'time': -1, 'lev':-1}
344344
),
345345
variable_id = "vo",
346346
member_id = member,
@@ -363,7 +363,7 @@ def sort_members(members):
363363
print("Loading mlotst data")
364364
mlotst_datadask = select_latest_data(searched_cat,
365365
dict(
366-
# chunks={'i': 60, 'j': 60, 'time': -1, 'lev':50}
366+
chunks={'time': -1, 'lev':-1}
367367
),
368368
variable_id = "mlotst",
369369
member_id = member,
@@ -392,7 +392,7 @@ def sort_members(members):
392392
print("Loading thetao data")
393393
thetao_datadask = select_latest_data(searched_cat,
394394
dict(
395-
# chunks={'i': 60, 'j': 60, 'time': -1, 'lev':50}
395+
chunks={'time': -1, 'lev':-1}
396396
),
397397
variable_id = "thetao",
398398
member_id = member,
@@ -415,7 +415,7 @@ def sort_members(members):
415415
print("Loading so data")
416416
so_datadask = select_latest_data(searched_cat,
417417
dict(
418-
# chunks={'i': 60, 'j': 60, 'time': -1, 'lev':50}
418+
chunks={'time': -1, 'lev':-1}
419419
),
420420
variable_id = "so",
421421
member_id = member,
@@ -438,7 +438,7 @@ def sort_members(members):
438438
print("Loading agessc data")
439439
agessc_datadask = select_latest_data(searched_cat,
440440
dict(
441-
# chunks={'i': 60, 'j': 60, 'time': -1, 'lev':50}
441+
chunks={'time': -1, 'lev':-1}
442442
),
443443
variable_id = "agessc",
444444
member_id = member,

scripts/build_average_CMIP6_ACCESS_piControl_transport_state_on_Gadi.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ def sort_members(members):
167167
print("Loading volcello data")
168168
volcello_datadask = select_latest_data(searched_cat,
169169
dict(
170-
chunks={'i': 60, 'j': 60, 'lev':50}
170+
chunks={'time': -1, 'lev':-1}
171171
),
172172
variable_id = "volcello",
173173
member_id = member,
@@ -178,7 +178,7 @@ def sort_members(members):
178178
print("Loading areacello data")
179179
areacello_datadask = select_latest_data(searched_cat,
180180
dict(
181-
chunks={'i': 60, 'j': 60}
181+
chunks={'time': -1, 'lev':-1}
182182
),
183183
variable_id = "areacello",
184184
member_id = member,

0 commit comments

Comments
 (0)