Skip to content

Commit 7352272

Browse files
authored
Merge pull request #1210 from CliMA/kp/pipeline
Rewrite land calibration pipeline
2 parents 2879a7e + c5b02cd commit 7352272

19 files changed

+1208
-1031
lines changed

.buildkite/Manifest-v1.11.toml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
julia_version = "1.11.6"
44
manifest_format = "2.0"
5-
project_hash = "55c55a1c8b475869d4bb069be5aa7bbf8ba540bb"
5+
project_hash = "e0ba7ef43de83ce3ffcfd057845d46204b407ad8"
66

77
[[deps.ADTypes]]
88
git-tree-sha1 = "be7ae030256b8ef14a441726c4c37766b90b93a3"
@@ -416,9 +416,9 @@ weakdeps = ["SparseArrays"]
416416

417417
[[deps.ClimaAnalysis]]
418418
deps = ["Artifacts", "Dates", "Interpolations", "NCDatasets", "NaNStatistics", "OrderedCollections", "Reexport", "Statistics", "Unitful"]
419-
git-tree-sha1 = "79279dce43bac22423b5d7b83fdf8209bf00a331"
419+
git-tree-sha1 = "2488781691ad571f17dc40b02e5380296a80cd2e"
420420
uuid = "29b5916a-a76c-4e73-9657-3c8fd22e65e6"
421-
version = "0.5.18"
421+
version = "0.5.19"
422422
weakdeps = ["GeoMakie", "Makie"]
423423

424424
[deps.ClimaAnalysis.extensions]
@@ -427,9 +427,9 @@ weakdeps = ["GeoMakie", "Makie"]
427427

428428
[[deps.ClimaCalibrate]]
429429
deps = ["Dates", "Distributed", "Distributions", "EnsembleKalmanProcesses", "JLD2", "Logging", "Random", "TOML", "YAML"]
430-
git-tree-sha1 = "2408ba55a30ac0f67550ef9d87cebfc372af3cca"
430+
git-tree-sha1 = "2ba8eb38b9b4b1db74f7ac9ba4eddc92cdc44fef"
431431
uuid = "4347a170-ebd6-470c-89d3-5c705c0cacc2"
432-
version = "0.1.2"
432+
version = "0.1.3"
433433

434434
[deps.ClimaCalibrate.extensions]
435435
CESExt = "CalibrateEmulateSample"

.buildkite/Manifest.toml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
julia_version = "1.10.9"
44
manifest_format = "2.0"
5-
project_hash = "3f0124403c94b3ee688b94982d79b9dea53973ab"
5+
project_hash = "cccb69712e00d039f84ec7dabace280c32ba79f1"
66

77
[[deps.ADTypes]]
88
git-tree-sha1 = "be7ae030256b8ef14a441726c4c37766b90b93a3"
@@ -413,9 +413,9 @@ weakdeps = ["SparseArrays"]
413413

414414
[[deps.ClimaAnalysis]]
415415
deps = ["Artifacts", "Dates", "Interpolations", "NCDatasets", "NaNStatistics", "OrderedCollections", "Reexport", "Statistics", "Unitful"]
416-
git-tree-sha1 = "79279dce43bac22423b5d7b83fdf8209bf00a331"
416+
git-tree-sha1 = "2488781691ad571f17dc40b02e5380296a80cd2e"
417417
uuid = "29b5916a-a76c-4e73-9657-3c8fd22e65e6"
418-
version = "0.5.18"
418+
version = "0.5.19"
419419
weakdeps = ["GeoMakie", "Makie"]
420420

421421
[deps.ClimaAnalysis.extensions]
@@ -424,9 +424,9 @@ weakdeps = ["GeoMakie", "Makie"]
424424

425425
[[deps.ClimaCalibrate]]
426426
deps = ["Dates", "Distributed", "Distributions", "EnsembleKalmanProcesses", "JLD2", "Logging", "Random", "TOML", "YAML"]
427-
git-tree-sha1 = "2408ba55a30ac0f67550ef9d87cebfc372af3cca"
427+
git-tree-sha1 = "2ba8eb38b9b4b1db74f7ac9ba4eddc92cdc44fef"
428428
uuid = "4347a170-ebd6-470c-89d3-5c705c0cacc2"
429-
version = "0.1.2"
429+
version = "0.1.3"
430430

431431
[deps.ClimaCalibrate.extensions]
432432
CESExt = "CalibrateEmulateSample"
@@ -882,9 +882,9 @@ version = "2.2.4+0"
882882

883883
[[deps.EnsembleKalmanProcesses]]
884884
deps = ["Convex", "Distributions", "DocStringExtensions", "FFMPEG", "GaussianRandomFields", "Interpolations", "LinearAlgebra", "MathOptInterface", "Optim", "QuadGK", "Random", "RecipesBase", "SCS", "SparseArrays", "Statistics", "StatsBase", "TOML", "TSVD"]
885-
git-tree-sha1 = "938ba137333f7be93194cf5ffc4592b68b0efb36"
885+
git-tree-sha1 = "d2cec7e039df146c39723d908ce06ceb8b233536"
886886
uuid = "aa8a2aa5-91d8-4396-bcef-d4f2ec43552d"
887-
version = "2.4.2"
887+
version = "2.5.0"
888888
weakdeps = ["Makie"]
889889

890890
[deps.EnsembleKalmanProcesses.extensions]

.buildkite/Project.toml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ GeoMakie = "db073c08-6b98-4ee5-b6a4-5efafb3259c6"
2525
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
2626
Insolation = "e98cc03f-d57e-4e3c-b70c-8d51efe9e0d8"
2727
Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
28+
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
2829
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
2930
LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
3031
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -46,11 +47,11 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
4647
Thermodynamics = "b60c26fb-14c3-4610-9d3e-2d17fe7ff00c"
4748

4849
[compat]
49-
ClimaAnalysis = "0.5.17"
50-
ClimaCalibrate = "0.1"
50+
ClimaAnalysis = "0.5.19"
51+
ClimaCalibrate = "0.1.3"
5152
ClimaDiagnostics = "0.2.13"
5253
ClimaTimeSteppers = "0.7, 0.8"
53-
EnsembleKalmanProcesses = "2.4.1"
54+
EnsembleKalmanProcesses = "2.5.0"
5455
Flux = "0.15"
5556
GeoMakie = "< 0.7.13" # v0.7.13 causes infinite recursion with GridPositions
5657
Statistics = "1"

docs/src/calibration.md

Lines changed: 299 additions & 104 deletions
Large diffs are not rendered by default.

experiments/calibration/PBS_calibration.pbs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,16 @@
22
#PBS -N derecho_calibration
33
#PBS -o output.txt
44
#PBS -e error.txt
5-
#PBS -l walltime=12:00:00
6-
#PBS -l select=1:ncpus=4:ngpus=1
5+
#PBS -l walltime=6:00:00
6+
#PBS -l select=1:ncpus=1:ngpus=1
77

88
## Account number for CliMA
99
#PBS -A UCIT0011
1010
#PBS -q main
1111

12+
## Note: Using this script requires changing the addprocs call in
13+
## experiments/calibration/run_calibration.jl
14+
1215
export PBS_ACCOUNT="UCIT0011"
1316
export MODULEPATH="/glade/campaign/univ/ucit0011/ClimaModules-Derecho:$MODULEPATH"
1417
module load climacommon
@@ -17,4 +20,4 @@ export CLIMACOMMS_DEVICE="CUDA"
1720
export CLIMACOMMS_CONTEXT="SINGLETON"
1821
julia --project=.buildkite -e 'using Pkg; Pkg.instantiate(;verbose=true)'
1922

20-
julia --project=.buildkite/ experiments/calibration/calibrate_land.jl
23+
julia --project=.buildkite/ experiments/calibration/run_calibration.jl

experiments/calibration/api.jl

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
import Dates
2+
3+
"""
    CalibrateConfig{SPINUP <: Dates.Period, EXTEND <: Dates.Period}

Bundle of settings for a calibration run. It gathers the values a user running
calibration cares about, together with values that are needed in more than one
place (e.g., by the ensemble members and when generating observations).

The fields, in positional order, are:

    short_names::Vector{String}
    minibatch_size::Int64
    n_iterations::Int64
    sample_date_ranges::Vector{NTuple{2, Dates.DateTime}}
    extend::EXTEND
    spinup::SPINUP
    nelements::Tuple{Int64, Int64}
    output_dir::String
    rng_seed::Int64
"""
struct CalibrateConfig{SPINUP <: Dates.Period, EXTEND <: Dates.Period}
    """
    Short names of the observations used for calibration. These should match
    the names used for the diagnostics.
    """
    short_names::Vector{String}

    """Size of the minibatch used in each iteration."""
    minibatch_size::Int64

    """Number of iterations the calibration runs for."""
    n_iterations::Int64

    """
    Date ranges of the calibration samples. They are also used to determine the
    start and end dates of the simulation for each calibration iteration.
    """
    sample_date_ranges::Vector{Tuple{Dates.DateTime, Dates.DateTime}}

    """
    How long to keep running a simulation after the last date of the minibatch.
    """
    extend::EXTEND

    """
    How long to run a simulation before the first date of the minibatch.
    """
    spinup::SPINUP

    """
    Number of horizontal and vertical elements of the model. Used for the
    simulation and for determining the ocean mask.
    """
    nelements::NTuple{2, Int64}

    """Directory in which the iterations and members of the calibration are stored."""
    output_dir::String

    """
    Integer seed that makes repeated calibrations with the same settings give
    the same result.
    """
    rng_seed::Int64
end
54+
55+
"""
    CalibrateConfig(;
        short_names,
        minibatch_size,
        n_iterations,
        sample_date_ranges,
        extend,
        spinup = Dates.Month(3),
        nelements = (101, 15),
        output_dir = "experiments/calibration/land_model",
        rng_seed = 42,
    )

Validate the calibration settings and construct a `CalibrateConfig`, which holds
values that are of interest to a user running calibration or that are needed in
multiple places during calibration.

Keyword arguments
=====================

- `short_names`: Short names of the observations. The currently supported short
  names are `lhf`, `shf`, `lwu`, and `swu`. Must not be empty.

- `minibatch_size`: The size of the minibatch for each iteration. Must be
  positive and no larger than the number of samples.

- `n_iterations`: The number of iterations to run the calibration for. Must be
  positive.

- `sample_date_ranges`: The date ranges for each sample, as a non-empty, sorted
  collection of `(start, end)` pairs. Each entry is passed through
  `Dates.DateTime`, and the start must not be after the end. The dates should
  be the same as found in the time series data of the observations. Since the
  land calibration calibrates using seasonal averages, the times passed must be
  the first day of December, March, June, or September. The seasons are
  December to February (DJF), March to May (MAM), June to August (JJA), and
  September to November (SON). In addition, the start and end dates of the
  simulation are automatically determined from `sample_date_ranges`.

- `extend`: The amount of time to run the simulation after the end date
  determined by `sample_date_ranges`. For seasonal averages, `extend` should be
  `Dates.Month(3)` and for monthly averages, `extend` should be
  `Dates.Month(1)`.

- `spinup`: The amount of time to run the simulation before the start date
  determined by `sample_date_ranges`.

- `nelements`: The resolution of the model. This is also used to determine the
  mask of the observations.

- `output_dir`: The location to save the calibration at.

- `rng_seed`: An integer to ensure that calibration runs with the same settings
  are the same.

Errors and warnings
=====================

An `ErrorException` is thrown when any of the requirements above is violated. A
warning is logged when the number of samples is not divisible by
`minibatch_size`.
"""
function CalibrateConfig(;
    short_names,
    minibatch_size,
    n_iterations,
    sample_date_ranges,
    extend,
    spinup = Dates.Month(3),
    nelements = (101, 15),
    output_dir = "experiments/calibration/land_model",
    rng_seed = 42,
)
    isempty(short_names) && error("Cannot run calibration with no short names")
    isempty(sample_date_ranges) &&
        error("Cannot run calibration with no date ranges for the samples")

    # Normalize every pair to DateTime so that, e.g., `Dates.Date` inputs are
    # accepted. The typed comprehension guarantees the element type required by
    # the struct regardless of the input container's element type.
    sample_date_ranges = NTuple{2, Dates.DateTime}[
        (Dates.DateTime(date_pair[1]), Dates.DateTime(date_pair[2])) for
        date_pair in sample_date_ranges
    ]

    for (start_date, end_date) in sample_date_ranges
        # Equal start and end dates are allowed, so the message says
        # "not after" rather than "before".
        start_date <= end_date || error(
            "The start date ($start_date) should not be after the end date ($end_date)",
        )
    end
    issorted(sample_date_ranges) ||
        error("The samples in $sample_date_ranges should be sorted")

    # The positivity check must precede the remainder computation below, which
    # would otherwise divide by zero.
    minibatch_size > 0 ||
        error("The minibatch size ($minibatch_size) should be positive")
    n_iterations > 0 ||
        error("The number of iterations ($n_iterations) should be positive")

    num_samples = length(sample_date_ranges)
    minibatch_size > num_samples && error(
        "The minibatch size is $minibatch_size, but the number of samples is $num_samples",
    )

    remaining = num_samples % minibatch_size
    remaining == 0 || @warn(
        "Number of samples is not divisible by the minibatch size; the last $remaining samples may be missing when running the calibration"
    )

    # The implicit positional constructor of a parametric struct performs no
    # conversion, so convert explicitly. `convert` is the identity for values
    # that already have the field type; other integer or string types (e.g.
    # `Int32`, `SubString`) are converted instead of hitting a `MethodError`.
    return CalibrateConfig(
        convert(Vector{String}, short_names),
        convert(Int64, minibatch_size),
        convert(Int64, n_iterations),
        sample_date_ranges,
        extend,
        spinup,
        convert(Tuple{Int64, Int64}, nelements),
        convert(String, output_dir),
        convert(Int64, rng_seed),
    )
end

0 commit comments

Comments
 (0)