Skip to content

Commit acdc4e7

Browse files
authored
Merge pull request #1402 from CliMA/kp/rmse-regression
Add check for RMSEs
2 parents b1ecb02 + 62f55c3 commit acdc4e7

File tree

3 files changed

+132
-11
lines changed

3 files changed

+132
-11
lines changed

experiments/ClimaEarth/leaderboard/data_sources.jl

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,14 +78,18 @@ includes unit conversion and shifting the dates.
7878
The variable should have only three dimensions: latitude, longitude, and time.
7979
"""
8080
function get_sim_var_dict(diagnostics_folder_path)
81-
# List of short names
82-
available_short_names = ClimaAnalysis.available_vars(ClimaAnalysis.SimDir(diagnostics_folder_path))
81+
available_short_names = get_short_names_monthly_averages(diagnostics_folder_path)
8382
sim_var_dict = Dict{String, Any}()
8483
# Dict for loading in simulation data
8584
"pr" in available_short_names && (
8685
sim_var_dict["pr"] =
8786
() -> begin
88-
sim_var = get(ClimaAnalysis.SimDir(diagnostics_folder_path), short_name = "pr")
87+
sim_var = get(
88+
ClimaAnalysis.SimDir(diagnostics_folder_path),
89+
short_name = "pr",
90+
reduction = "average",
91+
period = "1M",
92+
)
8993
sim_var = ClimaAnalysis.convert_units(
9094
sim_var,
9195
"mm/day",
@@ -101,7 +105,12 @@ function get_sim_var_dict(diagnostics_folder_path)
101105
short_name in available_short_names && (
102106
sim_var_dict[short_name] =
103107
() -> begin
104-
sim_var = get(ClimaAnalysis.SimDir(diagnostics_folder_path), short_name = short_name)
108+
sim_var = get(
109+
ClimaAnalysis.SimDir(diagnostics_folder_path),
110+
short_name = short_name,
111+
reduction = "average",
112+
period = "1M",
113+
)
105114
sim_var = ClimaAnalysis.shift_to_start_of_previous_month(sim_var)
106115
return sim_var
107116
end
@@ -224,13 +233,24 @@ function get_sim_var_in_pfull_dict(diagnostics_folder_path)
224233
available_short_names = ClimaAnalysis.available_vars(ClimaAnalysis.SimDir(diagnostics_folder_path))
225234
sim_var_pfull_dict = Dict{String, Any}()
226235

227-
short_names = ["ta", "hur", "hus"]
236+
short_names = get_short_names_monthly_averages(diagnostics_folder_path)
237+
available_short_names = intersect(short_names, Set(["ta", "hur", "hus"]))
228238
for short_name in short_names
229239
short_name in available_short_names && (
230240
sim_var_pfull_dict[short_name] =
231241
() -> begin
232-
sim_var = get(ClimaAnalysis.SimDir(diagnostics_folder_path), short_name = short_name)
233-
pfull_var = get(ClimaAnalysis.SimDir(diagnostics_folder_path), short_name = "pfull")
242+
sim_var = get(
243+
ClimaAnalysis.SimDir(diagnostics_folder_path),
244+
short_name = short_name,
245+
reduction = "average",
246+
period = "1M",
247+
)
248+
pfull_var = get(
249+
ClimaAnalysis.SimDir(diagnostics_folder_path),
250+
short_name = "pfull",
251+
reduction = "average",
252+
period = "1M",
253+
)
234254

235255
(ClimaAnalysis.units(sim_var) == "") &&
236256
(sim_var = ClimaAnalysis.set_units(sim_var, "unitless"))
@@ -360,3 +380,23 @@ function get_compare_vars_biases_heatmap_extrema_pfull()
360380
compare_vars_biases_heatmap_extrema = Dict("ta" => (-10.0, 10.0), "hur" => (-0.4, 0.4), "hus" => (-0.001, 0.001))
361381
return compare_vars_biases_heatmap_extrema
362382
end
383+
"""
    get_short_names_monthly_averages(diagnostics_folder_path)

Return the set of short names that are available as monthly averages in
`diagnostics_folder_path`.

A short name is included when `ClimaAnalysis` lists the reduction `"average"` for it and
lists the period `"1M"` for that reduction. The result is a `Set{String}`, which is empty
when no variable qualifies.
"""
function get_short_names_monthly_averages(diagnostics_folder_path)
    simdir = ClimaAnalysis.SimDir(diagnostics_folder_path)
    available_short_names = Set{String}()
    for short_name in ClimaAnalysis.available_vars(simdir)
        # Only variables with an "average" reduction can have a monthly average
        "average" in ClimaAnalysis.available_reductions(simdir, short_name = short_name) || continue
        # Query the periods of the "average" reduction only; other reductions are irrelevant
        if "1M" in ClimaAnalysis.available_periods(simdir, short_name = short_name, reduction = "average")
            push!(available_short_names, short_name)
        end
    end
    return available_short_names
end
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import ClimaAnalysis
2+
import Dates
3+
import Test: @test, @testset
4+
5+
include("data_sources.jl")
6+
"""
    test_rmse_thresholds(diagnostics_folder_path, spinup)

Test that the annual RMSE values for specific variables have not increased
beyond acceptable thresholds. The variables tested are the keys of the dictionary
returned by `get_rmse_thresholds`:
- pr (precipitation)
- rsut (top of atmosphere outgoing shortwave radiation)
- rsutcs (clear-sky top of atmosphere outgoing shortwave radiation)

`spinup` is the number of months to remove from the beginning of the simulation.
Each month is counted as 31 days when converting the spinup to seconds. If the
simulation ends before the spinup cutoff, no data is removed.

More variables can be added by adding the short name and RMSE pair to the
dictionary returned by `get_rmse_thresholds`. Every short name listed there must
have a loader in both `get_sim_var_dict(diagnostics_folder_path)` and
`get_obs_var_dict()`; otherwise the dictionary lookup throws a `KeyError`.

If this test fails, it indicates a regression in the model's physics, resulting
in a higher RMSE. If this increased RMSE is considered acceptable, then the
thresholds should be updated accordingly.
"""
function test_rmse_thresholds(diagnostics_folder_path, spinup)
    sim_var_dict = get_sim_var_dict(diagnostics_folder_path)
    obs_var_dict = get_obs_var_dict()
    rmse_thresholds = get_rmse_thresholds()

    # Spinup expressed in seconds, counting every month as 31 days
    spinup_cutoff = spinup * 31 * 86400.0

    # Load and reduce every variable exactly once. Chaining lazy generators here would
    # re-run the loaders each time a generator is traversed.
    short_names_and_rmses = map(collect(keys(rmse_thresholds))) do threshold_name
        sim_var = sim_var_dict[threshold_name]()
        short_name = ClimaAnalysis.short_name(sim_var)
        obs_var = obs_var_dict[short_name](sim_var.attributes["start_date"])

        # Remove the first `spinup` months from the simulation, if it is long enough
        if ClimaAnalysis.times(sim_var)[end] >= spinup_cutoff
            sim_var = ClimaAnalysis.window(sim_var, "time", left = spinup_cutoff)
        end

        # Put the observations on the simulation's dimensions before time-averaging both
        obs_var = ClimaAnalysis.resampled_as(obs_var, sim_var)
        obs_var = ClimaAnalysis.average_time(obs_var)
        sim_var = ClimaAnalysis.average_time(sim_var)

        return short_name => ClimaAnalysis.global_rmse(sim_var, obs_var)
    end

    @testset "RMSE thresholds" begin
        for (short_name, rmse) in short_names_and_rmses
            @info "RMSE for $short_name: $rmse"
            @test rmse < rmse_thresholds[short_name]
        end
    end
end
56+
"""
    get_rmse_thresholds()

Return a dictionary whose keys are variable short names and whose values are the
largest RMSE tolerated for that variable.
"""
function get_rmse_thresholds()
    return Dict{String, Float64}(
        "pr" => 3.0, # mm/day
        "rsut" => 20.0, # W/m²
        "rsutcs" => 7.0, # W/m²
    )
end
70+
71+
# Run the RMSE regression test only when this file is executed as a script,
# not when it is included from another file.
if abspath(PROGRAM_FILE) == @__FILE__
    # Exactly one argument is expected: the path to the simulation data
    length(ARGS) == 1 || error("Usage: julia test_rmses.jl <Filepath to simulation data>")
    leaderboard_base_path = first(ARGS)
    spinup = 3
    test_rmse_thresholds(leaderboard_base_path, spinup)
end

experiments/ClimaEarth/user_io/postprocessing.jl

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
include("debug_plots.jl")
33
include("diagnostics_plots.jl")
44
include("../leaderboard/leaderboard.jl")
5+
include("../leaderboard/test_rmses.jl")
56

67
"""
78
postprocess_sim(cs, postprocessing_vars)
@@ -32,14 +33,16 @@ function postprocess_sim(cs, postprocessing_vars)
3233

3334
# If we have enough data (in time, but also enough variables), plot the leaderboard.
3435
# Pressure ("pfull") is only needed to compute the pfull leaderboard.
35-
pressure_in_output = "pfull" in CAN.available_vars(CAN.SimDir(atmos_output_dir))
36-
if pressure_in_output
37-
times = CAN.times(get(CAN.SimDir(atmos_output_dir), "pfull"))
36+
simdir = CAN.SimDir(atmos_output_dir)
37+
if !isempty(simdir)
38+
pressure_in_output = "pfull" in CAN.available_vars(simdir)
39+
times = CAN.times(get(simdir, first(CAN.available_vars(simdir))))
3840
t_end = times[end]
3941
if t_end > 84600 * 31 * 3 # 3 months for spin up
4042
leaderboard_base_path = artifact_dir
4143
compute_leaderboard(leaderboard_base_path, atmos_output_dir, 3)
42-
compute_pfull_leaderboard(leaderboard_base_path, atmos_output_dir, 6)
44+
test_rmse_thresholds(atmos_output_dir, 3)
45+
pressure_in_output && compute_pfull_leaderboard(leaderboard_base_path, atmos_output_dir, 6)
4346
end
4447
end
4548

0 commit comments

Comments
 (0)