Skip to content

Commit de630ad

Browse files
committed
clean up perf configs
1 parent 82edd09 commit de630ad

25 files changed

+161
-383
lines changed

.buildkite/pipeline.yml

Lines changed: 69 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -508,23 +508,10 @@ steps:
508508
--data_dir prep_remap/output_active --out_dir remap_pipeline_output
509509
artifact_paths: "remap_pipeline_output/*"
510510

511-
- group: "Configs"
512-
steps:
513-
514-
# TODO: we should somehow decouple this unit test from the perf env / scripts
515-
- label: ":computer: checkbounds"
516-
command: >
517-
julia --color=yes --check-bounds=yes --project=.buildkite perf/benchmark.jl
518-
--config_file $PERF_CONFIG_PATH/checkbounds.yml
519-
--job_id checkbounds
520-
artifact_paths: "checkbounds/output_active/*"
521-
agents:
522-
slurm_mem: 20GB
523-
524511
- group: "EDOnlyEDMFX"
525512
steps:
526513

527-
- label: ":man_in_business_suit_levitating: EDOnlyEDMFX test on a sphere"
514+
- label: ":man_in_business_suit_levitating: EDOnly EDMFX aquaplanet"
528515
command: >
529516
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
530517
--config_file $CONFIG_PATH/edonly_edmfx_aquaplanet.yml
@@ -533,18 +520,6 @@ steps:
533520
agents:
534521
slurm_mem: 20GB
535522

536-
- label: ":man_in_business_suit_levitating: EDOnlyEDMFX test on a sphere GPU"
537-
command: >
538-
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
539-
--config_file $CONFIG_PATH/edonly_edmfx_aquaplanet.yml
540-
--job_id edonly_edmfx_aquaplanet_gpu
541-
artifact_paths: "edonly_edmfx_aquaplanet_gpu/output_active/*"
542-
env:
543-
CLIMACOMMS_DEVICE: "CUDA"
544-
agents:
545-
slurm_mem: 20GB
546-
slurm_gpus: 1
547-
548523
- group: "Diagnostic EDMFX"
549524
steps:
550525

@@ -647,7 +622,7 @@ steps:
647622
agents:
648623
slurm_mem: 20GB
649624

650-
- label: ":genie: Diagnostic EDMFX aquaplanet with TKE"
625+
- label: ":genie: Diagnostic EDMFX aquaplanet"
651626
command: >
652627
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
653628
--config_file $CONFIG_PATH/diagnostic_edmfx_aquaplanet.yml
@@ -682,6 +657,15 @@ steps:
682657
artifact_paths: "prognostic_edmfx_simpleplume_column/output_active/*"
683658
agents:
684659
slurm_mem: 20GB
660+
661+
- label: ":genie: Prognostic EDMFX Soares in a column"
662+
command: >
663+
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
664+
--config_file $CONFIG_PATH/prognostic_edmfx_soares_column.yml
665+
--job_id prognostic_edmfx_soares_column
666+
artifact_paths: "prognostic_edmfx_soares_column/output_active/*"
667+
agents:
668+
slurm_mem: 20GB
685669

686670
- label: ":genie: Prognostic EDMFX GABLS in a column"
687671
command: >
@@ -803,15 +787,6 @@ steps:
803787
agents:
804788
slurm_mem: 20GB
805789

806-
- label: ":genie: Prognostic EDMFX Soares in a column"
807-
command: >
808-
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
809-
--config_file $CONFIG_PATH/prognostic_edmfx_soares_column.yml
810-
--job_id prognostic_edmfx_soares_column
811-
artifact_paths: "prognostic_edmfx_soares_column/output_active/*"
812-
agents:
813-
slurm_mem: 20GB
814-
815790
- group: "GPU"
816791
steps:
817792

@@ -923,31 +898,29 @@ steps:
923898
slurm_gpus: 1
924899
slurm_mem: 16G
925900

926-
- label: ":umbrella: GPU: gpu_aquaplanet_dyamond"
927-
command:
928-
- mkdir -p gpu_aquaplanet_dyamond
929-
- >
930-
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
931-
--config_file ${CONFIG_PATH}/gpu_aquaplanet_dyamond.yml
932-
--job_id gpu_aquaplanet_dyamond
933-
artifact_paths: "gpu_aquaplanet_dyamond/output_active/*"
901+
- label: "GPU: test DYAMOND interpolated initial conditions"
902+
command: >
903+
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
904+
--config_file $GPU_CONFIG_PATH/gpu_aquaplanet_dyamond_summer.yml
905+
--job_id gpu_aquaplanet_dyamond_summer
906+
artifact_paths: "gpu_aquaplanet_dyamond_summer/output_active/*"
934907
env:
935908
CLIMACOMMS_DEVICE: "CUDA"
936909
agents:
937910
slurm_gpus: 1
938911
slurm_mem: 16G
939912

940-
- label: "GPU: test DYAMOND interpolated initial conditions"
913+
- label: "GPU: EDOnly EDMFX aquaplanet"
941914
command: >
942915
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
943-
--config_file $GPU_CONFIG_PATH/gpu_aquaplanet_dyamond_summer.yml
944-
--job_id gpu_aquaplanet_dyamond_summer
945-
artifact_paths: "gpu_aquaplanet_dyamond_summer/output_active/*"
916+
--config_file $CONFIG_PATH/edonly_edmfx_aquaplanet.yml
917+
--job_id edonly_edmfx_aquaplanet_gpu
918+
artifact_paths: "edonly_edmfx_aquaplanet_gpu/output_active/*"
946919
env:
947920
CLIMACOMMS_DEVICE: "CUDA"
948921
agents:
922+
slurm_mem: 20GB
949923
slurm_gpus: 1
950-
slurm_mem: 16G
951924

952925
- label: "GPU: Diagnostic EDMFX aquaplanet"
953926
key: "diagnostic_edmfx_aquaplanet_gpu"
@@ -977,47 +950,34 @@ steps:
977950
- group: "Benchmarks"
978951
steps:
979952

980-
- label: ":computer: Benchmark: GPU implicit baro wave"
953+
- label: ":computer: Benchmark: CPU baroclinic wave moist"
981954
command: >
982-
983955
julia --color=yes --project=.buildkite perf/benchmark_step.jl
984-
--config_file $PERF_CONFIG_PATH/gpu_implicit_barowave.yml
985-
--job_id gpu_implicit_barowave
986-
artifact_paths: "gpu_implicit_barowave/output_active/*"
987-
env:
988-
CLIMACOMMS_DEVICE: "CUDA"
989-
agents:
990-
slurm_gpus: 1
956+
--config_file $PERF_CONFIG_PATH/bm_baroclinic_wave_moist.yml
957+
--job_id bm_baroclinic_wave_moist
958+
artifact_paths: "bm_baroclinic_wave_moist/output_active/*"
991959

992-
- label: ":computer: Benchmark: GPU implicit baro wave moist"
960+
- label: ":computer: Benchmark: GPU baroclinic wave moist"
993961
command: >
994962
995963
julia --color=yes --project=.buildkite perf/benchmark_step.jl
996-
--config_file $PERF_CONFIG_PATH/gpu_implicit_barowave_moist.yml
997-
--job_id gpu_implicit_barowave_moist
998-
artifact_paths: "gpu_implicit_barowave_moist/output_active/*"
964+
--config_file $PERF_CONFIG_PATH/bm_baroclinic_wave_moist.yml
965+
--job_id bm_baroclinic_wave_moist_gpu
966+
artifact_paths: "bm_baroclinic_wave_moist_gpu/output_active/*"
999967
env:
1000968
CLIMACOMMS_DEVICE: "CUDA"
1001969
agents:
1002970
slurm_mem: 16G
1003971
slurm_gpus: 1
1004972

1005-
- label: ":computer: Benchmark: CPU implicit baro wave"
1006-
command: >
1007-
julia --color=yes --project=.buildkite perf/benchmark_step.jl
1008-
--config_file $PERF_CONFIG_PATH/cpu_implicit_barowave.yml
1009-
--job_id cpu_implicit_barowave
1010-
artifact_paths: "cpu_implicit_barowave/output_active/*"
1011-
1012-
# Benchmarks
1013973
- label: ":computer: Benchmark: CPU perf target (default)"
1014974
command: >
1015975
julia --color=yes --project=.buildkite perf/benchmark.jl
1016976
--config_file $PERF_CONFIG_PATH/bm_perf_target.yml
1017977
--job_id bm_perf_target
1018978
agents:
1019979
slurm_mem: 24GB
1020-
980+
1021981
- label: ":computer: Benchmark: GPU perf target"
1022982
command: >
1023983
julia --color=yes --project=.buildkite perf/benchmark.jl
@@ -1055,6 +1015,19 @@ steps:
10551015
- group: "Flame graphs"
10561016
steps:
10571017

1018+
- label: ":fire: Flame graph: gpu job"
1019+
command: >
1020+
julia --color=yes --project=.buildkite perf/flame.jl
1021+
--config_file $PERF_CONFIG_PATH/bm_baroclinic_wave_moist.yml
1022+
--job_id flame_baroclinic_wave_moist_gpu
1023+
artifact_paths: "flame_baroclinic_wave_moist_gpu/*"
1024+
env:
1025+
CLIMACOMMS_DEVICE: "CUDA"
1026+
agents:
1027+
slurm_mem: 48GB
1028+
slurm_gpus: 1
1029+
gres: "gpu:p100:1"
1030+
10581031
- label: ":fire: Flame graph: perf target (default)"
10591032
command: >
10601033
julia --color=yes --project=.buildkite perf/flame.jl
@@ -1072,6 +1045,15 @@ steps:
10721045
artifact_paths: "flame_perf_target_tracers/*"
10731046
agents:
10741047
slurm_mem: 24GB
1048+
1049+
- label: ":fire: Flame graph: perf target (diagnostics)"
1050+
command: >
1051+
julia --color=yes --project=.buildkite perf/flame.jl
1052+
--config_file $PERF_CONFIG_PATH/flame_perf_diagnostics.yml
1053+
--job_id flame_perf_diagnostics
1054+
artifact_paths: "flame_perf_diagnostics/*"
1055+
agents:
1056+
slurm_mem: 24GB
10751057

10761058
- label: ":fire: Flame graph: perf target (diagnostic edmfx)"
10771059
command: >
@@ -1082,12 +1064,12 @@ steps:
10821064
agents:
10831065
slurm_mem: 24GB
10841066

1085-
- label: ":fire: Flame graph: perf target (prognostic edmfx aquaplanet)"
1067+
- label: ":fire: Flame graph: perf target (prognostic edmfx)"
10861068
command: >
10871069
julia --color=yes --project=.buildkite perf/flame.jl
1088-
--config_file $PERF_CONFIG_PATH/flame_perf_target_prognostic_edmfx_aquaplanet.yml
1089-
--job_id flame_perf_target_prognostic_edmfx_aquaplanet
1090-
artifact_paths: "flame_perf_target_prognostic_edmfx_aquaplanet/*"
1070+
--config_file $PERF_CONFIG_PATH/flame_perf_target_prognostic_edmfx.yml
1071+
--job_id flame_perf_target_prognostic_edmfx
1072+
artifact_paths: "flame_perf_target_prognostic_edmfx/*"
10911073
agents:
10921074
slurm_mem: 32GB
10931075

@@ -1118,36 +1100,25 @@ steps:
11181100
agents:
11191101
slurm_mem: 24GB
11201102

1121-
- label: ":fire: Flame graph: perf target (diagnostics)"
1122-
command: >
1123-
julia --color=yes --project=.buildkite perf/flame.jl
1124-
--config_file $PERF_CONFIG_PATH/flame_perf_diagnostics.yml
1125-
--job_id flame_perf_diagnostics
1126-
artifact_paths: "flame_perf_diagnostics/*"
1127-
agents:
1128-
slurm_mem: 24GB
1103+
- group: "Checkbounds/Inference/Invalidations"
1104+
steps:
11291105

1130-
- label: ":fire: Flame graph: gpu job"
1106+
# TODO: we should somehow decouple this unit test from the perf env / scripts
1107+
# Checkbounds
1108+
- label: ":computer: checkbounds"
11311109
command: >
1132-
julia --color=yes --project=.buildkite perf/flame.jl
1133-
--config_file $PERF_CONFIG_PATH/flame_gpu_implicit_barowave_moist.yml
1134-
--job_id flame_gpu_implicit_barowave_moist
1135-
artifact_paths: "flame_gpu_implicit_barowave_moist/*"
1136-
env:
1137-
CLIMACOMMS_DEVICE: "CUDA"
1110+
julia --color=yes --check-bounds=yes --project=.buildkite perf/benchmark.jl
1111+
--config_file $PERF_CONFIG_PATH/flame_perf_target.yml
1112+
--job_id checkbounds
1113+
artifact_paths: "checkbounds/output_active/*"
11381114
agents:
1139-
slurm_mem: 48GB
1140-
slurm_gpus: 1
1141-
gres: "gpu:p100:1"
1142-
1143-
- group: "Inference/Invalidations"
1144-
steps:
1115+
slurm_mem: 20GB
11451116

11461117
# Inference
11471118
- label: ":rocket: JET n-failures (inference)"
11481119
command: >
11491120
julia --color=yes --project=.buildkite perf/jet_test_nfailures.jl
1150-
--config_file $PERF_CONFIG_PATH/jet_n_failures.yml
1121+
--config_file $PERF_CONFIG_PATH/flame_perf_target.yml
11511122
--job_id jet_n_failures
11521123
agents:
11531124
slurm_mem: 24GB
@@ -1156,6 +1127,7 @@ steps:
11561127
- label: ":mag::rocket: Invalidations"
11571128
command: >
11581129
julia --color=yes --project=.buildkite perf/invalidations.jl
1130+
--config_file $PERF_CONFIG_PATH/flame_perf_target.yml
11591131
artifact_paths: "invalidations/*"
11601132
agents:
11611133
slurm_mem: 24GB

config/gpu_configs/gpu_aquaplanet_dyamond_summer.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,8 @@ prescribed_aerosols: ["CB1", "CB2", "DST01", "DST02", "DST03", "DST04", "DST05",
2525
start_date: "20160801"
2626
initial_condition: "artifact\"DYAMOND_SUMMER_ICS_p98deg\"/DYAMOND_SUMMER_ICS_p98deg.nc"
2727
topography: "Earth"
28+
diagnostics:
29+
- short_name: [massa, energya]
30+
period: 1hours
31+
writer: dict
32+

config/model_configs/gpu_aquaplanet_dyamond.yml

Lines changed: 0 additions & 30 deletions
This file was deleted.

config/model_configs/single_column_radiative_equilibrium_clearsky_prognostic_surface_temp.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,4 @@ dt_rad: "3hours"
1212
dt_save_to_sol: "30hours"
1313
dt_save_state_to_disk: "100days"
1414
prognostic_surface: true
15-
surface_setup: DefaultExchangeCoefficients
1615
toml: [toml/single_column_radiative_equilibrium_clearsky_prognostic_surface_temp.toml]
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
apply_limiter: true
2+
h_elem: 12
3+
z_elem: 25
4+
dt: "1secs"
5+
t_end: "1mins"
6+
dt_save_to_sol: "Inf"
7+
log_progress: false
8+
moist: "equil"
9+
initial_condition: "MoistBaroclinicWave"
10+
precip_model: "0M"
Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
1-
moist: "equil"
21
apply_limiter: true
2+
h_elem: 12
33
z_elem: 25
44
dt: "1secs"
5-
surface_setup: "DefaultExchangeCoefficients"
65
t_end: "10secs"
7-
vert_diff: true
6+
dt_save_to_sol: "Inf"
7+
log_progress: false
88
implicit_diffusion: true
99
approximate_linear_solve_iters: 2
10-
h_elem: 12
11-
forcing: "held_suarez"
12-
precip_model: "0M"
13-
dt_save_to_sol: "Inf"
10+
moist: "equil"
11+
surface_setup: "DefaultMoninObukhov"
1412
rad: "allskywithclear"
15-
log_progress: false
13+
vert_diff: true
14+
precip_model: "0M"
1615
perf_summary: true

config/perf_configs/checkbounds.yml

Lines changed: 0 additions & 16 deletions
This file was deleted.

0 commit comments

Comments
 (0)