Skip to content

Commit eb3193e

Browse files
committed
clean up benchmark and flame graph jobs
1 parent 719cc53 commit eb3193e

13 files changed

+77
-161
lines changed

.buildkite/pipeline.yml

Lines changed: 52 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -828,48 +828,32 @@ steps:
828828
depends_on:
829829
- "baroclinic_wave"
830830
- "baroclinic_wave_gpu"
831-
832-
- label: "GPU: GPU dry baroclinic wave"
833-
key: "baroclinic_wave_helem30"
834-
command:
835-
- mkdir -p baroclinic_wave_helem30
836-
- >
837-
nsys profile --delay 100 --trace=nvtx,cuda --output=baroclinic_wave_helem30/output_active/report
838-
julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
839-
--config_file ${GPU_CONFIG_PATH}/baroclinic_wave_helem30.yml
840-
--job_id baroclinic_wave_helem30
841-
artifact_paths: "baroclinic_wave_helem30/output_active/*"
842-
env:
843-
CLIMACOMMS_DEVICE: "CUDA"
844-
agents:
845-
slurm_mem: 32GB
846-
slurm_gpus: 1
847831

848832

849-
- label: "GPU: GPU dry baroclinic wave - 4 gpus"
850-
key: "baroclinic_wave_helem30_4process"
833+
- label: "GPU: baroclinic wave - 2 gpus"
834+
key: "baroclinic_wave_2gpu"
851835
command:
852-
- mkdir -p baroclinic_wave_helem30_4process
836+
- mkdir -p baroclinic_wave_2gpu
853837
# - >
854838
# srun --cpu-bind=threads --cpus-per-task=4
855-
# nsys profile --delay 100 --trace=nvtx,cuda,mpi --output=baroclinic_wave_helem30_4process/output_active/report-%q{PMI_RANK}
839+
# nsys profile --delay 100 --trace=nvtx,cuda,mpi --output=baroclinic_wave_2gpu/output_active/report-%q{PMI_RANK}
856840
# julia --threads=3 --color=yes --project=.buildkite .buildkite/ci_driver.jl
857-
# --config_file ${GPU_CONFIG_PATH}/baroclinic_wave_helem30.yml
858-
# --job_id baroclinic_wave_helem30_4process
841+
# --config_file ${CONFIG_PATH}/baroclinic_wave.yml
842+
# --job_id baroclinic_wave_2gpu
859843
- >
860844
srun --cpu-bind=threads --cpus-per-task=4
861845
julia --threads=3 --color=yes --project=.buildkite .buildkite/ci_driver.jl
862-
--config_file ${GPU_CONFIG_PATH}/baroclinic_wave_helem30.yml
863-
--job_id baroclinic_wave_helem30_4process
864-
artifact_paths: "baroclinic_wave_helem30_4process/output_active/*"
846+
--config_file ${CONFIG_PATH}/baroclinic_wave.yml
847+
--job_id baroclinic_wave_2gpu
848+
artifact_paths: "baroclinic_wave_2gpu/output_active/*"
865849
env:
866850
CLIMACOMMS_CONTEXT: "MPI"
867851
CLIMACOMMS_DEVICE: "CUDA"
868852
agents:
869853
slurm_gpus_per_task: 1
870854
slurm_cpus_per_task: 4
871-
slurm_ntasks: 4
872-
slurm_mem: 32GB
855+
slurm_ntasks: 2
856+
slurm_mem: 16GB
873857

874858
- label: "GPU: test DYAMOND interpolated initial conditions"
875859
command: >
@@ -942,19 +926,19 @@ steps:
942926
slurm_mem: 16G
943927
slurm_gpus: 1
944928

945-
- label: ":computer: Benchmark: CPU perf target (default)"
929+
- label: ":computer: Benchmark: CPU default"
946930
command: >
947931
julia --color=yes --project=.buildkite perf/benchmark.jl
948-
--config_file $PERF_CONFIG_PATH/bm_perf_target.yml
949-
--job_id bm_perf_target
932+
--config_file $PERF_CONFIG_PATH/bm_default.yml
933+
--job_id bm_default
950934
agents:
951935
slurm_mem: 24GB
952936

953-
- label: ":computer: Benchmark: GPU perf target"
937+
- label: ":computer: Benchmark: GPU default"
954938
command: >
955939
julia --color=yes --project=.buildkite perf/benchmark.jl
956-
--config_file $PERF_CONFIG_PATH/bm_perf_target.yml
957-
--job_id bm_perf_target_gpu
940+
--config_file $PERF_CONFIG_PATH/bm_default.yml
941+
--job_id bm_default_gpu
958942
env:
959943
CLIMACOMMS_DEVICE: "CUDA"
960944
agents:
@@ -964,7 +948,7 @@ steps:
964948
- label: ":computer: Benchmark: GPU diag edmf"
965949
command: >
966950
julia --color=yes --project=.buildkite perf/benchmark.jl
967-
--config_file ${CONFIG_PATH}/aquaplanet_diagedmf.yml
951+
--config_file $PERF_CONFIG_PATH/bm_aquaplanet_diagedmf.yml
968952
--job_id bm_diag_edmf_gpu
969953
env:
970954
CLIMACOMMS_DEVICE: "CUDA"
@@ -975,14 +959,13 @@ steps:
975959
- label: ":computer: Benchmark: GPU prog edmf"
976960
command: >
977961
julia --color=yes --project=.buildkite perf/benchmark.jl
978-
--config_file ${CONFIG_PATH}/aquaplanet_progedmf.yml
962+
--config_file $PERF_CONFIG_PATH/bm_aquaplanet_progedmf.yml
979963
--job_id bm_prog_edmf_gpu
980964
env:
981965
CLIMACOMMS_DEVICE: "CUDA"
982966
agents:
983967
slurm_mem: 24GB
984968
slurm_gpus: 1
985-
soft_fail: true
986969

987970
- group: "Flame graphs"
988971
steps:
@@ -1000,75 +983,75 @@ steps:
1000983
slurm_gpus: 1
1001984
gres: "gpu:p100:1"
1002985

1003-
- label: ":fire: Flame graph: perf target (default)"
986+
- label: ":fire: Flame graph: perf target"
1004987
command: >
1005988
julia --color=yes --project=.buildkite perf/flame.jl
1006-
--config_file $PERF_CONFIG_PATH/flame_perf_target.yml
1007-
--job_id flame_perf_target
1008-
artifact_paths: "flame_perf_target/*"
989+
--config_file $PERF_CONFIG_PATH/bm_default.yml
990+
--job_id flame_default
991+
artifact_paths: "flame_default/*"
1009992
agents:
1010993
slurm_mem: 24GB
1011994

1012-
- label: ":fire: Flame graph: perf target (with tracers)"
995+
- label: ":fire: Flame graph: 1 moment"
1013996
command: >
1014997
julia --color=yes --project=.buildkite perf/flame.jl
1015-
--config_file $PERF_CONFIG_PATH/flame_perf_target_tracers.yml
1016-
--job_id flame_perf_target_tracers
1017-
artifact_paths: "flame_perf_target_tracers/*"
998+
--config_file $PERF_CONFIG_PATH/bm_default_1m.yml
999+
--job_id flame_default_1m
1000+
artifact_paths: "flame_default_1m/*"
10181001
agents:
10191002
slurm_mem: 24GB
10201003

1021-
- label: ":fire: Flame graph: perf target (diagnostics)"
1004+
- label: ":fire: Flame graph: diagnostics"
10221005
command: >
10231006
julia --color=yes --project=.buildkite perf/flame.jl
1024-
--config_file $PERF_CONFIG_PATH/flame_perf_diagnostics.yml
1025-
--job_id flame_perf_diagnostics
1026-
artifact_paths: "flame_perf_diagnostics/*"
1007+
--config_file $PERF_CONFIG_PATH/bm_diagnostics.yml
1008+
--job_id flame_diagnostics
1009+
artifact_paths: "flame_diagnostics/*"
10271010
agents:
10281011
slurm_mem: 24GB
10291012

1030-
- label: ":fire: Flame graph: perf target (diagnostic edmfx)"
1013+
- label: ":fire: Flame graph: diagnostics edmf"
10311014
command: >
10321015
julia --color=yes --project=.buildkite perf/flame.jl
1033-
--config_file $PERF_CONFIG_PATH/flame_perf_target_diagnostic_edmfx.yml
1034-
--job_id flame_perf_target_diagnostic_edmfx
1035-
artifact_paths: "flame_perf_target_diagnostic_edmfx/*"
1016+
--config_file $PERF_CONFIG_PATH/bm_aquaplanet_diagedmf.yml
1017+
--job_id flame_aquaplanet_diagedmf
1018+
artifact_paths: "flame_aquaplanet_diagedmf/*"
10361019
agents:
10371020
slurm_mem: 24GB
10381021

1039-
- label: ":fire: Flame graph: perf target (prognostic edmfx)"
1022+
- label: ":fire: Flame graph: prognostic edmf"
10401023
command: >
10411024
julia --color=yes --project=.buildkite perf/flame.jl
1042-
--config_file $PERF_CONFIG_PATH/flame_perf_target_prognostic_edmfx.yml
1043-
--job_id flame_perf_target_prognostic_edmfx
1044-
artifact_paths: "flame_perf_target_prognostic_edmfx/*"
1025+
--config_file $PERF_CONFIG_PATH/bm_aquaplanet_progedmf.yml
1026+
--job_id flame_aquaplanet_progedmf
1027+
artifact_paths: "flame_aquaplanet_progedmf/*"
10451028
agents:
10461029
slurm_mem: 32GB
10471030

1048-
- label: ":fire: Flame graph: perf target (diffusion)"
1031+
- label: ":fire: Flame graph: diffusion"
10491032
command: >
10501033
julia --color=yes --project=.buildkite perf/flame.jl
1051-
--config_file $PERF_CONFIG_PATH/flame_perf_target_diffusion.yml
1052-
--job_id flame_perf_target_diffusion
1053-
artifact_paths: "flame_perf_target_diffusion/*"
1034+
--config_file $PERF_CONFIG_PATH/bm_diffusion.yml
1035+
--job_id flame_diffusion
1036+
artifact_paths: "flame_diffusion/*"
10541037
agents:
10551038
slurm_mem: 24GB
10561039

10571040
- label: ":fire: Flame graph: perf target (Callbacks)"
10581041
command: >
10591042
julia --color=yes --project=.buildkite perf/flame.jl
1060-
--config_file $PERF_CONFIG_PATH/flame_perf_target_callbacks.yml
1061-
--job_id flame_perf_target_callbacks
1062-
artifact_paths: "flame_perf_target_callbacks/*"
1043+
--config_file $PERF_CONFIG_PATH/bm_callbacks.yml
1044+
--job_id flame_callbacks
1045+
artifact_paths: "flame_callbacks/*"
10631046
agents:
10641047
slurm_mem: 24GB
10651048

10661049
- label: ":fire: Flame graph: gravity wave"
10671050
command: >
10681051
julia --color=yes --project=.buildkite perf/flame.jl
1069-
--config_file $PERF_CONFIG_PATH/flame_perf_gw.yml
1070-
--job_id flame_perf_gw
1071-
artifact_paths: "flame_perf_gw/*"
1052+
--config_file $PERF_CONFIG_PATH/bm_gravity_wave.yml
1053+
--job_id flame_gravity_wave
1054+
artifact_paths: "flame_gravity_wave/*"
10721055
agents:
10731056
slurm_mem: 24GB
10741057

@@ -1080,7 +1063,7 @@ steps:
10801063
- label: ":computer: checkbounds"
10811064
command: >
10821065
julia --color=yes --check-bounds=yes --project=.buildkite perf/benchmark.jl
1083-
--config_file $PERF_CONFIG_PATH/flame_perf_target.yml
1066+
--config_file $PERF_CONFIG_PATH/bm_default.yml
10841067
--job_id checkbounds
10851068
artifact_paths: "checkbounds/output_active/*"
10861069
agents:
@@ -1090,7 +1073,7 @@ steps:
10901073
- label: ":rocket: JET n-failures (inference)"
10911074
command: >
10921075
julia --color=yes --project=.buildkite perf/jet_test_nfailures.jl
1093-
--config_file $PERF_CONFIG_PATH/flame_perf_target.yml
1076+
--config_file $PERF_CONFIG_PATH/bm_default.yml
10941077
--job_id jet_n_failures
10951078
agents:
10961079
slurm_mem: 24GB
@@ -1099,7 +1082,7 @@ steps:
10991082
- label: ":mag::rocket: Invalidations"
11001083
command: >
11011084
julia --color=yes --project=.buildkite perf/invalidations.jl
1102-
--config_file $PERF_CONFIG_PATH/flame_perf_target.yml
1085+
--config_file $PERF_CONFIG_PATH/bm_default.yml
11031086
artifact_paths: "invalidations/*"
11041087
agents:
11051088
slurm_mem: 24GB
Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,20 @@
1+
h_elem: 12
2+
z_elem: 25
3+
dt: 90secs
4+
t_end: 61mins
15
dt_save_state_to_disk: "Inf"
26
dt_save_to_sol: "Inf"
3-
output_default_diagnostics: false
4-
h_elem: 30
5-
z_max: 60000.0
6-
z_elem: 63
7-
dz_bottom: 30.0
7+
log_progress: false
88
rayleigh_sponge: true
99
viscous_sponge: true
10+
implicit_diffusion: true
11+
approximate_linear_solve_iters: 2
1012
moist: equil
1113
surface_setup: DefaultMoninObukhov
1214
rad: allskywithclear
13-
co2_model: maunaloa
14-
insolation: "timevarying"
1515
dt_rad: 1hours
1616
dt_cloud_fraction: 1hours
1717
turbconv: diagnostic_edmfx
18-
implicit_diffusion: true
19-
approximate_linear_solve_iters: 2
2018
prognostic_tke: true
2119
edmfx_upwinding: first_order
2220
edmfx_entr_model: "Generalized"
@@ -26,6 +24,4 @@ edmfx_sgs_mass_flux: true
2624
edmfx_sgs_diffusive_flux: true
2725
cloud_model: "quadrature_sgs"
2826
precip_model: 0M
29-
dt: 90secs
30-
t_end: 61mins
3127
toml: [toml/diagnostic_edmfx.toml]
Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,21 @@
1+
h_elem: 12
2+
z_elem: 25
3+
dt: 10secs
4+
t_end: 61mins
15
dt_save_state_to_disk: "Inf"
26
dt_save_to_sol: "Inf"
3-
output_default_diagnostics: false
4-
h_elem: 30
5-
z_max: 60000.0
6-
z_elem: 63
7-
dz_bottom: 30.0
7+
log_progress: false
88
rayleigh_sponge: true
99
viscous_sponge: true
10+
implicit_diffusion: true
11+
implicit_sgs_advection: true
12+
approximate_linear_solve_iters: 2
1013
moist: equil
1114
surface_setup: DefaultMoninObukhov
1215
rad: allskywithclear
13-
co2_model: fixed
14-
insolation: "timevarying"
1516
dt_rad: 1hours
1617
dt_cloud_fraction: 1hours
1718
turbconv: prognostic_edmfx
18-
implicit_diffusion: true
19-
implicit_sgs_advection: true
20-
approximate_linear_solve_iters: 2
2119
max_newton_iters_ode: 3
2220
prognostic_tke: true
2321
edmfx_upwinding: first_order
@@ -27,7 +25,6 @@ edmfx_nh_pressure: true
2725
edmfx_filter: true
2826
edmfx_sgs_mass_flux: true
2927
edmfx_sgs_diffusive_flux: true
28+
cloud_model: "quadrature_sgs"
3029
precip_model: 0M
31-
dt: 10secs
32-
t_end: 61mins
3330
toml: [toml/prognostic_edmfx.toml]

config/perf_configs/bm_perf_target.yml renamed to config/perf_configs/bm_default.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
apply_limiter: true
21
h_elem: 12
32
z_elem: 25
43
dt: "1secs"

config/perf_configs/flame_perf_target.yml

Lines changed: 0 additions & 14 deletions
This file was deleted.

0 commit comments

Comments
 (0)