@@ -828,48 +828,32 @@ steps:
828
828
depends_on :
829
829
- " baroclinic_wave"
830
830
- " baroclinic_wave_gpu"
831
-
832
- - label : " GPU: GPU dry baroclinic wave"
833
- key : " baroclinic_wave_helem30"
834
- command :
835
- - mkdir -p baroclinic_wave_helem30
836
- - >
837
- nsys profile --delay 100 --trace=nvtx,cuda --output=baroclinic_wave_helem30/output_active/report
838
- julia --color=yes --project=.buildkite .buildkite/ci_driver.jl
839
- --config_file ${GPU_CONFIG_PATH}/baroclinic_wave_helem30.yml
840
- --job_id baroclinic_wave_helem30
841
- artifact_paths : " baroclinic_wave_helem30/output_active/*"
842
- env :
843
- CLIMACOMMS_DEVICE : " CUDA"
844
- agents :
845
- slurm_mem : 32GB
846
- slurm_gpus : 1
847
831
848
832
849
- - label : " GPU: GPU dry baroclinic wave - 4 gpus"
850
- key : " baroclinic_wave_helem30_4process "
833
+ - label : " GPU: baroclinic wave - 2 gpus"
834
+ key: "baroclinic_wave_2gpu"
851
835
command :
852
- - mkdir -p baroclinic_wave_helem30_4process
836
+ - mkdir -p baroclinic_wave_2gpu
853
837
# - >
854
838
# srun --cpu-bind=threads --cpus-per-task=4
855
- # nsys profile --delay 100 --trace=nvtx,cuda,mpi --output=baroclinic_wave_helem30_4process /output_active/report-%q{PMI_RANK}
839
+ # nsys profile --delay 100 --trace=nvtx,cuda,mpi --output=baroclinic_wave_2gpu/output_active/report-%q{PMI_RANK}
856
840
# julia --threads=3 --color=yes --project=.buildkite .buildkite/ci_driver.jl
857
- # --config_file ${GPU_CONFIG_PATH}/baroclinic_wave_helem30 .yml
858
- # --job_id baroclinic_wave_helem30_4process
841
+ # --config_file ${CONFIG_PATH}/baroclinic_wave.yml
842
+ # --job_id baroclinic_wave_2gpu
859
843
- >
860
844
srun --cpu-bind=threads --cpus-per-task=4
861
845
julia --threads=3 --color=yes --project=.buildkite .buildkite/ci_driver.jl
862
- --config_file ${GPU_CONFIG_PATH}/baroclinic_wave_helem30 .yml
863
- --job_id baroclinic_wave_helem30_4process
864
- artifact_paths : " baroclinic_wave_helem30_4process /output_active/*"
846
+ --config_file ${CONFIG_PATH}/baroclinic_wave.yml
847
+ --job_id baroclinic_wave_2gpu
848
+ artifact_paths: "baroclinic_wave_2gpu/output_active/*"
865
849
env :
866
850
CLIMACOMMS_CONTEXT : " MPI"
867
851
CLIMACOMMS_DEVICE : " CUDA"
868
852
agents :
869
853
slurm_gpus_per_task : 1
870
854
slurm_cpus_per_task : 4
871
- slurm_ntasks : 4
872
- slurm_mem : 32GB
855
+ slurm_ntasks : 2
856
+ slurm_mem : 16GB
873
857
874
858
- label : " GPU: test DYAMOND interpolated initial conditions"
875
859
command : >
@@ -942,19 +926,19 @@ steps:
942
926
slurm_mem : 16G
943
927
slurm_gpus : 1
944
928
945
- - label : " :computer: Benchmark: CPU perf target ( default) "
929
+ - label : " :computer: Benchmark: CPU default"
946
930
command : >
947
931
julia --color=yes --project=.buildkite perf/benchmark.jl
948
- --config_file $PERF_CONFIG_PATH/bm_perf_target .yml
949
- --job_id bm_perf_target
932
+ --config_file $PERF_CONFIG_PATH/bm_default.yml
933
+ --job_id bm_default
950
934
agents :
951
935
slurm_mem : 24GB
952
936
953
- - label : " :computer: Benchmark: GPU perf target "
937
+ - label : " :computer: Benchmark: GPU default "
954
938
command : >
955
939
julia --color=yes --project=.buildkite perf/benchmark.jl
956
- --config_file $PERF_CONFIG_PATH/bm_perf_target .yml
957
- --job_id bm_perf_target_gpu
940
+ --config_file $PERF_CONFIG_PATH/bm_default.yml
941
+ --job_id bm_default_gpu
958
942
env :
959
943
CLIMACOMMS_DEVICE : " CUDA"
960
944
agents :
@@ -964,7 +948,7 @@ steps:
964
948
- label : " :computer: Benchmark: GPU diag edmf"
965
949
command : >
966
950
julia --color=yes --project=.buildkite perf/benchmark.jl
967
- --config_file ${CONFIG_PATH}/aquaplanet_diagedmf .yml
951
+ --config_file $PERF_CONFIG_PATH/bm_aquaplanet_diagedmf.yml
968
952
--job_id bm_diag_edmf_gpu
969
953
env :
970
954
CLIMACOMMS_DEVICE : " CUDA"
@@ -975,14 +959,13 @@ steps:
975
959
- label : " :computer: Benchmark: GPU prog edmf"
976
960
command : >
977
961
julia --color=yes --project=.buildkite perf/benchmark.jl
978
- --config_file ${CONFIG_PATH}/aquaplanet_progedmf .yml
962
+ --config_file $PERF_CONFIG_PATH/bm_aquaplanet_progedmf.yml
979
963
--job_id bm_prog_edmf_gpu
980
964
env :
981
965
CLIMACOMMS_DEVICE : " CUDA"
982
966
agents :
983
967
slurm_mem : 24GB
984
968
slurm_gpus : 1
985
- soft_fail : true
986
969
987
970
- group : " Flame graphs"
988
971
steps :
@@ -1000,75 +983,75 @@ steps:
1000
983
slurm_gpus : 1
1001
984
gres : " gpu:p100:1"
1002
985
1003
- - label : " :fire: Flame graph: perf target (default) "
986
+ - label : " :fire: Flame graph: perf target "
1004
987
command : >
1005
988
julia --color=yes --project=.buildkite perf/flame.jl
1006
- --config_file $PERF_CONFIG_PATH/flame_perf_target .yml
1007
- --job_id flame_perf_target
1008
- artifact_paths : " flame_perf_target /*"
989
+ --config_file $PERF_CONFIG_PATH/bm_default.yml
990
+ --job_id flame_default
991
+ artifact_paths: "flame_default/*"
1009
992
agents :
1010
993
slurm_mem : 24GB
1011
994
1012
- - label : " :fire: Flame graph: perf target (with tracers) "
995
+ - label : " :fire: Flame graph: 1 moment "
1013
996
command : >
1014
997
julia --color=yes --project=.buildkite perf/flame.jl
1015
- --config_file $PERF_CONFIG_PATH/flame_perf_target_tracers .yml
1016
- --job_id flame_perf_target_tracers
1017
- artifact_paths : " flame_perf_target_tracers /*"
998
+ --config_file $PERF_CONFIG_PATH/bm_default_1m.yml
999
+ --job_id flame_default_1m
1000
+ artifact_paths: "flame_default_1m/*"
1018
1001
agents :
1019
1002
slurm_mem : 24GB
1020
1003
1021
- - label : " :fire: Flame graph: perf target ( diagnostics) "
1004
+ - label : " :fire: Flame graph: diagnostics"
1022
1005
command : >
1023
1006
julia --color=yes --project=.buildkite perf/flame.jl
1024
- --config_file $PERF_CONFIG_PATH/flame_perf_diagnostics .yml
1025
- --job_id flame_perf_diagnostics
1026
- artifact_paths : " flame_perf_diagnostics /*"
1007
+ --config_file $PERF_CONFIG_PATH/bm_diagnostics.yml
1008
+ --job_id flame_diagnostics
1009
+ artifact_paths: "flame_diagnostics/*"
1027
1010
agents :
1028
1011
slurm_mem : 24GB
1029
1012
1030
- - label : " :fire: Flame graph: perf target (diagnostic edmfx) "
1013
+ - label : " :fire: Flame graph: diagnostics edmf "
1031
1014
command : >
1032
1015
julia --color=yes --project=.buildkite perf/flame.jl
1033
- --config_file $PERF_CONFIG_PATH/flame_perf_target_diagnostic_edmfx .yml
1034
- --job_id flame_perf_target_diagnostic_edmfx
1035
- artifact_paths : " flame_perf_target_diagnostic_edmfx /*"
1016
+ --config_file $PERF_CONFIG_PATH/bm_aquaplanet_diagedmf.yml
1017
+ --job_id flame_aquaplanet_diagedmf
1018
+ artifact_paths: "flame_aquaplanet_diagedmf/*"
1036
1019
agents :
1037
1020
slurm_mem : 24GB
1038
1021
1039
- - label : " :fire: Flame graph: perf target ( prognostic edmfx) "
1022
+ - label : " :fire: Flame graph: prognostic edmf "
1040
1023
command : >
1041
1024
julia --color=yes --project=.buildkite perf/flame.jl
1042
- --config_file $PERF_CONFIG_PATH/flame_perf_target_prognostic_edmfx .yml
1043
- --job_id flame_perf_target_prognostic_edmfx
1044
- artifact_paths : " flame_perf_target_prognostic_edmfx /*"
1025
+ --config_file $PERF_CONFIG_PATH/bm_aquaplanet_progedmf.yml
1026
+ --job_id flame_aquaplanet_progedmf
1027
+ artifact_paths: "flame_aquaplanet_progedmf/*"
1045
1028
agents :
1046
1029
slurm_mem : 32GB
1047
1030
1048
- - label : " :fire: Flame graph: perf target ( diffusion) "
1031
+ - label : " :fire: Flame graph: diffusion"
1049
1032
command : >
1050
1033
julia --color=yes --project=.buildkite perf/flame.jl
1051
- --config_file $PERF_CONFIG_PATH/flame_perf_target_diffusion .yml
1052
- --job_id flame_perf_target_diffusion
1053
- artifact_paths : " flame_perf_target_diffusion /*"
1034
+ --config_file $PERF_CONFIG_PATH/bm_diffusion.yml
1035
+ --job_id flame_diffusion
1036
+ artifact_paths: "flame_diffusion/*"
1054
1037
agents :
1055
1038
slurm_mem : 24GB
1056
1039
1057
1040
- label : " :fire: Flame graph: perf target (Callbacks)"
1058
1041
command : >
1059
1042
julia --color=yes --project=.buildkite perf/flame.jl
1060
- --config_file $PERF_CONFIG_PATH/flame_perf_target_callbacks .yml
1061
- --job_id flame_perf_target_callbacks
1062
- artifact_paths : " flame_perf_target_callbacks /*"
1043
+ --config_file $PERF_CONFIG_PATH/bm_callbacks.yml
1044
+ --job_id flame_callbacks
1045
+ artifact_paths: "flame_callbacks/*"
1063
1046
agents :
1064
1047
slurm_mem : 24GB
1065
1048
1066
1049
- label : " :fire: Flame graph: gravity wave"
1067
1050
command : >
1068
1051
julia --color=yes --project=.buildkite perf/flame.jl
1069
- --config_file $PERF_CONFIG_PATH/flame_perf_gw .yml
1070
- --job_id flame_perf_gw
1071
- artifact_paths : " flame_perf_gw /*"
1052
+ --config_file $PERF_CONFIG_PATH/bm_gravity_wave.yml
1053
+ --job_id flame_gravity_wave
1054
+ artifact_paths: "flame_gravity_wave/*"
1072
1055
agents :
1073
1056
slurm_mem : 24GB
1074
1057
@@ -1080,7 +1063,7 @@ steps:
1080
1063
- label : " :computer: checkbounds"
1081
1064
command : >
1082
1065
julia --color=yes --check-bounds=yes --project=.buildkite perf/benchmark.jl
1083
- --config_file $PERF_CONFIG_PATH/flame_perf_target .yml
1066
+ --config_file $PERF_CONFIG_PATH/bm_default.yml
1084
1067
--job_id checkbounds
1085
1068
artifact_paths : " checkbounds/output_active/*"
1086
1069
agents :
@@ -1090,7 +1073,7 @@ steps:
1090
1073
- label : " :rocket: JET n-failures (inference)"
1091
1074
command : >
1092
1075
julia --color=yes --project=.buildkite perf/jet_test_nfailures.jl
1093
- --config_file $PERF_CONFIG_PATH/flame_perf_target .yml
1076
+ --config_file $PERF_CONFIG_PATH/bm_default.yml
1094
1077
--job_id jet_n_failures
1095
1078
agents :
1096
1079
slurm_mem : 24GB
@@ -1099,7 +1082,7 @@ steps:
1099
1082
- label : " :mag::rocket: Invalidations"
1100
1083
command : >
1101
1084
julia --color=yes --project=.buildkite perf/invalidations.jl
1102
- --config_file $PERF_CONFIG_PATH/flame_perf_target .yml
1085
+ --config_file $PERF_CONFIG_PATH/bm_default.yml
1103
1086
artifact_paths : " invalidations/*"
1104
1087
agents :
1105
1088
slurm_mem : 24GB
0 commit comments