Commit 333bb01

Merge pull request #270 from GEOS-ESM/feature/mathomp4/update-components-fvdycore
Use CMake to determine MPI Stack
2 parents: ed8fbb2 + 68c4a5c

1 file changed: scripts/fv3_setup (+94, -65 lines)
@@ -162,26 +162,8 @@ endif
 # Test for Compiler and MPI Setup
 #######################################################################

-setenv BASEDIR `awk '{print $2}' $ETCDIR/BASEDIR.rc`
-
-if ( `echo $BASEDIR | grep -i mvapich2` != '') then
-   set MPI = mvapich2
-else if ( `echo $BASEDIR | grep -i mpich` != '') then
-   set MPI = mpich
-else if ( `echo $BASEDIR | grep -i openmpi` != '') then
-   set MPI = openmpi
-else if ( `echo $BASEDIR | grep -i hpcx` != '') then
-   set MPI = openmpi
-else if ( `echo $BASEDIR | grep -i impi` != '') then
-   set MPI = intelmpi
-else if ( `echo $BASEDIR | grep -i intelmpi` != '') then
-   set MPI = intelmpi
-else if ( `echo $BASEDIR | grep -i mpt` != '') then
-   set MPI = mpt
-else
-   # Assume default is Intel MPI in case of older baselibs
-   set MPI = intelmpi
-endif
+# Get MPI stack from CMake
+set MPI_STACK = @MPI_STACK@

 #######################################################################
 # Enter Experiment Specific Run Parameters
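Per the commit title, the @MPI_STACK@ placeholder is now filled in by CMake when the script is configured, replacing the old grep of $BASEDIR. A minimal csh sketch of what the configured script looks like and how it branches (the openmpi value is only an example; the actual value depends on the build):

   # Hypothetical configured result (value depends on how GEOS was built):
   set MPI_STACK = openmpi

   if( $MPI_STACK == openmpi ) then
      echo "Applying Open MPI settings"
   else if( $MPI_STACK == intelmpi ) then
      echo "Applying Intel MPI settings"
   endif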
@@ -310,7 +292,6 @@ if ( $SITE == 'NCCS' ) then
    set BUILT_ON_SLES15 = @BUILT_ON_SLES15@

    if ("$BUILT_ON_SLES15" == "TRUE") then
-      set DEFAULT_MODEL = 'mil'
       echo "Enter the ${C1}Processor Type${CN} you wish to run on:"
       echo "   ${C2}mil (Milan)${CN} (default)"
       echo " "
@@ -329,7 +310,7 @@ if ( $SITE == 'NCCS' ) then
    else
       echo "Enter the ${C1}Processor Type${CN} you wish to run on:"
       echo "   ${C2}sky (Skylake)${CN}"
-      echo "   ${C2}cas (Cascade Lake)${CN} (default)"
+      echo "   ${C2}cas (Cascade Lake) (default)${CN}"
       echo " "
       set MODEL = `echo $<`
       set MODEL = `echo $MODEL | tr "[:upper:]" "[:lower:]"`
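The only change in this hunk moves "(default)" inside the ${C2}...${CN} span so it gets the same highlighting as the node name. C1, C2, and CN are color variables defined elsewhere in fv3_setup; a hypothetical csh sketch of how such variables are commonly set (the escape codes below are assumptions, not taken from this commit):

   # Hypothetical color definitions (the real ones live elsewhere in the script):
   set esc = "`printf '\033'`"
   set C1  = "${esc}[1m"      # bold
   set C2  = "${esc}[1;32m"   # bold green
   set CN  = "${esc}[0m"      # reset attributes
   echo "   ${C2}cas (Cascade Lake) (default)${CN}"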
@@ -358,20 +339,17 @@ else if ( $SITE == 'NAS' ) then
    echo "Enter the ${C1}Processor Type${CN} you wish to run on:"
    echo "   ${C2}has (Haswell)${CN}"
    echo "   ${C2}bro (Broadwell)${CN}"
-   echo "   ${C2}sky (Skylake)${CN}"
-   echo "   ${C2}cas (Cascade Lake)${CN} (default)"
+   echo "   ${C2}sky (Skylake)${CN} (default)"
+   echo "   ${C2}cas (Cascade Lake)${CN}"
    echo "   ${C2}rom (AMD Rome)${CN}"
    echo " "
-   echo "   NOTE 1: Due to how FV3 is compiled by default, Sandy Bridge"
-   echo "           and Ivy Bridge are not supported by current GEOS"
-   echo " "
-   echo "   NOTE 2: GEOS is non-zero-diff when running on AMD Rome"
-   echo "           compared to the other Intel nodes."
+   echo "   NOTE: Due to how FV3 is compiled by default, Sandy Bridge"
+   echo "         and Ivy Bridge are not supported by current GEOS"
    echo " "
    set MODEL = `echo $<`
    set MODEL = `echo $MODEL | tr "[:upper:]" "[:lower:]"`
    if ( .$MODEL == . ) then
-      set MODEL = 'cas'
+      set MODEL = 'sky'
    endif

    if( $MODEL != 'has' & \
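A side note on the `if ( .$MODEL == . )` test above, which this hunk retargets to a 'sky' default: prefixing both sides with a dot is a common csh idiom for safely detecting an empty answer. A minimal sketch using the same lines:

   # If the user just presses Enter, $MODEL is empty, ".$MODEL" is ".",
   # the comparison succeeds, and the default is applied:
   set MODEL = `echo $<`
   if ( .$MODEL == . ) then
      set MODEL = 'sky'   # default processor type at NAS after this commit
   endif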
@@ -715,60 +693,91 @@ echo $GROUP > $HOME/.GROUProot
 # Set Recommended MPI Stack Settings
 #######################################################################

+# By default do not write restarts by oserver
+set RESTART_BY_OSERVER = NO
+
 /bin/rm -f $EXPDIR/SETENV.commands

-if( $MPI == openmpi ) then
+if( $MPI_STACK == openmpi ) then
+
+   # Open MPI and GEOS have issues with restart writing. Having the
+   # oserver write them can be orders of magnitude faster
+
+   set RESTART_BY_OSERVER = YES

-   # This turns off an annoying warning when running
-   # Open MPI on a system where TMPDIRs are on a networked
-   # file system
+   # Testing by Bill Putman determined some useful
+   # Open MPI parameters. Testing shows these work
+   # on both OSs at NCCS and on macOS

 cat > $EXPDIR/SETENV.commands << EOF
-setenv OMPI_MCA_shmem_mmap_enable_nfs_warning 0
+# Turn off warning about TMPDIR on NFS
+setenv OMPI_MCA_shmem_mmap_enable_nfs_warning 0
+# Pre-connect MPI procs at mpi_init
+setenv OMPI_MCA_mpi_preconnect_all 1
+setenv OMPI_MCA_coll_tuned_bcast_algorithm 7
+setenv OMPI_MCA_coll_tuned_scatter_algorithm 2
+setenv OMPI_MCA_coll_tuned_reduce_scatter_algorithm 3
+setenv OMPI_MCA_coll_tuned_allreduce_algorithm 3
+setenv OMPI_MCA_coll_tuned_allgather_algorithm 4
+setenv OMPI_MCA_coll_tuned_allgatherv_algorithm 3
+setenv OMPI_MCA_coll_tuned_gather_algorithm 1
+setenv OMPI_MCA_coll_tuned_barrier_algorithm 0
+# Required for a tuned flag to be effective
+setenv OMPI_MCA_coll_tuned_use_dynamic_rules 1
+# Disable file locks
+setenv OMPI_MCA_sharedfp "^lockedfile,individual"
 EOF

 # The below settings seem to be recommended for hybrid
-# systems using MVAPICH2 but could change
+# systems using MVAPICH but could change

-else if( $MPI == mvapich ) then
+else if( $MPI_STACK == mvapich ) then
+
+   # MVAPICH and GEOS have issues with restart writing. Having the
+   # oserver write them seems to work
+   set RESTART_BY_OSERVER = YES

 cat > $EXPDIR/SETENV.commands << EOF
-setenv MV2_ENABLE_AFFINITY 0
-setenv SLURM_DISTRIBUTION block
-setenv MV2_MPIRUN_TIMEOUT 100
-setenv MV2_GATHERV_SSEND_THRESHOLD 256
+setenv MV2_ENABLE_AFFINITY 0
+setenv SLURM_DISTRIBUTION block
+setenv MV2_MPIRUN_TIMEOUT 100
+setenv MV2_GATHERV_SSEND_THRESHOLD 256
 EOF

-else if( $MPI == mpt ) then
+else if( $MPI_STACK == mpt ) then

 cat > $EXPDIR/SETENV.commands << EOF

-setenv MPI_COLL_REPRODUCIBLE
-setenv SLURM_DISTRIBUTION block
+setenv MPI_COLL_REPRODUCIBLE
+setenv SLURM_DISTRIBUTION block
+
+#setenv MPI_DISPLAY_SETTINGS 1
+#setenv MPI_VERBOSE 1

-#setenv MPI_DISPLAY_SETTINGS 1
-#setenv MPI_VERBOSE 1
+setenv MPI_MEMMAP_OFF
+unsetenv MPI_NUM_MEMORY_REGIONS
+setenv MPI_XPMEM_ENABLED yes
+unsetenv SUPPRESS_XPMEM_TRIM_THRESH

-unsetenv MPI_MEMMAP_OFF
-unsetenv MPI_NUM_MEMORY_REGIONS
-setenv MPI_XPMEM_ENABLED yes
-unsetenv SUPPRESS_XPMEM_TRIM_THRESH
+setenv MPI_LAUNCH_TIMEOUT 40

-setenv MPI_LAUNCH_TIMEOUT 40
+setenv MPI_COMM_MAX 1024
+setenv MPI_GROUP_MAX 1024
+setenv MPI_BUFS_PER_PROC 256

-# For some reason, PMI_RANK is randomly set and interferes
-# with binarytile.x and other executables.
-unsetenv PMI_RANK
+# For some reason, PMI_RANK is randomly set and interferes
+# with binarytile.x and other executables.
+unsetenv PMI_RANK

-# Often when debugging on MPT, the traceback from Intel Fortran
-# is "absorbed" and only MPT's errors are displayed. To allow the
-# compiler's traceback to be displayed, uncomment this environment
-# variable
-#setenv FOR_IGNORE_EXCEPTIONS false
+# Often when debugging on MPT, the traceback from Intel Fortran
+# is "absorbed" and only MPT's errors are displayed. To allow the
+# compiler's traceback to be displayed, uncomment this environment
+# variable
+#setenv FOR_IGNORE_EXCEPTIONS false

 EOF

-else if( $MPI == intelmpi ) then
+else if( $MPI_STACK == intelmpi ) then

 cat > $EXPDIR/SETENV.commands << EOF
 #setenv MPS_STAT_LEVEL 4
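An aside on the OMPI_MCA_* variables in the Open MPI block above: an environment variable of the form OMPI_MCA_<param> is equivalent to passing "--mca <param> <value>" to mpirun, and the numeric values pick specific tuned-collective algorithms. A hedged csh sketch for inspecting them (assumes ompi_info from the same Open MPI build is on PATH; output format varies by release):

   # Same effect as: mpirun --mca coll_tuned_allreduce_algorithm 3 ...
   setenv OMPI_MCA_coll_tuned_allreduce_algorithm 3
   setenv OMPI_MCA_coll_tuned_use_dynamic_rules 1   # needed for tuned picks to apply
   # List the tuned collective parameters and the algorithms their indices map to
   ompi_info --param coll tuned --level 9 | grep allreduce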
@@ -800,13 +809,32 @@ EOF

 endif # if NOT Singularity

-# Testing on SLES15 showed that the mlx provider did not seem
-# to work at scale. So we move to use the verbs provider. Note:
-# still seems to have issues at c720
+# Testing by Bill Putman found these to be
+# useful flags with Intel MPI on SLES15 on the
+# Milan nodes.
+# Note 1: Testing by NCCS shows the PSM3 provider
+# runs on the InfiniBand fabric. Tests show it runs
+# up to C720.
+# Note 2: When the Cascade Lakes are moved to
+# SLES15, these will need to be Milan-only flags,
+# as Intel MPI will probably work just fine with
+# Intel chips.
 if ("$BUILT_ON_SLES15" == "TRUE") then

 cat >> $EXPDIR/SETENV.commands << EOF
-setenv I_MPI_OFI_PROVIDER verbs
-setenv I_MPI_COLL_EXTERNAL 0
+setenv I_MPI_FALLBACK 0
+setenv I_MPI_FABRICS ofi
+setenv I_MPI_OFI_PROVIDER psm3
+setenv I_MPI_ADJUST_SCATTER 2
+setenv I_MPI_ADJUST_SCATTERV 2
+setenv I_MPI_ADJUST_GATHER 2
+setenv I_MPI_ADJUST_GATHERV 3
+setenv I_MPI_ADJUST_ALLGATHER 3
+setenv I_MPI_ADJUST_ALLGATHERV 3
+setenv I_MPI_ADJUST_ALLREDUCE 12
+setenv I_MPI_ADJUST_REDUCE 10
+setenv I_MPI_ADJUST_BCAST 11
+setenv I_MPI_ADJUST_REDUCE_SCATTER 4
+setenv I_MPI_ADJUST_BARRIER 9
 EOF

 endif # if SLES15
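For reference, the I_MPI_ADJUST_* family pins each Intel MPI collective to one algorithm number (for example, I_MPI_ADJUST_ALLREDUCE 12 selects algorithm 12 for MPI_Allreduce). A hedged csh sketch for confirming what gets selected at run time (the executable name is a placeholder; exact debug output varies by Intel MPI version):

   # I_MPI_DEBUG at level 6 or higher reports collective algorithm choices
   setenv I_MPI_DEBUG 6
   setenv I_MPI_ADJUST_ALLREDUCE 12
   mpirun -np 8 ./your_app.x |& grep -i allreduce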
@@ -815,6 +843,7 @@ endif # if NCCS

 endif # if mpi

+
 #######################################################################
 # Create Local Scripts and Resource Files
 #######################################################################
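One loose end: the diff sets RESTART_BY_OSERVER to NO or YES per stack, but its consumer lies outside these hunks. Presumably a later part of the generated run script reads it; a purely hypothetical csh sketch of such a consumer (the resource-file option shown is an assumption, not taken from this commit):

   # Hypothetical downstream use of the flag (NOT part of this diff):
   if ( $RESTART_BY_OSERVER == 'YES' ) then
      # e.g. route restart writing through MAPL's output server
      echo "RESTART_BY_OSERVER: YES" >> $EXPDIR/AGCM.rc
   endif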
