@@ -162,26 +162,8 @@ endif
162162# Test for Compiler and MPI Setup
163163# ######################################################################
164164
165- setenv BASEDIR `awk '{print $2}' $ETCDIR/BASEDIR.rc`
166-
167- if ( `echo $BASEDIR | grep -i mvapich2` != '' ) then
168- set MPI = mvapich2
169- else if ( `echo $BASEDIR | grep -i mpich` != '' ) then
170- set MPI = mpich
171- else if ( `echo $BASEDIR | grep -i openmpi` != '' ) then
172- set MPI = openmpi
173- else if ( `echo $BASEDIR | grep -i hpcx` != '' ) then
174- set MPI = openmpi
175- else if ( `echo $BASEDIR | grep -i impi` != '' ) then
176- set MPI = intelmpi
177- else if ( `echo $BASEDIR | grep -i intelmpi` != '' ) then
178- set MPI = intelmpi
179- else if ( `echo $BASEDIR | grep -i mpt` != '' ) then
180- set MPI = mpt
181- else
182- # Assume default is Intel MPI in case of older baselibs
183- set MPI = intelmpi
184- endif
165+ # Get MPI stack from CMake
166+ set MPI_STACK = @MPI_STACK@
185167
186168# ######################################################################
187169# Enter Experiment Specific Run Parameters
@@ -310,7 +292,6 @@ if ( $SITE == 'NCCS' ) then
310292 set BUILT_ON_SLES15 = @BUILT_ON_SLES15@
311293
312294 if (" $BUILT_ON_SLES15 " == " TRUE" ) then
313- set DEFAULT_MODEL = 'mil'
314295 echo " Enter the ${C1} Processor Type${CN} you wish to run on:"
315296 echo " ${C2} mil (Milan)${CN} (default)"
316297 echo " "
@@ -329,7 +310,7 @@ if ( $SITE == 'NCCS' ) then
329310 else
330311 echo " Enter the ${C1} Processor Type${CN} you wish to run on:"
331312 echo " ${C2} sky (Skylake)${CN} "
332- echo " ${C2} cas (Cascade Lake)${CN} (default)"
313+ echo " ${C2} cas (Cascade Lake) (default)${CN} "
333314 echo " "
334315 set MODEL = `echo $<`
335316 set MODEL = `echo $MODEL | tr "[:upper:]" "[:lower:]"`
@@ -358,20 +339,17 @@ else if ( $SITE == 'NAS' ) then
358339 echo " Enter the ${C1} Processor Type${CN} you wish to run on:"
359340 echo " ${C2} has (Haswell)${CN} "
360341 echo " ${C2} bro (Broadwell)${CN} "
361- echo " ${C2} sky (Skylake)${CN} "
362- echo " ${C2} cas (Cascade Lake)${CN} (default) "
342+ echo " ${C2} sky (Skylake)${CN} (default) "
343+ echo " ${C2} cas (Cascade Lake)${CN} "
363344 echo " ${C2} rom (AMD Rome)${CN} "
364345 echo " "
365- echo " NOTE 1: Due to how FV3 is compiled by default, Sandy Bridge"
366- echo " and Ivy Bridge are not supported by current GEOS"
367- echo " "
368- echo " NOTE 2: GEOS is non-zero-diff when running on AMD Rome"
369- echo " compared to the other Intel nodes."
346+ echo " NOTE Due to how FV3 is compiled by default, Sandy Bridge"
347+ echo " and Ivy Bridge are not supported by current GEOS"
370348 echo " "
371349 set MODEL = `echo $<`
372350 set MODEL = `echo $MODEL | tr "[:upper:]" "[:lower:]"`
373351 if ( .$MODEL == .) then
374- set MODEL = 'cas'
352+ set MODEL = 'sky'
375353 endif
376354
377355 if( $MODEL != 'has' & \
@@ -715,60 +693,91 @@ echo $GROUP > $HOME/.GROUProot
715693# Set Recommended MPI Stack Settings
716694# ######################################################################
717695
696+ # By default do not write restarts by oserver
697+ set RESTART_BY_OSERVER = NO
698+
718699/bin/rm -f $EXPDIR /SETENV.commands
719700
720- if( $MPI == openmpi ) then
701+ if( $MPI_STACK == openmpi ) then
702+
703+ # Open MPI and GEOS has issues with restart writing. Having the
704+ # oserver write them can be orders of magnitude faster
705+
706+ set RESTART_BY_OSERVER = YES
721707
722- # This turns off an annoying warning when running
723- # Open MPI on a system where TMPDIRs are on a networked
724- # file system
708+ # Testing by Bill Putman determined some useful
709+ # Open MPI parameters. Testing shows these work
710+ # on both OSs at NCCS and on macOS
725711
726712cat > $EXPDIR /SETENV.commands << EOF
727- setenv OMPI_MCA_shmem_mmap_enable_nfs_warning 0
713+ # Turn off warning about TMPDIR on NFS
714+ setenv OMPI_MCA_shmem_mmap_enable_nfs_warning 0
715+ # pre-connect MPI procs on mpi_init
716+ setenv OMPI_MCA_mpi_preconnect_all 1
717+ setenv OMPI_MCA_coll_tuned_bcast_algorithm 7
718+ setenv OMPI_MCA_coll_tuned_scatter_algorithm 2
719+ setenv OMPI_MCA_coll_tuned_reduce_scatter_algorithm 3
720+ setenv OMPI_MCA_coll_tuned_allreduce_algorithm 3
721+ setenv OMPI_MCA_coll_tuned_allgather_algorithm 4
722+ setenv OMPI_MCA_coll_tuned_allgatherv_algorithm 3
723+ setenv OMPI_MCA_coll_tuned_gather_algorithm 1
724+ setenv OMPI_MCA_coll_tuned_barrier_algorithm 0
725+ # required for a tuned flag to be effective
726+ setenv OMPI_MCA_coll_tuned_use_dynamic_rules 1
727+ # disable file locks
728+ setenv OMPI_MCA_sharedfp "^lockedfile,individual"
728729EOF
729730
730731# The below settings seem to be recommended for hybrid
731- # systems using MVAPICH2 but could change
732+ # systems using MVAPICH but could change
732733
733- else if( $MPI == mvapich ) then
734+ else if( $MPI_STACK == mvapich ) then
735+
736+ # MVAPICH and GEOS has issues with restart writing. Having the
737+ # oserver write them seems to...work
738+ set RESTART_BY_OSERVER = YES
734739
735740cat > $EXPDIR /SETENV.commands << EOF
736- setenv MV2_ENABLE_AFFINITY 0
737- setenv SLURM_DISTRIBUTION block
738- setenv MV2_MPIRUN_TIMEOUT 100
739- setenv MV2_GATHERV_SSEND_THRESHOLD 256
741+ setenv MV2_ENABLE_AFFINITY 0
742+ setenv SLURM_DISTRIBUTION block
743+ setenv MV2_MPIRUN_TIMEOUT 100
744+ setenv MV2_GATHERV_SSEND_THRESHOLD 256
740745EOF
741746
742- else if( $MPI == mpt ) then
747+ else if( $MPI_STACK == mpt ) then
743748
744749cat > $EXPDIR /SETENV.commands << EOF
745750
746- setenv MPI_COLL_REPRODUCIBLE
747- setenv SLURM_DISTRIBUTION block
751+ setenv MPI_COLL_REPRODUCIBLE
752+ setenv SLURM_DISTRIBUTION block
753+
754+ #setenv MPI_DISPLAY_SETTINGS 1
755+ #setenv MPI_VERBOSE 1
748756
749- #setenv MPI_DISPLAY_SETTINGS 1
750- #setenv MPI_VERBOSE 1
757+ setenv MPI_MEMMAP_OFF
758+ unsetenv MPI_NUM_MEMORY_REGIONS
759+ setenv MPI_XPMEM_ENABLED yes
760+ unsetenv SUPPRESS_XPMEM_TRIM_THRESH
751761
752- unsetenv MPI_MEMMAP_OFF
753- unsetenv MPI_NUM_MEMORY_REGIONS
754- setenv MPI_XPMEM_ENABLED yes
755- unsetenv SUPPRESS_XPMEM_TRIM_THRESH
762+ setenv MPI_LAUNCH_TIMEOUT 40
756763
757- setenv MPI_LAUNCH_TIMEOUT 40
764+ setenv MPI_COMM_MAX 1024
765+ setenv MPI_GROUP_MAX 1024
766+ setenv MPI_BUFS_PER_PROC 256
758767
759- # For some reason, PMI_RANK is randomly set and interferes
760- # with binarytile.x and other executables.
761- unsetenv PMI_RANK
768+ # For some reason, PMI_RANK is randomly set and interferes
769+ # with binarytile.x and other executables.
770+ unsetenv PMI_RANK
762771
763- # Often when debugging on MPT, the traceback from Intel Fortran
764- # is "absorbed" and only MPT's errors are displayed. To allow the
765- # compiler's traceback to be displayed, uncomment this environment
766- # variable
767- #setenv FOR_IGNORE_EXCEPTIONS false
772+ # Often when debugging on MPT, the traceback from Intel Fortran
773+ # is "absorbed" and only MPT's errors are displayed. To allow the
774+ # compiler's traceback to be displayed, uncomment this environment
775+ # variable
776+ #setenv FOR_IGNORE_EXCEPTIONS false
768777
769778EOF
770779
771- else if( $MPI == intelmpi ) then
780+ else if( $MPI_STACK == intelmpi ) then
772781
773782cat > $EXPDIR /SETENV.commands << EOF
774783#setenv MPS_STAT_LEVEL 4
@@ -800,13 +809,32 @@ EOF
800809
801810endif # if NOT Singularity
802811
803- # Testing on SLES15 showed that the mlx provider did not seem
804- # to work at scale. So we move to use the verbs provider. Note:
805- # still seems to have issues at c720
812+ # Testing by Bill Putman found these to be
813+ # useful flags with Intel MPI on SLES15 on the
814+ # Milan nodes.
815+ # Note 1: Testing by NCCS shows the PSM3 provider
816+ # runs on the Infiniband fabric. Tests show it runs
817+ # up to C720.
818+ # Note 2: When the Cascade Lakes are moved to
819+ # SLES15, these will need to be Milan-only flags
820+ # as Intel MPI will probably work just fine with
821+ # Intel chips.
806822if (" $BUILT_ON_SLES15 " == " TRUE" ) then
807823cat >> $EXPDIR /SETENV.commands << EOF
808- setenv I_MPI_OFI_PROVIDER verbs
809- setenv I_MPI_COLL_EXTERNAL 0
824+ setenv I_MPI_FALLBACK 0
825+ setenv I_MPI_FABRICS ofi
826+ setenv I_MPI_OFI_PROVIDER psm3
827+ setenv I_MPI_ADJUST_SCATTER 2
828+ setenv I_MPI_ADJUST_SCATTERV 2
829+ setenv I_MPI_ADJUST_GATHER 2
830+ setenv I_MPI_ADJUST_GATHERV 3
831+ setenv I_MPI_ADJUST_ALLGATHER 3
832+ setenv I_MPI_ADJUST_ALLGATHERV 3
833+ setenv I_MPI_ADJUST_ALLREDUCE 12
834+ setenv I_MPI_ADJUST_REDUCE 10
835+ setenv I_MPI_ADJUST_BCAST 11
836+ setenv I_MPI_ADJUST_REDUCE_SCATTER 4
837+ setenv I_MPI_ADJUST_BARRIER 9
810838EOF
811839
812840endif # if SLES15
@@ -815,6 +843,7 @@ endif # if NCCS
815843
816844endif # if mpi
817845
846+
818847# ######################################################################
819848# Create Local Scripts and Resource Files
820849# ######################################################################
0 commit comments