@@ -292,7 +292,6 @@ if ( $SITE == 'NCCS' ) then
    set BUILT_ON_SLES15 = @BUILT_ON_SLES15@
 
    if ("$BUILT_ON_SLES15" == "TRUE") then
-      set DEFAULT_MODEL = 'mil'
       echo "Enter the ${C1}Processor Type${CN} you wish to run on:"
       echo "   ${C2}mil (Milan)${CN} (default)"
       echo " "
@@ -311,7 +310,7 @@ if ( $SITE == 'NCCS' ) then
    else
       echo "Enter the ${C1}Processor Type${CN} you wish to run on:"
       echo "   ${C2}sky (Skylake)${CN} "
-      echo "   ${C2}cas (Cascade Lake)${CN} (default)"
+      echo "   ${C2}cas (Cascade Lake) (default)${CN} "
       echo " "
       set MODEL = `echo $<`
       set MODEL = `echo $MODEL | tr "[:upper:]" "[:lower:]"`
@@ -340,20 +339,17 @@ else if ( $SITE == 'NAS' ) then
    echo "Enter the ${C1}Processor Type${CN} you wish to run on:"
    echo "   ${C2}has (Haswell)${CN} "
    echo "   ${C2}bro (Broadwell)${CN} "
-   echo "   ${C2}sky (Skylake)${CN} "
-   echo "   ${C2}cas (Cascade Lake)${CN} (default) "
+   echo "   ${C2}sky (Skylake)${CN} (default) "
+   echo "   ${C2}cas (Cascade Lake)${CN} "
    echo "   ${C2}rom (AMD Rome)${CN} "
    echo " "
-   echo "   NOTE 1: Due to how FV3 is compiled by default, Sandy Bridge"
-   echo "           and Ivy Bridge are not supported by current GEOS"
-   echo " "
-   echo "   NOTE 2: GEOS is non-zero-diff when running on AMD Rome"
-   echo "           compared to the other Intel nodes."
+   echo "   NOTE: Due to how FV3 is compiled by default, Sandy Bridge"
+   echo "         and Ivy Bridge are not supported by current GEOS"
    echo " "
    set MODEL = `echo $<`
    set MODEL = `echo $MODEL | tr "[:upper:]" "[:lower:]"`
    if ( .$MODEL == . ) then
-      set MODEL = 'cas'
+      set MODEL = 'sky'
    endif
 
    if( $MODEL != 'has' & \
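The hunks above all adjust the same prompt-with-default idiom: read a reply with `$<`, lowercase it with tr, and fall back to a default when the reply is empty. A minimal standalone sketch in csh (the names and the chosen default here are illustrative, not part of the script):

    echo "Enter the Processor Type you wish to run on:"
    echo "   sky (Skylake) (default)"
    set MODEL = `echo $<`                                    # read one line from stdin
    set MODEL = `echo $MODEL | tr "[:upper:]" "[:lower:]"`   # normalize to lowercase
    if ( .$MODEL == . ) then    # leading dots keep the test valid when the reply is empty
       set MODEL = 'sky'
    endif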
@@ -697,56 +693,87 @@ echo $GROUP > $HOME/.GROUProot
 # Set Recommended MPI Stack Settings
 #######################################################################
 
+# By default, do not have the oserver write restarts
+set RESTART_BY_OSERVER = NO
+
 /bin/rm -f $EXPDIR/SETENV.commands
 
 if( $MPI_STACK == openmpi ) then
 
-   # This turns off an annoying warning when running
-   # Open MPI on a system where TMPDIRs are on a networked
-   # file system
+   # Open MPI and GEOS have issues with restart writing. Having the
+   # oserver write them can be orders of magnitude faster
+
+   set RESTART_BY_OSERVER = YES
+
+   # Testing by Bill Putman determined some useful
+   # Open MPI parameters. Testing shows these work
+   # on both OSs at NCCS and on macOS
 
 cat > $EXPDIR/SETENV.commands << EOF
-setenv OMPI_MCA_shmem_mmap_enable_nfs_warning 0
+# Turn off warning about TMPDIR on NFS
+setenv OMPI_MCA_shmem_mmap_enable_nfs_warning 0
+# Pre-connect MPI processes at MPI_Init
+setenv OMPI_MCA_mpi_preconnect_all 1
+setenv OMPI_MCA_coll_tuned_bcast_algorithm 7
+setenv OMPI_MCA_coll_tuned_scatter_algorithm 2
+setenv OMPI_MCA_coll_tuned_reduce_scatter_algorithm 3
+setenv OMPI_MCA_coll_tuned_allreduce_algorithm 3
+setenv OMPI_MCA_coll_tuned_allgather_algorithm 4
+setenv OMPI_MCA_coll_tuned_allgatherv_algorithm 3
+setenv OMPI_MCA_coll_tuned_gather_algorithm 1
+setenv OMPI_MCA_coll_tuned_barrier_algorithm 0
+# Required for the tuned algorithm flags above to take effect
+setenv OMPI_MCA_coll_tuned_use_dynamic_rules 1
+# Disable file locks
+setenv OMPI_MCA_sharedfp "^lockedfile,individual"
 EOF
 
 # The below settings seem to be recommended for hybrid
-# systems using MVAPICH2 but could change
+# systems using MVAPICH but could change
 
 else if( $MPI_STACK == mvapich ) then
 
+   # MVAPICH and GEOS have issues with restart writing. Having the
+   # oserver write them seems to work
+   set RESTART_BY_OSERVER = YES
+
 cat > $EXPDIR/SETENV.commands << EOF
-setenv MV2_ENABLE_AFFINITY 0
-setenv SLURM_DISTRIBUTION block
-setenv MV2_MPIRUN_TIMEOUT 100
-setenv MV2_GATHERV_SSEND_THRESHOLD 256
+setenv MV2_ENABLE_AFFINITY 0
+setenv SLURM_DISTRIBUTION block
+setenv MV2_MPIRUN_TIMEOUT 100
+setenv MV2_GATHERV_SSEND_THRESHOLD 256
 EOF
 
 else if( $MPI_STACK == mpt ) then
 
 cat > $EXPDIR/SETENV.commands << EOF
 
-setenv MPI_COLL_REPRODUCIBLE
-setenv SLURM_DISTRIBUTION block
+setenv MPI_COLL_REPRODUCIBLE
+setenv SLURM_DISTRIBUTION block
 
-#setenv MPI_DISPLAY_SETTINGS 1
-#setenv MPI_VERBOSE 1
+#setenv MPI_DISPLAY_SETTINGS 1
+#setenv MPI_VERBOSE 1
 
-unsetenv MPI_MEMMAP_OFF
-unsetenv MPI_NUM_MEMORY_REGIONS
-setenv MPI_XPMEM_ENABLED yes
-unsetenv SUPPRESS_XPMEM_TRIM_THRESH
+setenv MPI_MEMMAP_OFF
+unsetenv MPI_NUM_MEMORY_REGIONS
+setenv MPI_XPMEM_ENABLED yes
+unsetenv SUPPRESS_XPMEM_TRIM_THRESH
 
-setenv MPI_LAUNCH_TIMEOUT 40
+setenv MPI_LAUNCH_TIMEOUT 40
 
-# For some reason, PMI_RANK is randomly set and interferes
-# with binarytile.x and other executables.
-unsetenv PMI_RANK
+setenv MPI_COMM_MAX 1024
+setenv MPI_GROUP_MAX 1024
+setenv MPI_BUFS_PER_PROC 256
 
-# Often when debugging on MPT, the traceback from Intel Fortran
-# is "absorbed" and only MPT's errors are displayed. To allow the
-# compiler's traceback to be displayed, uncomment this environment
-# variable
-#setenv FOR_IGNORE_EXCEPTIONS false
+# For some reason, PMI_RANK is randomly set and interferes
+# with binarytile.x and other executables.
+unsetenv PMI_RANK
+
+# Often when debugging on MPT, the traceback from Intel Fortran
+# is "absorbed" and only MPT's errors are displayed. To allow the
+# compiler's traceback to be displayed, uncomment this environment
+# variable
+#setenv FOR_IGNORE_EXCEPTIONS false
 
 EOF
 
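The SETENV.commands file built above is a plain csh fragment of setenv lines, so its effect can be spot-checked from an interactive shell before a full run. A hedged sketch for the Open MPI case, assuming ompi_info is on the PATH (how the run scripts ultimately consume the file is not shown in this hunk):

    source $EXPDIR/SETENV.commands                             # load the OMPI_MCA_* overrides
    env | grep OMPI_MCA_coll_tuned                             # confirm they are exported
    ompi_info --param coll tuned --level 9 | grep algorithm    # list the tuned-collective knobs they map to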
@@ -782,13 +809,32 @@ EOF
 
 endif # if NOT Singularity
 
-# Testing on SLES15 showed that the mlx provider did not seem
-# to work at scale. So we move to use the verbs provider. Note:
-# still seems to have issues at c720
+# Testing by Bill Putman found these to be
+# useful flags with Intel MPI on SLES15 on the
+# Milan nodes.
+# Note 1: Testing by NCCS shows the PSM3 provider
+# runs on the Infiniband fabric. Tests show it runs
+# up to C720.
+# Note 2: When the Cascade Lakes are moved to
+# SLES15, these will need to be Milan-only flags
+# as Intel MPI will probably work just fine with
+# Intel chips.
 if ("$BUILT_ON_SLES15" == "TRUE") then
 cat >> $EXPDIR/SETENV.commands << EOF
-setenv I_MPI_OFI_PROVIDER verbs
-setenv I_MPI_COLL_EXTERNAL 0
+setenv I_MPI_FALLBACK 0
+setenv I_MPI_FABRICS ofi
+setenv I_MPI_OFI_PROVIDER psm3
+setenv I_MPI_ADJUST_SCATTER 2
+setenv I_MPI_ADJUST_SCATTERV 2
+setenv I_MPI_ADJUST_GATHER 2
+setenv I_MPI_ADJUST_GATHERV 3
+setenv I_MPI_ADJUST_ALLGATHER 3
+setenv I_MPI_ADJUST_ALLGATHERV 3
+setenv I_MPI_ADJUST_ALLREDUCE 12
+setenv I_MPI_ADJUST_REDUCE 10
+setenv I_MPI_ADJUST_BCAST 11
+setenv I_MPI_ADJUST_REDUCE_SCATTER 4
+setenv I_MPI_ADJUST_BARRIER 9
 EOF
 
 endif # if SLES15
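Whether Intel MPI actually selects the psm3 provider requested above can be confirmed from its startup diagnostics. A hedged check in csh, with the launch line and executable name purely illustrative:

    source $EXPDIR/SETENV.commands
    setenv I_MPI_DEBUG 5                            # debug level 5 reports the selected libfabric provider
    mpirun -np 4 ./GEOSgcm.x |& grep -i provider    # expect a line naming psm3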
@@ -797,6 +843,7 @@ endif # if NCCS
 
 endif # if mpi
 
+
 #######################################################################
 # Create Local Scripts and Resource Files
 #######################################################################