Skip to content

Commit 48bc9de

Browse files
Merge branch 'develop' into feature/saraqzhang/addbkglfo2hist
2 parents ce2f1b3 + aa19b05 commit 48bc9de

File tree

7 files changed

+50
-6
lines changed

7 files changed

+50
-6
lines changed

gcm_forecast.tmpl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -766,7 +766,7 @@ else
766766
endif
767767

768768
@SINGULARITY_BUILD @OCEAN_PRELOAD $RUN_CMD $TOTAL_PES $SINGULARITY_RUN $GEOSEXE $IOSERVER_OPTIONS $IOSERVER_EXTRA --logging_config 'logging.yaml'
769-
@NATIVE_BUILD @OCEAN_PRELOAD $RUN_CMD $TOTAL_PES $GEOSEXE $IOSERVER_OPTIONS $IOSERVER_EXTRA --logging_config 'logging.yaml'
769+
@NATIVE_BUILD @OCEAN_PRELOAD @SEVERAL_TRIES $RUN_CMD $TOTAL_PES $GEOSEXE $IOSERVER_OPTIONS $IOSERVER_EXTRA --logging_config 'logging.yaml'
770770

771771
if( $USE_SHMEM == 1 ) $GEOSBIN/RmShmKeys_sshmpi.csh >& /dev/null
772772

gcm_regress.j

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,7 @@ if( $RUN_STARTSTOP == TRUE ) then
451451

452452
echo "=== Running test of duration ${test_duration_step1} with NX = $NX and NY = $NY starting at $nymd0 $nhms0 ==="
453453

454-
@OCEAN_PRELOAD $RUN_CMD $NPES ./GEOSgcm.x --logging_config 'logging.yaml'
454+
@OCEAN_PRELOAD @SEVERAL_TRIES $RUN_CMD $NPES ./GEOSgcm.x --logging_config 'logging.yaml'
455455

456456
set date = `cat cap_restart`
457457
set nymde1 = $date[1]
@@ -517,7 +517,7 @@ set NY = `grep "^ *NY": AGCM.rc | cut -d':' -f2`
517517

518518
echo "=== Running test of duration ${test_duration_step2} with NX = $NX and NY = $NY starting at $nymd0 $nhms0 ==="
519519

520-
@OCEAN_PRELOAD $RUN_CMD $NPES ./GEOSgcm.x --logging_config 'logging.yaml'
520+
@OCEAN_PRELOAD @SEVERAL_TRIES $RUN_CMD $NPES ./GEOSgcm.x --logging_config 'logging.yaml'
521521

522522
set date = `cat cap_restart`
523523
set nymde2 = $date[1]
@@ -623,7 +623,7 @@ if ($RUN_STARTSTOP == TRUE) then
623623

624624
echo "=== Running test of duration ${test_duration_step3} with NX = $NX and NY = $NY starting at $nymdb $nhmsb ==="
625625

626-
@OCEAN_PRELOAD $RUN_CMD $NPES ./GEOSgcm.x --logging_config 'logging.yaml'
626+
@OCEAN_PRELOAD @SEVERAL_TRIES $RUN_CMD $NPES ./GEOSgcm.x --logging_config 'logging.yaml'
627627

628628
set date = `cat cap_restart`
629629
set nymde3 = $date[1]
@@ -737,7 +737,7 @@ if ( $RUN_LAYOUT == TRUE) then
737737

738738
echo "=== Running test of duration ${test_duration_step4} with NX = $test_NX and NY = $test_NY starting at $nymd0 $nhms0 ==="
739739

740-
@OCEAN_PRELOAD $RUN_CMD $NPES ./GEOSgcm.x --logging_config 'logging.yaml'
740+
@OCEAN_PRELOAD @SEVERAL_TRIES $RUN_CMD $NPES ./GEOSgcm.x --logging_config 'logging.yaml'
741741

742742
set date = `cat cap_restart`
743743
set nymde4 = $date[1]

gcm_run.j

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1059,7 +1059,7 @@ else
10591059
endif
10601060

10611061
@SINGULARITY_BUILD @OCEAN_PRELOAD $RUN_CMD $TOTAL_PES $SINGULARITY_RUN $GEOSEXE $IOSERVER_OPTIONS $IOSERVER_EXTRA --logging_config 'logging.yaml'
1062-
@NATIVE_BUILD @OCEAN_PRELOAD $RUN_CMD $TOTAL_PES $GEOSEXE $IOSERVER_OPTIONS $IOSERVER_EXTRA --logging_config 'logging.yaml'
1062+
@NATIVE_BUILD @OCEAN_PRELOAD @SEVERAL_TRIES $RUN_CMD $TOTAL_PES $GEOSEXE $IOSERVER_OPTIONS $IOSERVER_EXTRA --logging_config 'logging.yaml'
10631063

10641064
if( $USE_SHMEM == 1 ) $GEOSBIN/RmShmKeys_sshmpi.csh >& /dev/null
10651065

gcm_setup

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2157,6 +2157,9 @@ set RESTART_BY_OSERVER = NO
21572157

21582158
/bin/rm -f $HOMDIR/SETENV.commands
21592159

2160+
# NAS has a "several_tries" script but we need an empty
2161+
# default
2162+
set SEVERAL_TRIES = ''
21602163

21612164
if( $MPI_STACK == openmpi ) then
21622165

@@ -2206,6 +2209,10 @@ EOF
22062209

22072210
else if( $MPI_STACK == mpt ) then
22082211

2212+
# NAS recommends several_tries for MPT job issues
2213+
# https://www.nas.nasa.gov/hecc/support/kb/mpt-startup-failures-workarounds_526.html
2214+
set SEVERAL_TRIES = '/u/scicon/tools/bin/several_tries'
2215+
22092216
cat > $HOMDIR/SETENV.commands << EOF
22102217
22112218
setenv MPI_COLL_REPRODUCIBLE
@@ -2268,6 +2275,9 @@ setenv I_MPI_ADJUST_GATHERV 3
22682275
22692276
setenv I_MPI_FABRICS shm:ofi
22702277
setenv I_MPI_OFI_PROVIDER psm3
2278+
2279+
# This has been found to help with congestion
2280+
setenv FI_PSM3_CONN_TIMEOUT 120
22712281
EOF
22722282

22732283
endif # if NCCS
@@ -2433,6 +2443,7 @@ s/@USE_IOSERVER/$USE_IOSERVER/g
24332443
s/@NUM_OSERVER_NODES/$NUM_OSERVER_NODES/g
24342444
s/@NUM_BACKEND_PES/$NUM_BACKEND_PES/g
24352445
s/@RESTART_BY_OSERVER/$RESTART_BY_OSERVER/g
2446+
s#@SEVERAL_TRIES#$SEVERAL_TRIES#g
24362447
s/@NCPUS_PER_NODE/$NCPUS_PER_NODE/g
24372448
s/@NUM_READERS/$NUM_READERS/g
24382449
s/@NUM_WRITERS/$NUM_WRITERS/g

geoschemchem_setup

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2187,6 +2187,9 @@ set RESTART_BY_OSERVER = NO
21872187

21882188
/bin/rm -f $HOMDIR/SETENV.commands
21892189

2190+
# NAS has a "several_tries" script but we need an empty
2191+
# default
2192+
set SEVERAL_TRIES = ''
21902193

21912194
if( $MPI_STACK == openmpi ) then
21922195

@@ -2236,6 +2239,10 @@ EOF
22362239

22372240
else if( $MPI_STACK == mpt ) then
22382241

2242+
# NAS recommends several_tries for MPT job issues
2243+
# https://www.nas.nasa.gov/hecc/support/kb/mpt-startup-failures-workarounds_526.html
2244+
set SEVERAL_TRIES = '/u/scicon/tools/bin/several_tries'
2245+
22392246
cat > $HOMDIR/SETENV.commands << EOF
22402247
22412248
setenv MPI_COLL_REPRODUCIBLE
@@ -2298,6 +2305,9 @@ setenv I_MPI_ADJUST_GATHERV 3
22982305
22992306
setenv I_MPI_FABRICS shm:ofi
23002307
setenv I_MPI_OFI_PROVIDER psm3
2308+
2309+
# This has been found to help with congestion
2310+
setenv FI_PSM3_CONN_TIMEOUT 120
23012311
EOF
23022312

23032313
endif # if NCCS
@@ -2463,6 +2473,7 @@ s/@USE_IOSERVER/$USE_IOSERVER/g
24632473
s/@NUM_OSERVER_NODES/$NUM_OSERVER_NODES/g
24642474
s/@NUM_BACKEND_PES/$NUM_BACKEND_PES/g
24652475
s/@RESTART_BY_OSERVER/$RESTART_BY_OSERVER/g
2476+
s#@SEVERAL_TRIES#$SEVERAL_TRIES#g
24662477
s/@NCPUS_PER_NODE/$NCPUS_PER_NODE/g
24672478
s/@NUM_READERS/$NUM_READERS/g
24682479
s/@NUM_WRITERS/$NUM_WRITERS/g

gmichem_setup

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2359,6 +2359,9 @@ set RESTART_BY_OSERVER = NO
23592359

23602360
/bin/rm -f $HOMDIR/SETENV.commands
23612361

2362+
# NAS has a "several_tries" script but we need an empty
2363+
# default
2364+
set SEVERAL_TRIES = ''
23622365

23632366
if( $MPI_STACK == openmpi ) then
23642367

@@ -2408,6 +2411,10 @@ EOF
24082411

24092412
else if( $MPI_STACK == mpt ) then
24102413

2414+
# NAS recommends several_tries for MPT job issues
2415+
# https://www.nas.nasa.gov/hecc/support/kb/mpt-startup-failures-workarounds_526.html
2416+
set SEVERAL_TRIES = '/u/scicon/tools/bin/several_tries'
2417+
24112418
cat > $HOMDIR/SETENV.commands << EOF
24122419
24132420
setenv MPI_COLL_REPRODUCIBLE
@@ -2470,6 +2477,9 @@ setenv I_MPI_ADJUST_GATHERV 3
24702477
24712478
setenv I_MPI_FABRICS shm:ofi
24722479
setenv I_MPI_OFI_PROVIDER psm3
2480+
2481+
# This has been found to help with congestion
2482+
setenv FI_PSM3_CONN_TIMEOUT 120
24732483
EOF
24742484

24752485
endif # if NCCS
@@ -2636,6 +2646,7 @@ s/@USE_IOSERVER/$USE_IOSERVER/g
26362646
s/@NUM_OSERVER_NODES/$NUM_OSERVER_NODES/g
26372647
s/@NUM_BACKEND_PES/$NUM_BACKEND_PES/g
26382648
s/@RESTART_BY_OSERVER/$RESTART_BY_OSERVER/g
2649+
s#@SEVERAL_TRIES#$SEVERAL_TRIES#g
26392650
s/@NCPUS_PER_NODE/$NCPUS_PER_NODE/g
26402651
s/@NUM_READERS/$NUM_READERS/g
26412652
s/@NUM_WRITERS/$NUM_WRITERS/g

stratchem_setup

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2172,6 +2172,9 @@ set RESTART_BY_OSERVER = NO
21722172

21732173
/bin/rm -f $HOMDIR/SETENV.commands
21742174

2175+
# NAS has a "several_tries" script but we need an empty
2176+
# default
2177+
set SEVERAL_TRIES = ''
21752178

21762179
if( $MPI_STACK == openmpi ) then
21772180

@@ -2221,6 +2224,10 @@ EOF
22212224

22222225
else if( $MPI_STACK == mpt ) then
22232226

2227+
# NAS recommends several_tries for MPT job issues
2228+
# https://www.nas.nasa.gov/hecc/support/kb/mpt-startup-failures-workarounds_526.html
2229+
set SEVERAL_TRIES = '/u/scicon/tools/bin/several_tries'
2230+
22242231
cat > $HOMDIR/SETENV.commands << EOF
22252232
22262233
setenv MPI_COLL_REPRODUCIBLE
@@ -2283,6 +2290,9 @@ setenv I_MPI_ADJUST_GATHERV 3
22832290
22842291
setenv I_MPI_FABRICS shm:ofi
22852292
setenv I_MPI_OFI_PROVIDER psm3
2293+
2294+
# This has been found to help with congestion
2295+
setenv FI_PSM3_CONN_TIMEOUT 120
22862296
EOF
22872297

22882298
endif # if NCCS
@@ -2449,6 +2459,7 @@ s/@USE_IOSERVER/$USE_IOSERVER/g
24492459
s/@NUM_OSERVER_NODES/$NUM_OSERVER_NODES/g
24502460
s/@NUM_BACKEND_PES/$NUM_BACKEND_PES/g
24512461
s/@RESTART_BY_OSERVER/$RESTART_BY_OSERVER/g
2462+
s#@SEVERAL_TRIES#$SEVERAL_TRIES#g
24522463
s/@NCPUS_PER_NODE/$NCPUS_PER_NODE/g
24532464
s/@NUM_READERS/$NUM_READERS/g
24542465
s/@NUM_WRITERS/$NUM_WRITERS/g

0 commit comments

Comments
 (0)