Skip to content

Commit e6641e6

Browse files
authored
Merge pull request #229 from GEOS-ESM/develop
GitFlow: Merge Develop into Main
2 parents 22edab7 + e13d7c3 commit e6641e6

File tree

4 files changed

+136
-17
lines changed

4 files changed

+136
-17
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
---

# Based on code from https://github.com/marketplace/actions/yaml-lint

name: Yaml Lint

on:
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
      - ready_for_review
      - labeled
      - unlabeled

# The lint step below reads its rules from the .yamllint.yml file and runs
# with warnings suppressed, which corresponds to running on the command line:
#   yamllint -c .yamllint.yml --no-warnings
jobs:
  validate-YAML:
    runs-on: ubuntu-latest
    steps:
      # The v3 releases of checkout and upload-artifact are deprecated, so
      # both actions are taken from their v4 major version.
      - uses: actions/checkout@v4
      - id: yaml-lint
        name: yaml-lint
        uses: ibiqlik/action-yamllint@v3
        with:
          no_warnings: true
          format: colored
          config_file: .yamllint.yml

      # always() keeps the log upload running even when the lint step fails
      - uses: actions/upload-artifact@v4
        if: always()
        with:
          name: yamllint-logfile
          path: ${{ steps.yaml-lint.outputs.logfile }}

.yamllint.yml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
---

# yamllint configuration: start from the "default" preset and adjust the
# rules listed below.
extends: default

rules:
  # Rules switched off entirely
  comments: disable
  comments-indentation: disable
  document-start: disable
  truthy: disable

  # Rules reported at warning level, no other options overridden
  colons:
    level: warning
  commas:
    level: warning
  empty-lines:
    level: warning
  hyphens:
    level: warning

  # Rules reported at warning level with an option overridden as well
  braces:
    level: warning
    max-spaces-inside: 1
  brackets:
    level: warning
    max-spaces-inside: 1
  indentation:
    level: warning
    indent-sequences: consistent
  line-length:
    level: warning
    allow-non-breakable-inline-mappings: true

scripts/fv3.j

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -142,11 +142,11 @@ module list
142142
@SETENVS
143143

144144
setenv EXETAG "$TAG"
145-
setenv FV3EXE '@FV_PRECISION'
145+
setenv FV3PRC '@FV_PRECISION'
146146
setenv EXPID "c${AGCM_IM}_L${AGCM_LM}_T${N_TRACERS}_${NX}x${NY}_${N_OMP}threads"
147147
setenv EXPDSC "c${AGCM_IM}_L${AGCM_LM}_T${N_TRACERS}_${NX}x${NY}_${N_OMP}threads"
148148
setenv EXPDIR @EXPDIR
149-
setenv SCRDIR $EXPDIR/scratch_${EXPID}_${EXETAG}-${FV3EXE}
149+
setenv SCRDIR $EXPDIR/scratch_${EXPID}_${EXETAG}-${FV3PRC}
150150
if ($NH) setenv SCRDIR ${SCRDIR}_NH.$$
151151

152152
#######################################################################
@@ -172,6 +172,32 @@ cat > ExtData.rc << EOF
172172
USE_EXTDATA: .false.
173173
EOF
174174

175+
# Regenerate logging.yaml: remove any stale copy, then write the logging
# configuration below through a here-document. The file is later passed to
# the model executable with the "--logging_config logging.yaml" option on
# the run command. It declares one lock (mpi), three formatters, five
# handlers, the root logger (handlers warnings/errors/console at WARNING)
# and named loggers for errors, CAP, MAPL and MAPL.profiler.
# NOTE(review): the EOF delimiter is unquoted, so the shell still performs
# substitution inside the body - keep "$" and backquotes out of it.
/bin/rm -f logging.yaml
176+
cat > logging.yaml << EOF
177+
schema_version: 1
178+
locks:
179+
mpi: {class: MpiLock, comm: MPI_COMM_WORLD}
180+
formatters:
181+
plain: {class: Formatter, format: '%(message)a'}
182+
basic: {class: Formatter, format: '%(short_name)a15~: %(level_name)a~: %(message)a'}
183+
mpi: {class: MpiFormatter, format: '%(mpi_rank)i4.4~: %(name)~: %(level_name)a~: %(message)a', comm: MPI_COMM_WORLD}
184+
handlers:
185+
console: {class: streamhandler, formatter: basic, unit: OUTPUT_UNIT, level: INFO}
186+
console_plain: {class: streamhandler, formatter: plain, unit: OUTPUT_UNIT, level: INFO}
187+
warnings: {class: FileHandler, filename: warnings_and_errors.log, lock: mpi, level: WARNING, formatter: basic}
188+
errors: {class: StreamHandler, formatter: basic, unit: ERROR_UNIT, level: ERROR}
189+
mpi_shared: {class: FileHandler, filename: allPEs.log, formatter: mpi, comm: MPI_COMM_WORLD, lock: mpi, rank_keyword: rank, level: DEBUG}
190+
root:
191+
handlers: [warnings, errors, console]
192+
level: WARNING
193+
root_level: WARNING
194+
loggers:
195+
errors: {handlers: [errors], level: ERROR}
196+
CAP: {level: WARNING, root_level: INFO}
197+
MAPL: {handlers: [mpi_shared], level: WARNING, root_level: INFO}
198+
MAPL.profiler: {handlers: [console_plain], propagate: FALSE, level: WARNING, root_level: INFO}
199+
EOF
200+
175201
/bin/rm -f CAP.rc
176202
cat > CAP.rc << EOF
177203
MAPLROOT_COMPNAME: DYN
@@ -210,6 +236,7 @@ IOSERVER_NODES: @IOS_NDS
210236
DYN.LM: ${AGCM_LM}
211237
DYN.IM_WORLD: ${AGCM_IM}
212238
FV3_STANDALONE: 1
239+
FV3_CONFIG: MONOTONIC
213240
DYCORE: FV3
214241
COLDSTART: 1
215242
CASE_ID: 1
@@ -331,7 +358,6 @@ if ($N_OMP > 1) then
331358
setenv KMP_AFFINITY compact
332359
setenv KMP_STACKSIZE 16m
333360
endif
334-
#env | grep MPI
335361

336362
#######################################################################
337363
# Settings for Singularity - EXPERIMENTAL
@@ -396,7 +422,6 @@ endif
396422
# Run the Model
397423
#######################################################################
398424
echo " "
399-
#pwd
400425
echo "***** USING **** $FV3EXE *********************"
401426

402427
if( $USE_SHMEM == 1 ) $GEOSBIN/RmShmKeys_sshmpi.csh >& /dev/null
@@ -407,7 +432,7 @@ else
407432
set IOSERVER_OPTIONS = ""
408433
endif
409434

410-
$RUN_CMD $NPES $SINGULARITY_RUN $FV3EXE $IOSERVER_OPTIONS |& tee ${SCRDIR}.log
435+
$RUN_CMD $NPES $SINGULARITY_RUN $FV3EXE $IOSERVER_OPTIONS --logging_config logging.yaml |& tee ${SCRDIR}.log
411436

412437
if( $USE_SHMEM == 1 ) $GEOSBIN/RmShmKeys_sshmpi.csh >& /dev/null
413438

scripts/fv3_setup

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -317,8 +317,43 @@ else if ( $SITE == 'NAS' ) then
317317
set NCPUS_PER_NODE = 128
318318
endif
319319

320+
else if( $SITE == 'AWS' || $SITE == 'Azure' ) then

   # On AWS and Azure this script knows neither the name of the node
   # model nor the number of CPUs per node, so the user has to edit the
   # two "set" lines below by hand.

   # AWS and Azure users must set the MODEL and NCPUS_PER_NODE
   set MODEL = USER_MUST_SET
   set NCPUS_PER_NODE = USER_MUST_SET

   # Guard: if either placeholder above was left unedited, print
   # instructions that point at the marker comment and stop. The
   # expansions are quoted so an empty or multi-word value cannot break
   # the expression, and the logical "||" is used rather than the
   # bitwise "|".
   # --------------------------------------------------------

   if ( "$MODEL" == USER_MUST_SET || "$NCPUS_PER_NODE" == USER_MUST_SET ) then
      echo "ERROR: We have detected you are on $SITE. As we do not have"
      echo "       official fixed node info yet, we ask you to edit $0"
      echo "       and set the MODEL and NCPUS_PER_NODE variables."
      echo "       Look for the section that says:"
      echo " "
      echo "       # AWS and Azure users must set the MODEL and NCPUS_PER_NODE"
      exit 1
   endif
343+
320344
else
321345
set MODEL = 'UNKNOWN'
346+
# No per-node CPU count is configured for this case, so count the CPUs of
# the local machine instead. The query differs between Linux and macOS
# (Darwin); any other architecture is treated as a fatal error.
switch ( $ARCH )
   case Linux:
      set NCPUS_PER_NODE = `grep -c ^processor /proc/cpuinfo`
      breaksw
   case Darwin:
      set NCPUS_PER_NODE = `sysctl -n hw.ncpu`
      breaksw
   default:
      echo "ERROR: Unknown architecture $ARCH"
      exit 1
endsw
322357
endif
323358

324359
# Set DEFAULT SHMEM Parameter
@@ -394,9 +429,9 @@ else
394429
set DEF_IOS_NDS = 5
395430
endif
396431

397-
# On desktop, we need default to 6 processes
432+
# On desktop, we default to 6 processes
398433
# Must be set here due to MODEL_NPES calc below
399-
if( $SITE != 'NAS' && $SITE != 'NCCS' && $SITE != 'AWS') then
434+
if( $SITE != 'NAS' && $SITE != 'NCCS' && $SITE != 'AWS' && $SITE != 'Azure' ) then
400435
set FV_NX = 1
401436
endif
402437

@@ -418,7 +453,7 @@ else
418453
set IOS_NDS = 0
419454
endif
420455

421-
setenv RUN_N $EXPID # RUN Job Name
456+
setenv RUN_N $EXPID # RUN Job Name
422457

423458
if( $SITE == 'NAS' ) then
424459
setenv RUN_N `echo $EXPID | cut -b1-200`_RUN # RUN Job Name
@@ -455,7 +490,7 @@ else if( $SITE == 'NCCS' ) then
455490

456491
setenv WRKDIR /discover/nobackup/$LOGNAME # user work directory
457492

458-
else if( $SITE == 'AWS' ) then
493+
else if( $SITE == 'AWS' | $SITE == "Azure" ) then
459494
setenv BATCH_CMD "sbatch" # SLURM Batch command
460495
setenv BATCH_GROUP "DELETE" # SLURM Syntax for account name
461496
setenv BATCH_TIME "SBATCH --time=" # SLURM Syntax for walltime
@@ -464,15 +499,13 @@ else if( $SITE == 'AWS' ) then
464499
setenv BATCH_JOINOUTERR "DELETE" # SLURM joins out and err by default
465500
setenv RUN_T "01:00:00" # Wallclock Time for fv3.j
466501

467-
setenv RUN_Q "DELETE"
468-
setenv RUN_P "SBATCH --ntasks=${MODEL_NPES}" # PE Configuration for fv3.j
502+
@ NODES = `echo "( ($MODEL_NPES + $NCPUS_PER_NODE) + ($IOS_NDS * $NCPUS_PER_NODE) - 1)/$NCPUS_PER_NODE" | bc`
469503

470-
# By default on AWS, just ignore IOSERVER for now until testing
471-
set USE_IOSERVER = 0
472-
set IOS_NDS = 0
473-
set NCPUS_PER_NODE = 0
504+
setenv RUN_Q "SBATCH --constraint=${MODEL}" # batch queue name for fv3.j
505+
setenv RUN_P "SBATCH --nodes=${NODES} --ntasks-per-node=${NCPUS_PER_NODE}" # PE Configuration for fv3.j
474506

475507
setenv WRKDIR /home/$LOGNAME # user work directory
508+
476509
else
477510
# These are defaults for others (assume slurm)
478511
setenv BATCH_CMD "sbatch" # SLURM Batch command
@@ -489,7 +522,6 @@ else
489522
# By default on desktop, just ignore IOSERVER for now until testing
490523
set USE_IOSERVER = 0
491524
set IOS_NDS = 0
492-
set NCPUS_PER_NODE = 0
493525

494526
setenv WRKDIR /home/$LOGNAME # user work directory
495527
endif
@@ -751,7 +783,6 @@ foreach FILE ($FILES)
751783
exit 2
752784
endif
753785

754-
755786
sed -f $EXPDIR/sedfile $EXPDIR/tmpfile > $EXPDIR/$FILE
756787

757788
echo "Creating ${C1}${FILE}${CN} for Experiment: $EXPID "
@@ -791,6 +822,9 @@ chmod +x $EXPDIR/fv3.j
791822
echo "Done!"
792823
echo "-----"
793824
echo ""
825+
echo "You can find your experiment in the directory:"
826+
echo " ${C2}${EXPDIR}${CN}"
827+
echo ""
794828
echo "NOTE: fv3.j by default will run StandAlone_FV3_Dycore.x from the installation directory:"
795829
echo " $GEOSBIN"
796830
echo ""

0 commit comments

Comments
 (0)