-
Notifications
You must be signed in to change notification settings - Fork 6
Building and Running on Summit RHEL8
Cameron Smith edited this page Apr 21, 2022
·
7 revisions
# Load the compiler, CMake and the system-provided ADIOS2 install.
module load gcc/10.2.0 cmake/3.21.3 adios2/2.7.1
# Fetch the sources over SSH.
git clone git@github.com:SCOREC/redev.git
# Configure an out-of-source Release build that installs into the build tree;
# jsrun is registered as the MPI launcher used by the tests.
cmake -S redev -B buildRedev_sysAdios2 \
  -DCMAKE_CXX_COMPILER=g++ \
  -DCMAKE_C_COMPILER=gcc \
  -DMPIEXEC_EXECUTABLE=jsrun \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_INSTALL_PREFIX=buildRedev_sysAdios2/install
# Build with two parallel jobs and install.
cmake --build buildRedev_sysAdios2 -j2 --target install
By default the tests use the BP4 engine. To run them with the SST engine instead, copy adios2_sst.yaml from the source directory into the build directory and rename the copy to adios2.yaml.
# Start an interactive shell ($SHELL) as a job in the debug queue: one node,
# a 10 minute wall-clock limit, charged to project fus123
# (NOTE: fus123 looks like an example project id - substitute your own).
bsub -q debug -Is -W 0:10 -nnodes 1 -P fus123 $SHELL
# Load the same modules that were used for the build inside the job's shell.
module load gcc/10.2.0 cmake/3.21.3 adios2/2.7.1
# Run the test suite from the build directory.
cd buildRedev_sysAdios2
ctest
Create an adios2.yaml configuration file and a script named runSendRecvLarge.sh with the contents shown below (the YAML configuration first, then the script). In the script, edit the `bin` variable so that it points to your install.
Submit the job with:
bsub runSendRecvLarge.sh
---
# adios2 config.yaml
- IO: "rendezvous"
  Engine:
    Type: SST
#!/bin/bash
# vim: set tw=120:
# bash is requested explicitly because the script relies on bash-only syntax
# ([[ ]], local, &>>).
#
# LSF job options:
#   -P            project to charge (fus123 looks like an example id - use your own)
#   -q            queue
#   -W            wall-clock limit (30 minutes)
#   -nnodes       nodes requested; the benchmark below needs 6 writer nodes + 1 reader node
#   -alloc_flags  node allocation flags (smt4)
#   -J            job name
#   -o / -e       stdout and stderr both go to <jobid>.out
#BSUB -P fus123
#BSUB -q debug
#BSUB -W 00:30
#BSUB -nnodes 7
#BSUB -alloc_flags "smt4"
#BSUB -J redevBenchSRLarge
#BSUB -o %J.out
#BSUB -e %J.out
#######################################
# Launch one side of the benchmark with jsrun as a background job.
# Loads the MPI/compiler/ADIOS2 modules, starts jsrun asynchronously with its
# stdout and stderr appended to the given file, then purges the modules.
# The caller must wait for the launched job (e.g. with `jswait all` / `wait`).
# Globals:
#   hostfile (read, optional) - when set and non-empty, its contents are printed
# Arguments:
#   $1 - number of nodes the processes are spread over (must be > 0)
#   $2 - total number of processes (one jsrun resource set per process)
#   $3 - executable to launch
#   $4 - leading argument(s) for the executable; word-split on whitespace
#   $5 - file that receives the appended stdout/stderr of jsrun
#   $6 - passed to the executable after $4 (named mbpr; presumably MB per
#        rank - confirm against the benchmark)
#   $7 - passed to the executable after $6 (rendezvous rank count)
#   $8 - passed to the executable after $7 (factor)
# Returns:
#   1 if the node count is not positive, otherwise the status of `module purge`.
#######################################
run() {
  local nodes=$1
  local processes=$2
  local exe=$3
  local args=$4
  local outfile=$5
  local mbpr=$6
  local rdvRanks=$7
  local factor=$8

  # Without this guard a zero node count hits a division by zero below.
  if ! (( nodes > 0 )); then
    echo "ERROR: run() needs a positive node count (got '${nodes}')" >&2
    return 1
  fi

  module load spectrum-mpi/10.4.0.3-20210112 gcc/10.2.0
  #system adios2 install {
  #from https://github.com/ornladios/ADIOS2/issues/2887#issuecomment-1021428076
  module load adios2/2.7.1
  # Environment settings forwarded to the ranks via jsrun -E; kept in an array
  # so that each one is passed as exactly one argument.
  local -a rdmaVars=(
    -EFABRIC_IFACE=mlx5_0
    -EOMPI_MCA_coll_ibm_skip_barrier=true
    -EFI_MR_CACHE_MAX_COUNT=0
    -EFI_OFI_RXM_USE_SRX=1
  )
  #}

  # hostfile is not defined anywhere in this script; a bare `cat` would read
  # stdin instead, so only print the file when a caller actually provides one.
  if [[ -n "${hostfile:-}" ]]; then
    cat -- "${hostfile}"
  fi

  set -x
  local ranksPerNode=$((processes/nodes))
  # ${args} is intentionally unquoted so it may carry several arguments.
  # shellcheck disable=SC2086
  jsrun --nrs "${processes}" \
    --tasks_per_rs 1 \
    --cpu_per_rs 1 \
    --gpu_per_rs 0 \
    --rs_per_host "${ranksPerNode}" \
    --latency_priority CPU-CPU \
    --launch_distribution packed \
    --bind packed:1 \
    -EOMP_NUM_THREADS=1 \
    "${rdmaVars[@]}" \
    "${exe}" ${args} "${mbpr}" "${rdvRanks}" "${factor}" &>> "${outfile}" &
  set +x
  module purge
}
# Remember the directory the job started in (not referenced again in the
# visible part of the script).
root="${PWD}"
# Log the start time; the substitution is left unquoted so runs of spaces in
# the date output are collapsed to one.
echo $(date)
#######################################
# Print the number of nodes needed to hold a given number of ranks.
# A node takes 21*2 = 42 ranks (presumably the usable cores of a Power9
# node - confirm); a partially filled node counts as a whole node.
# Arguments:
#   $1 - number of ranks
# Outputs:
#   the node count on stdout
#######################################
getNumNodes() {
  local ranks=$1
  local perNode=$((21*2))
  local fullNodes=$((ranks/perNode))
  local leftover=$((ranks % perNode))
  # Any leftover ranks need one more node.
  if (( leftover > 0 )); then
    fullNodes=$((fullNodes+1))
  fi
  echo "${fullNodes}"
}
#######################################
# Delete every entry in the current directory whose name ends in .bp or .sst
# (files or directories). Missing matches are not an error.
#######################################
rmAdiosFiles() {
  rm -rf *.bp *.sst
}
# Remove ADIOS2 output left behind by earlier runs.
rmAdiosFiles

echo "LSB_MCPU_HOSTS ${LSB_MCPU_HOSTS}"
# Count the hosts in the allocation whose name looks like <letter><digits>n...
# (presumably the compute nodes, e.g. a01n01; other hosts such as the launch
# node do not match - confirm against your LSB_MCPU_HOSTS output).
numJobNodes=$(echo ${LSB_MCPU_HOSTS} | grep -o -i ' [a-z][0-9]\+n' | wc -l)
bin=/path/to/buildRedev_sysAdios2/util_benchsrLarge
#252 writers (6 nodes * 42 ranks/node), 18 readers
factor=14
for ranks in 18; do #readerRanks
  for mbpr in 2 10 25 50 100; do
    #set ranks and then check to see that there are enough nodes
    rdvRanks=$ranks
    rdvNodes=$(getNumNodes "$rdvRanks")
    nrRanks=$((ranks*factor))
    nrNodes=$(getNumNodes "$nrRanks")
    totNodes=$((rdvNodes+nrNodes))
    # Compare numerically: inside [[ ]] the > operator compares strings, so
    # e.g. 7 requested nodes would be reported as more than 10 allocated ones.
    if (( totNodes > numJobNodes )); then
      echo "ERROR: Requested nodes is greater than allocated nodes ($totNodes > $numJobNodes)"
      # The node count does not depend on mbpr, so skip the remaining sizes.
      break
    fi

    # First launch: $bin with leading argument 1 on the rdv ranks.
    rdvExe=${bin}
    rdvArgs="1"
    rdvOut=rdv_${ranks}p_${factor}f_${LSB_JOBID}.out
    run "$rdvNodes" "$rdvRanks" "$rdvExe" "$rdvArgs" "$rdvOut" "$mbpr" "$ranks" "$factor"

    # Second launch: $bin with leading argument 0 on ranks*factor ranks.
    nrExe=${bin}
    nrArgs="0"
    nrOut=nr_${ranks}p_${factor}f_${LSB_JOBID}.out
    run "$nrNodes" "$nrRanks" "$nrExe" "$nrArgs" "$nrOut" "$mbpr" "$ranks" "$factor"

    # Both launches run in the background; block until they finish, then
    # clean up before the next message size.
    jswait all
    rmAdiosFiles
  done
done
wait