Skip to content

Building and Running on Summit RHEL8

Cameron Smith edited this page Apr 21, 2022 · 7 revisions

Environment Setup

module load gcc/10.2.0 cmake/3.21.3 adios2/2.7.1

Download, Configure, and Build

git clone git@github.com:SCOREC/redev.git
cmake -S redev -B buildRedev_sysAdios2 -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DMPIEXEC_EXECUTABLE=jsrun -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=buildRedev_sysAdios2/install
cmake --build buildRedev_sysAdios2 -j2 --target install

Run ctest

By default the tests use the BP4 engine. To run with the SST engine instead, copy adios2_sst.yaml from the source directory into the build directory and rename it to adios2.yaml.

bsub -q debug -Is -W 0:10 -nnodes 1 -P fus123 $SHELL
module load gcc/10.2.0 cmake/3.21.3 adios2/2.7.1
cd buildRedev_sysAdios2
ctest

Run large test case

Create an adios2.yaml configuration file and a script named runSendRecvLarge.sh with the contents listed below. Edit the `bin` variable in the script so that it points to the util_benchsrLarge executable in your build directory.

Submit the job with:

bsub runSendRecvLarge.sh

adios2.yaml

---
# adios2 config.yaml
- IO: "rendezvous"
  Engine:
      Type: SST

runSendRecvLarge.sh

#!/bin/bash
# vim: set tw=120:
# LSF batch script that launches the redev send/recv benchmark.
# bash is requested explicitly because the script relies on bash-only
# constructs (local, [[ ]], &>>).
#BSUB -P fus123
#BSUB -q debug
#BSUB -W 00:30
#BSUB -nnodes 7
#BSUB -alloc_flags "smt4"
#BSUB -J redevBenchSRLarge
#BSUB -o %J.out
#BSUB -e %J.out

#######################################
# Launch one group of benchmark ranks with jsrun in the background.
# The caller is responsible for waiting on the launched job.
# Globals:
#   hostfile (read, optional) - printed with cat when set and non-empty
# Arguments:
#   $1 - number of nodes the ranks are spread over (must be > 0)
#   $2 - total number of processes (one resource set per process)
#   $3 - path to the executable
#   $4 - first argument passed to the executable
#   $5 - file that stdout and stderr of jsrun are appended to
#   $6 - forwarded to the executable as its second argument (mbpr)
#   $7 - forwarded to the executable as its third argument (rdvRanks)
#   $8 - forwarded to the executable as its fourth argument (factor)
# Returns:
#   1 if the node count is not positive; otherwise the status of
#   'module purge'.
#######################################
run() {
  local nodes=$1
  local processes=$2
  local exe=$3
  local args=$4
  local outfile=$5
  local mbpr=$6
  local rdvRanks=$7
  local factor=$8

  # The ranks-per-node computation below divides by the node count.
  if ! (( nodes > 0 )); then
    echo "ERROR: run requires a positive node count (got '${nodes}')" >&2
    return 1
  fi

  module load spectrum-mpi/10.4.0.3-20210112 gcc/10.2.0

  #system adios2 install {
  #from https://github.com/ornladios/ADIOS2/issues/2887#issuecomment-1021428076
  module load adios2/2.7.1
  local rdmaVars="-EFABRIC_IFACE=mlx5_0 \
                  -EOMPI_MCA_coll_ibm_skip_barrier=true \
                  -EFI_MR_CACHE_MAX_COUNT=0 \
                  -EFI_OFI_RXM_USE_SRX=1"
  #}

  # hostfile is not assigned anywhere in this script. Only print it when the
  # environment provides one; a bare 'cat' would otherwise read from stdin.
  if [[ -n "${hostfile:-}" ]]; then
    cat "${hostfile}"
  fi

  set -x
  local ranksPerNode=$((processes/nodes))
  # ${rdmaVars} and ${args} are intentionally unquoted so that they split
  # into separate command-line words.
  jsrun --nrs "${processes}" \
    --tasks_per_rs 1 \
    --cpu_per_rs 1 \
    --gpu_per_rs 0 \
    --rs_per_host "${ranksPerNode}" \
    --latency_priority CPU-CPU \
    --launch_distribution packed \
    --bind packed:1 \
    -EOMP_NUM_THREADS=1 \
    ${rdmaVars} \
    "${exe}" ${args} "${mbpr}" "${rdvRanks}" "${factor}" &>> "${outfile}" &
  set +x
  # Unload all modules again; each call to run reloads what it needs.
  module purge
}

# Record the job start time in the job output.
echo $(date)

# Directory the job was started from (not referenced again in this script).
root="${PWD}"


#######################################
# Compute how many nodes are needed to host a number of ranks.
# Arguments:
#   $1 - number of ranks
#   $2 - ranks per node (optional, default 21*2 = 42)
# Outputs:
#   writes ceil(ranks / ranksPerNode) to stdout
#######################################
getNumNodes() {
  local ranks=$1
  # NOTE(review): 21*2 presumably reflects the usable cores on the two
  # POWER9 sockets of a node - confirm.
  local p9ranksPerNode=${2:-$((21*2))}
  # Integer ceiling division; avoids comparing numbers as strings.
  echo $(( (ranks + p9ranksPerNode - 1) / p9ranksPerNode ))
}

#######################################
# Remove ADIOS2 output left in the current directory: every entry matching
# *.bp or *.sst, whether file or directory. Missing matches are ignored.
#######################################
rmAdiosFiles() {
  # '--' ends option parsing so a name starting with '-' is not treated as
  # an rm option.
  rm -rf -- *.bp *.sst
}

# Start from a clean working directory.
rmAdiosFiles

echo "LSB_MCPU_HOSTS ${LSB_MCPU_HOSTS}"
# Count the allocated hosts whose names look like "<letter><digits>n...".
# The expansion is intentionally unquoted so that whitespace is normalized to
# single spaces before matching. grep -o prints one line per match.
numJobNodes=$(echo ${LSB_MCPU_HOSTS} | grep -o -i ' [a-z][0-9]\+n'  | wc -l)

# Path to the benchmark executable; edit for your build.
bin=/path/to/buildRedev_sysAdios2/util_benchsrLarge

#252 writers (6 nodes * 42 ranks/node), 18 readers
factor=14
for ranks in 18; do #readerRanks
  for mbpr in 2 10 25 50 100; do
    #set ranks and then check to see that there are enough nodes
    rdvRanks=$ranks
    rdvNodes=$(getNumNodes "$rdvRanks")

    nrRanks=$((ranks*factor))
    nrNodes=$(getNumNodes "$nrRanks")

    totNodes=$((rdvNodes+nrNodes))
    # Compare numerically: [[ a > b ]] compares strings, so e.g. 7 vs 10
    # would be ordered incorrectly.
    if (( totNodes > numJobNodes )); then
      echo "ERROR: Requested nodes is greater than allocated nodes ($totNodes > $numJobNodes)"
      break
    fi

    # Launch the group started with first argument "1" (output file rdv_*).
    rdvExe=${bin}
    rdvArgs="1"
    rdvOut=rdv_${ranks}p_${factor}f_${LSB_JOBID}.out
    run "$rdvNodes" "$rdvRanks" "$rdvExe" "$rdvArgs" "$rdvOut" "$mbpr" "$ranks" "$factor"

    # Launch the group started with first argument "0" (output file nr_*).
    nrExe=${bin}
    nrArgs="0"
    nrOut=nr_${ranks}p_${factor}f_${LSB_JOBID}.out
    run "$nrNodes" "$nrRanks" "$nrExe" "$nrArgs" "$nrOut" "$mbpr" "$ranks" "$factor"

    # NOTE(review): presumably blocks until both jsrun job steps launched
    # above have finished - confirm against the jswait documentation.
    jswait all
    rmAdiosFiles
  done
done

# Reap any background jobs still running in this shell.
wait

Clone this wiki locally